Extend profiling tool to support Vsize and Rss memory usage profile per call

[apitrace] / retrace / glretrace_main.cpp
diff --git a/retrace/glretrace_main.cpp b/retrace/glretrace_main.cpp

index 05567a22a31b6deb80989475a898effc8ad6791d..1591287c22d9d254507dba27a23c5245a5e83d20 100755 (executable)
--- a/retrace/glretrace_main.cpp
+++ b/retrace/glretrace_main.cpp
@@ -1,6 +1,8 @@
  /**************************************************************************
   *
   * Copyright 2011 Jose Fonseca
+ * Copyright (C) 2013 Intel Corporation. All rights reversed.
+ * Author: Shuang He <shuang.he@intel.com>
   * All Rights Reserved.
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -31,6 +33,7 @@
  #include "glstate.hpp"
  #include "glretrace.hpp"
  #include "os_time.hpp"
+#include "os_memory.hpp"
  
  /* Synchronous debug output may reduce performance however,
   * without it the callNo in the callback may be inaccurate
@@ -42,16 +45,28 @@ namespace glretrace {
  
  bool insideList = false;
  bool insideGlBeginEnd = false;
+bool supportsARBShaderObjects = false;
+
+enum {
+    GPU_START = 0,
+    GPU_DURATION,
+    OCCLUSION,
+    NUM_QUERIES,
+};
  
  struct CallQuery
  {
-    GLuint ids[3];
+    GLuint ids[NUM_QUERIES];
      unsigned call;
      bool isDraw;
      GLuint program;
      const trace::FunctionSig *sig;
      int64_t cpuStart;
      int64_t cpuEnd;
+    int64_t vsizeStart;
+    int64_t vsizeEnd;
+    int64_t rssStart;
+    int64_t rssEnd;
  };
  
  static bool supportsElapsed = true;
@@ -67,90 +82,97 @@ debugOutputCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsi
  void
  checkGlError(trace::Call &call) {
      GLenum error = glGetError();
-    if (error == GL_NO_ERROR) {
-        return;
-    }
-
-    std::ostream & os = retrace::warning(call);
-
-    os << "glGetError(";
-    os << call.name();
-    os << ") = ";
-
-    switch (error) {
-    case GL_INVALID_ENUM:
-        os << "GL_INVALID_ENUM";
-        break;
-    case GL_INVALID_VALUE:
-        os << "GL_INVALID_VALUE";
-        break;
-    case GL_INVALID_OPERATION:
-        os << "GL_INVALID_OPERATION";
-        break;
-    case GL_STACK_OVERFLOW:
-        os << "GL_STACK_OVERFLOW";
-        break;
-    case GL_STACK_UNDERFLOW:
-        os << "GL_STACK_UNDERFLOW";
-        break;
-    case GL_OUT_OF_MEMORY:
-        os << "GL_OUT_OF_MEMORY";
-        break;
-    case GL_INVALID_FRAMEBUFFER_OPERATION:
-        os << "GL_INVALID_FRAMEBUFFER_OPERATION";
-        break;
-    case GL_TABLE_TOO_LARGE:
-        os << "GL_TABLE_TOO_LARGE";
-        break;
-    default:
-        os << error;
-        break;
+    while (error != GL_NO_ERROR) {
+        std::ostream & os = retrace::warning(call);
+
+        os << "glGetError(";
+        os << call.name();
+        os << ") = ";
+
+        switch (error) {
+        case GL_INVALID_ENUM:
+            os << "GL_INVALID_ENUM";
+            break;
+        case GL_INVALID_VALUE:
+            os << "GL_INVALID_VALUE";
+            break;
+        case GL_INVALID_OPERATION:
+            os << "GL_INVALID_OPERATION";
+            break;
+        case GL_STACK_OVERFLOW:
+            os << "GL_STACK_OVERFLOW";
+            break;
+        case GL_STACK_UNDERFLOW:
+            os << "GL_STACK_UNDERFLOW";
+            break;
+        case GL_OUT_OF_MEMORY:
+            os << "GL_OUT_OF_MEMORY";
+            break;
+        case GL_INVALID_FRAMEBUFFER_OPERATION:
+            os << "GL_INVALID_FRAMEBUFFER_OPERATION";
+            break;
+        case GL_TABLE_TOO_LARGE:
+            os << "GL_TABLE_TOO_LARGE";
+            break;
+        default:
+            os << error;
+            break;
+        }
+        os << "\n";
+    
+        error = glGetError();
      }
-    os << "\n";
  }
  
-static void
-getCurrentTimes(int64_t& cpuTime, int64_t& gpuTime) {
-    GLuint query;
-
+static inline int64_t
+getCurrentTime(void) {
      if (retrace::profilingGpuTimes && supportsTimestamp) {
-        glGenQueries(1, &query);
-        glQueryCounter(query, GL_TIMESTAMP);
-        glGetQueryObjecti64vEXT(query, GL_QUERY_RESULT, &gpuTime);
+        /* Get the current GL time without stalling */
+        GLint64 timestamp = 0;
+        glGetInteger64v(GL_TIMESTAMP, &timestamp);
+        return timestamp;
      } else {
-        gpuTime = 0;
+        return os::getTime();
      }
+}
  
-    if (retrace::profilingCpuTimes) {
-        cpuTime = os::getTime();
+static inline int64_t
+getTimeFrequency(void) {
+    if (retrace::profilingGpuTimes && supportsTimestamp) {
+        return 1000000000;
      } else {
-        cpuTime = 0;
+        return os::timeFrequency;
      }
+}
  
-    if (retrace::profilingGpuTimes && supportsTimestamp) {
-        glDeleteQueries(1, &query);
-    }
+static inline void
+getCurrentVsize(int64_t& vsize) {
+    vsize = os::getVsize();
+}
+
+static inline void
+getCurrentRss(int64_t& rss) {
+    rss = os::getRss();
  }
  
  static void
  completeCallQuery(CallQuery& query) {
      /* Get call start and duration */
-    int64_t gpuStart = 0, gpuDuration = 0, cpuDuration = 0, pixels = 0;
+    int64_t gpuStart = 0, gpuDuration = 0, cpuDuration = 0, pixels = 0, vsizeDuration = 0, rssDuration = 0;
  
      if (query.isDraw) {
          if (retrace::profilingGpuTimes) {
              if (supportsTimestamp) {
-                glGetQueryObjecti64vEXT(query.ids[0], GL_QUERY_RESULT, &gpuStart);
+                glGetQueryObjecti64vEXT(query.ids[GPU_START], GL_QUERY_RESULT, &gpuStart);
              }
  
-            glGetQueryObjecti64vEXT(query.ids[1], GL_QUERY_RESULT, &gpuDuration);
+            glGetQueryObjecti64vEXT(query.ids[GPU_DURATION], GL_QUERY_RESULT, &gpuDuration);
          }
  
          if (retrace::profilingPixelsDrawn) {
-            glGetQueryObjecti64vEXT(query.ids[2], GL_QUERY_RESULT, &pixels);
+            glGetQueryObjecti64vEXT(query.ids[OCCLUSION], GL_QUERY_RESULT, &pixels);
          }
  
-        glDeleteQueries(3, query.ids);
      } else {
          pixels = -1;
      }
@@ -159,8 +181,15 @@ completeCallQuery(CallQuery& query) {
          cpuDuration = query.cpuEnd - query.cpuStart;
      }
  
+    if (retrace::profilingMemoryUsage) {
+        vsizeDuration = query.vsizeEnd - query.vsizeStart;
+        rssDuration = query.rssEnd - query.rssStart;
+    }
+
+    glDeleteQueries(NUM_QUERIES, query.ids);
+
      /* Add call to profile */
-    retrace::profiler.addCall(query.call, query.sig->name, query.program, pixels, gpuStart, gpuDuration, query.cpuStart, cpuDuration);
+    retrace::profiler.addCall(query.call, query.sig->name, query.program, pixels, gpuStart, gpuDuration, query.cpuStart, cpuDuration, query.vsizeStart, vsizeDuration, query.rssStart, rssDuration);
  }
  
  void
@@ -174,27 +203,29 @@ flushQueries() {
  
  void
  beginProfile(trace::Call &call, bool isDraw) {
+    glretrace::Context *currentContext = glretrace::getCurrentContext();
+
      /* Create call query */
      CallQuery query;
      query.isDraw = isDraw;
      query.call = call.no;
      query.sig = call.sig;
-    query.program = glretrace::currentContext ? glretrace::currentContext->activeProgram : 0;
+    query.program = currentContext ? currentContext->activeProgram : 0;
+
+    glGenQueries(NUM_QUERIES, query.ids);
  
      /* GPU profiling only for draw calls */
      if (isDraw) {
-        glGenQueries(3, query.ids);
-
          if (retrace::profilingGpuTimes) {
              if (supportsTimestamp) {
-                glQueryCounter(query.ids[0], GL_TIMESTAMP);
+                glQueryCounter(query.ids[GPU_START], GL_TIMESTAMP);
              }
  
-            glBeginQuery(GL_TIME_ELAPSED, query.ids[1]);
+            glBeginQuery(GL_TIME_ELAPSED, query.ids[GPU_DURATION]);
          }
  
          if (retrace::profilingPixelsDrawn) {
-            glBeginQuery(GL_SAMPLES_PASSED, query.ids[2]);
+            glBeginQuery(GL_SAMPLES_PASSED, query.ids[OCCLUSION]);
          }
      }
  
@@ -202,18 +233,24 @@ beginProfile(trace::Call &call, bool isDraw) {
  
      /* CPU profiling for all calls */
      if (retrace::profilingCpuTimes) {
-       callQueries.back().cpuStart = os::getTime();
+        CallQuery& query = callQueries.back();
+        query.cpuStart = getCurrentTime();
+    }
+
+    if (retrace::profilingMemoryUsage) {
+        CallQuery& query = callQueries.back();
+        query.vsizeStart = os::getVsize();
+        query.rssStart = os::getRss();
      }
  }
  
  void
  endProfile(trace::Call &call, bool isDraw) {
-    GLint64 time = os::getTime();
  
      /* CPU profiling for all calls */
      if (retrace::profilingCpuTimes) {
          CallQuery& query = callQueries.back();
-        query.cpuEnd = time;
+        query.cpuEnd = getCurrentTime();
      }
  
      /* GPU profiling only for draw calls */
@@ -226,16 +263,25 @@ endProfile(trace::Call &call, bool isDraw) {
              glEndQuery(GL_SAMPLES_PASSED);
          }
      }
+
+    if (retrace::profilingMemoryUsage) {
+        CallQuery& query = callQueries.back();
+        query.vsizeEnd = os::getVsize();
+        query.rssEnd = os::getRss();
+    }
  }
  
  void
  initContext() {
+    glretrace::Context *currentContext = glretrace::getCurrentContext();
+
      /* Ensure we have adequate extension support */
      assert(currentContext);
      supportsTimestamp   = currentContext->hasExtension("GL_ARB_timer_query");
      supportsElapsed     = currentContext->hasExtension("GL_EXT_timer_query") || supportsTimestamp;
      supportsOcclusion   = currentContext->hasExtension("GL_ARB_occlusion_query");
      supportsDebugOutput = currentContext->hasExtension("GL_ARB_debug_output");
+    supportsARBShaderObjects = currentContext->hasExtension("GL_ARB_shader_objects");
  
      /* Check for timer query support */
      if (retrace::profilingGpuTimes) {
@@ -261,6 +307,7 @@ initContext() {
  
      /* Setup debug message call back */
      if (retrace::debug && supportsDebugOutput) {
+        glretrace::Context *currentContext = glretrace::getCurrentContext();
          glDebugMessageCallbackARB(&debugOutputCallback, currentContext);
  
          if (DEBUG_OUTPUT_SYNCHRONOUS) {
@@ -271,13 +318,19 @@ initContext() {
      /* Sync the gpu and cpu start times */
      if (retrace::profilingCpuTimes || retrace::profilingGpuTimes) {
          if (!retrace::profiler.hasBaseTimes()) {
-            GLint64 gpuTime, cpuTime;
-
-            getCurrentTimes(cpuTime, gpuTime);
-            retrace::profiler.setBaseCpuTime(cpuTime);
-            retrace::profiler.setBaseGpuTime(gpuTime);
+            GLint64 currentTime = getCurrentTime();
+            retrace::profiler.setBaseCpuTime(currentTime);
+            retrace::profiler.setBaseGpuTime(currentTime);
          }
      }
+
+    if (retrace::profilingMemoryUsage) {
+        GLint64 currentVsize, currentRss;
+        getCurrentVsize(currentVsize);
+        retrace::profiler.setBaseVsizeUsage(currentVsize);
+        getCurrentRss(currentRss);
+        retrace::profiler.setBaseRssUsage(currentRss);
+    }
  }
  
  void
@@ -286,34 +339,19 @@ frame_complete(trace::Call &call) {
          /* Complete any remaining queries */
          flushQueries();
  
-        /* GPU time drifts due to being relative times, not absolute and can be
-         * affected by the gpu switch between processes.
-         *
-         * To attempt to compensate we resynchronise on frame end however there is
-         * still noticeable drift within a single frame which we do not account for.
-         */
-        if (retrace::profilingCpuTimes || retrace::profilingGpuTimes) {
-            int64_t cpuTime, gpuTime, error;
-
-            getCurrentTimes(cpuTime, gpuTime);
-            cpuTime = cpuTime - retrace::profiler.getBaseCpuTime();
-            gpuTime = gpuTime - retrace::profiler.getBaseGpuTime();
-            error   = gpuTime - cpuTime * (1.0E9 / os::timeFrequency);
-
-            retrace::profiler.setBaseGpuTime(retrace::profiler.getBaseGpuTime() + error);
-        }
-
          /* Indicate end of current frame */
          retrace::profiler.addFrameEnd();
      }
  
      retrace::frameComplete(call);
  
-    if (!currentDrawable) {
+    glretrace::Context *currentContext = glretrace::getCurrentContext();
+    if (!currentContext) {
          return;
      }
  
-    if (retrace::debug && !currentDrawable->visible) {
+    assert(currentContext->drawable);
+    if (retrace::debug && !currentContext->drawable->visible) {
          retrace::warning(call) << "could not infer drawable size (glViewport never called)\n";
      }
  }
@@ -384,9 +422,35 @@ debugOutputCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsi
  } /* namespace glretrace */
  
  
+class GLDumper : public retrace::Dumper {
+public:
+    image::Image *
+    getSnapshot(void) {
+        if (!glretrace::getCurrentContext()) {
+            return NULL;
+        }
+        return glstate::getDrawBufferImage();
+    }
+
+    bool
+    dumpState(std::ostream &os) {
+        glretrace::Context *currentContext = glretrace::getCurrentContext();
+        if (glretrace::insideGlBeginEnd ||
+            !currentContext) {
+            return false;
+        }
+        glstate::dumpCurrentContext(os);
+        return true;
+    }
+};
+
+static GLDumper glDumper;
+
+
  void
  retrace::setUp(void) {
      glws::init();
+    dumper = &glDumper;
  }
  
  
@@ -401,34 +465,13 @@ retrace::addCallbacks(retrace::Retracer &retracer)
  }
  
  
-image::Image *
-retrace::getSnapshot(void) {
-    if (!glretrace::currentDrawable) {
-        return NULL;
-    }
-
-    return glstate::getDrawBufferImage();
-}
-
-
-bool
-retrace::dumpState(std::ostream &os)
-{
-    if (glretrace::insideGlBeginEnd ||
-        !glretrace::currentDrawable ||
-        !glretrace::currentContext) {
-        return false;
-    }
-
-    glstate::dumpCurrentContext(os);
-
-    return true;
-}
-
  void
  retrace::flushRendering(void) {
-    glretrace::flushQueries();
-    glFlush();
+    glretrace::Context *currentContext = glretrace::getCurrentContext();
+    if (currentContext) {
+        glretrace::flushQueries();
+        glFlush();
+    }
  }
  
  void