]> git.cworth.org Git - fips/commitdiff
Add collection of (AMD_performance_monitor) performance counters to fips
authorCarl Worth <cworth@cworth.org>
Tue, 22 Oct 2013 18:04:25 +0000 (11:04 -0700)
committerCarl Worth <cworth@cworth.org>
Tue, 22 Oct 2013 19:54:49 +0000 (12:54 -0700)
The implementation involves a linked-list of outstanding
performance-monitor queries next to the existing list of outstanding
timer queries.

The results from the performance counters are stored (without any
interpretation in an array of values next to the existing time values
within each op_metrics_t value for each operation.

The numbers are currently printed with simple counter numbers (no
names and no units) and with the values divided by 1e6. Counters with
values of zero are not printed.

Next steps from here that will make things useful:

1. Use relative number of cycels in each stage to apportion measured
   shader time among the various stages, (so that per-stage time
   numbers are sorted in the final report).

2. Print percentage active, (by looking at per-stage active and stall times)

3. Print names for counters (other than per-stage active and stall
   which will be used in the above two calculations).

4. Fix to silently ignore performance counters if the
   AMD_performance_monitor extension is not available.

eglwrap.c
glxwrap.c
metrics.c
metrics.h

index 9e583895f888cc3c53939ea4f4855b48073b17cd..497779ce3fd2048f03f518a98f51c70575c37d07 100644 (file)
--- a/eglwrap.c
+++ b/eglwrap.c
@@ -112,6 +112,8 @@ eglMakeCurrent (EGLDisplay display, EGLSurface draw, EGLSurface read,
 
        EGLWRAP_DEFER_WITH_RETURN (ret, eglMakeCurrent, display, draw, read, context);
 
+       metrics_info_init ();
+
        metrics_set_current_op (METRICS_OP_SHADER + 0);
        metrics_counter_start ();
 
index eda358313dc26b09570547c40987c56526c9306a..9a28d7d0f231cb395b818ceadee564e1a3767cca 100644 (file)
--- a/glxwrap.c
+++ b/glxwrap.c
@@ -88,6 +88,8 @@ glXMakeCurrent (Display *dpy, GLXDrawable drawable, GLXContext ctx)
 
        GLWRAP_DEFER_WITH_RETURN (ret, glXMakeCurrent, dpy, drawable, ctx);
 
+       metrics_info_init ();
+
        metrics_set_current_op (METRICS_OP_SHADER + 0);
        metrics_counter_start ();
 
@@ -103,6 +105,8 @@ glXMakeContextCurrent (Display *dpy, GLXDrawable drawable, GLXDrawable read, GLX
 
        GLWRAP_DEFER_WITH_RETURN (ret, glXMakeContextCurrent, dpy, drawable, read, ctx);
 
+       metrics_info_init ();
+
        metrics_set_current_op (METRICS_OP_SHADER + 0);
        metrics_counter_start ();
 
index 09083e639ab966d6e80c36845c8da3c76f447059..0c776e7ebec5cdc1fe950588688b985ac36b9895 100644 (file)
--- a/metrics.c
+++ b/metrics.c
@@ -23,7 +23,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-
+#include <assert.h>
 #include <sys/time.h>
 
 #include "fips-dispatch-gl.h"
 #include "metrics.h"
 #include "xmalloc.h"
 
-typedef struct counter
+/* Timer query */
+typedef struct timer_query
 {
        unsigned id;
 
        metrics_op_t op;
-       struct counter *next;
-} counter_t;
+       struct timer_query *next;
+} timer_query_t;
+
+/* Performance-monitor query */
+typedef struct monitor
+{
+       unsigned id;
+
+       metrics_op_t op;
+       struct monitor *next;
+} monitor_t;
+
 
 typedef struct op_metrics
 {
@@ -46,14 +57,37 @@ typedef struct op_metrics
         */
        metrics_op_t op;
        double time_ns;
+
+       double *counters;
+       unsigned num_counters;
 } op_metrics_t;
 
+typedef struct counter_group_info
+{
+       GLuint id;
+       GLint num_counters;
+       GLint max_active_counters;
+       GLuint *counters;
+} counter_group_info_t;
+
+typedef struct metrics_info
+{
+       int num_groups;
+       int max_counters_per_group;
+       counter_group_info_t *groups;
+} metrics_info_t;
+
 typedef struct context
 {
+       metrics_info_t metrics_info;
+
        metrics_op_t op;
 
-       counter_t *counter_head;
-       counter_t *counter_tail;
+       timer_query_t *timer_head;
+       timer_query_t *timer_tail;
+
+       monitor_t *monitor_head;
+       monitor_t *monitor_tail;
 
        unsigned num_op_metrics;
        op_metrics_t *op_metrics;
@@ -67,6 +101,47 @@ context_t current_context;
 int frames;
 int verbose;
 
+void
+metrics_info_init (void)
+{
+       int i;
+       GLuint *group_ids;
+       metrics_info_t *metrics_info = &current_context.metrics_info;
+
+       glGetPerfMonitorGroupsAMD (&metrics_info->num_groups, 0, NULL);
+
+       group_ids = xmalloc (metrics_info->num_groups * sizeof (GLuint));
+
+       glGetPerfMonitorGroupsAMD (NULL, metrics_info->num_groups, group_ids);
+
+       metrics_info->max_counters_per_group = 0;
+
+       metrics_info->groups = xmalloc (metrics_info->num_groups * sizeof (counter_group_info_t));
+
+       for (i = 0; i < metrics_info->num_groups; i++)
+       {
+               counter_group_info_t *group;
+
+               group = &metrics_info->groups[i];
+
+               group->id = group_ids[i];
+
+               glGetPerfMonitorCountersAMD (group->id, &group->num_counters,
+                                            &group->max_active_counters, 0, NULL);
+
+               group->counters = xmalloc (group->num_counters * sizeof (GLuint));
+
+               glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
+                                            group->num_counters,
+                                            group->counters);
+
+               if (group->num_counters > metrics_info->max_counters_per_group)
+                       metrics_info->max_counters_per_group = group->num_counters;
+       }
+
+       free (group_ids);
+}
+
 static const char *
 metrics_op_string (metrics_op_t op)
 {
@@ -115,30 +190,78 @@ metrics_op_string (metrics_op_t op)
 void
 metrics_counter_start (void)
 {
-       counter_t *counter;
+       context_t *ctx = &current_context;
+       timer_query_t *timer;
+       monitor_t *monitor;
+       int i;
+
+       /* Create new timer query, add to list */
+       timer = xmalloc (sizeof (timer_query_t));
 
-       counter = xmalloc (sizeof(counter_t));
+       timer->op = ctx->op;
+       timer->next = NULL;
+
+       if (ctx->timer_tail) {
+               ctx->timer_tail->next = timer;
+               ctx->timer_tail = timer;
+       } else {
+               ctx->timer_tail = timer;
+               ctx->timer_head = timer;
+       }
 
-       glGenQueries (1, &counter->id);
+       /* Create a new performance-monitor query */
+       monitor = xmalloc (sizeof (monitor_t));
 
-       counter->op = current_context.op;
-       counter->next = NULL;
+       monitor->op = ctx->op;
+       monitor->next = NULL;
 
-       if (current_context.counter_tail) {
-               current_context.counter_tail->next = counter;
-               current_context.counter_tail = counter;
+       if (ctx->monitor_tail) {
+               ctx->monitor_tail->next = monitor;
+               ctx->monitor_tail = monitor;
        } else {
-               current_context.counter_tail = counter;
-               current_context.counter_head = counter;
+               ctx->monitor_tail = monitor;
+               ctx->monitor_head = monitor;
        }
 
-       glBeginQuery (GL_TIME_ELAPSED, counter->id);
+       /* Initialize the timer_query and monitor objects */
+       glGenQueries (1, &timer->id);
+
+       glGenPerfMonitorsAMD (1, &monitor->id);
+
+       for (i = 0; i < ctx->metrics_info.num_groups; i++)
+       {
+               counter_group_info_t *group;
+               int num_counters;
+
+               group = &ctx->metrics_info.groups[i];
+
+               num_counters = group->num_counters;
+               if (group->max_active_counters < group->num_counters)
+               {
+                       fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
+                                group->max_active_counters,
+                                group->num_counters, i);
+                       num_counters = group->max_active_counters;
+
+               }
+
+               glSelectPerfMonitorCountersAMD(monitor->id,
+                                              GL_TRUE, group->id,
+                                              num_counters,
+                                              group->counters);
+       }
+
+       /* Start the queries */
+       glBeginQuery (GL_TIME_ELAPSED, timer->id);
+
+       glBeginPerfMonitorAMD (monitor->id);
 }
 
 void
 metrics_counter_stop (void)
 {
        glEndQuery (GL_TIME_ELAPSED);
+       glEndPerfMonitorAMD (current_context.monitor_tail->id);
 }
 
 void
@@ -154,23 +277,104 @@ metrics_get_current_op (void)
 }
 
 static void
-accumulate_program_time (metrics_op_t op, unsigned time_ns)
+op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
 {
-       context_t *ctx = &current_context;
+       metrics_info_t *info = &ctx->metrics_info;
        unsigned i;
 
-       if (op >= ctx->num_op_metrics) {
+       metrics->op = op;
+       metrics->time_ns = 0.0;
+
+       metrics->num_counters = info->num_groups * info->max_counters_per_group;
+       metrics->counters = xmalloc (sizeof(double) * metrics->num_counters);
+
+       for (i = 0; i < metrics->num_counters; i++)
+               metrics->counters[i] = 0.0;
+}
+
+static op_metrics_t *
+ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
+{
+       unsigned i;
+
+       if (op >= ctx->num_op_metrics)
+       {
                ctx->op_metrics = realloc (ctx->op_metrics,
                                           (op + 1) * sizeof (op_metrics_t));
-               for (i = ctx->num_op_metrics; i < op + 1; i++) {
-                       ctx->op_metrics[i].op = i;
-                       ctx->op_metrics[i].time_ns = 0.0;
-               }
+               for (i = ctx->num_op_metrics; i < op + 1; i++)
+                       op_metrics_init (ctx, &ctx->op_metrics[i], i);
 
                ctx->num_op_metrics = op + 1;
        }
 
-       ctx->op_metrics[op].time_ns += time_ns;
+       return &ctx->op_metrics[op];
+}
+
+static void
+accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
+{
+#define CONSUME(var)                                                   \
+       if (p + sizeof(var) > ((unsigned char *) result) + size)        \
+       {                                                               \
+               fprintf (stderr, "Unexpected end-of-buffer while "      \
+                        "parsing results\n");                          \
+               break;                                                  \
+       }                                                               \
+       (var) = *((typeof(var) *) p);                                   \
+       p += sizeof(var);
+
+       context_t *ctx = &current_context;
+       unsigned char *p = (unsigned char *) result;
+
+       while (p < ((unsigned char *) result) + size)
+       {
+               GLuint group_id, counter_id, counter_type;
+               uint32_t value;
+               unsigned i;
+
+               CONSUME (group_id);
+               CONSUME (counter_id);
+
+               glGetPerfMonitorCounterInfoAMD (group_id, counter_id,
+                                               GL_COUNTER_TYPE_AMD,
+                                               &counter_type);
+
+               /* We assume that all peformance counters are made
+                * available as uint32 values. This code can easily be
+                * extended as needed. */
+               if (counter_type != GL_UNSIGNED_INT) {
+                       fprintf (stderr, "Warning: Non-uint counter value. Ignoring remainder of results\n");
+                       break;
+               }
+
+               CONSUME (value);
+
+               i = (group_id * ctx->metrics_info.max_counters_per_group +
+                    counter_id);
+
+               assert (i < ctx->op_metrics[op].num_counters);
+
+               /* FIXME: While I'm still occasionally getting bogus
+                * numbers from the performance counters, I'm simply
+                * going to discard anything larger than half the
+                * range, (something that looks like a negative signed
+                * quantity).
+                */
+               if (((int32_t) value) < 0)
+                       fprintf (stderr, ".");
+               else
+                       ctx->op_metrics[op].counters[i] += value;
+       }
+}
+
+static void
+accumulate_program_time (metrics_op_t op, unsigned time_ns)
+{
+       op_metrics_t *metrics;
+
+       metrics = ctx_get_op_metrics (&current_context, op);
+
+       metrics->time_ns += time_ns;
 }
 
 static int
@@ -191,9 +395,9 @@ static void
 print_program_metrics (void)
 {
        context_t *ctx = &current_context;
-       unsigned i, j;
        int *sorted; /* Sorted indices into the ctx->op_metrics */
        double total = 0;
+       unsigned i, j;
 
        /* Make a sorted list of the operations by time used, and figure
         * out the total so we can print percentages.
@@ -226,9 +430,16 @@ print_program_metrics (void)
                        for (j = strlen (op_string); j < 20; j++)
                                printf (" ");
                }
-               printf ("\t%7.2f ms (% 2.1f%%)\n",
+               printf ("\t%7.2f ms (% 2.1f%%)",
                        metric->time_ns / 1e6,
                        metric->time_ns / total * 100);
+               printf ("[");
+               for (j = 0; j < metric->num_counters; j++) {
+                       if (metric->counters[j] == 0.0)
+                               continue;
+                       printf ("%d: %.2f ms ", j, metric->counters[j] / 1e6);
+               }
+               printf ("]\n");
        }
 }
 
@@ -261,29 +472,68 @@ metrics_end_frame (void)
        frames++;
        gettimeofday (&tv_now, NULL);
 
-       /* Consume all counters that are ready. */
-       counter_t *counter = current_context.counter_head;
+       /* Consume all timer queries that are ready. */
+       timer_query_t *timer = current_context.timer_head;
 
-       while (counter) {
+       while (timer) {
                GLuint available, elapsed;
 
-               glGetQueryObjectuiv (counter->id, GL_QUERY_RESULT_AVAILABLE,
-                                    &available);
+               glGetQueryObjectuiv (timer->id,
+                                    GL_QUERY_RESULT_AVAILABLE, &available);
+               if (! available)
+                       break;
+
+               glGetQueryObjectuiv (timer->id,
+                                    GL_QUERY_RESULT, &elapsed);
+
+               accumulate_program_time (timer->op, elapsed);
+
+               current_context.timer_head = timer->next;
+               if (current_context.timer_head == NULL)
+                       current_context.timer_tail = NULL;
+
+               glDeleteQueries (1, &timer->id);
+
+               free (timer);
+               timer = current_context.timer_head;
+       }
+
+       /* And similarly for all performance monitors that are ready. */
+       monitor_t *monitor = current_context.monitor_head;
+
+       while (monitor) {
+               GLuint available, result_size, *result;
+               GLint bytes_written;
+
+               glGetPerfMonitorCounterDataAMD (monitor->id,
+                                               GL_PERFMON_RESULT_AVAILABLE_AMD,
+                                               sizeof (available), &available,
+                                               NULL);
                if (! available)
                        break;
 
-               glGetQueryObjectuiv (counter->id, GL_QUERY_RESULT, &elapsed);
+               glGetPerfMonitorCounterDataAMD (monitor->id,
+                                               GL_PERFMON_RESULT_SIZE_AMD,
+                                               sizeof (result_size),
+                                               &result_size, NULL);
+
+               result = xmalloc (result_size);
+
+               glGetPerfMonitorCounterDataAMD (monitor->id,
+                                               GL_PERFMON_RESULT_AMD,
+                                               result_size, result,
+                                               &bytes_written);
 
-               accumulate_program_time (counter->op, elapsed);
+               accumulate_program_metrics (monitor->op, result, result_size);
 
-               current_context.counter_head = counter->next;
-               if (current_context.counter_head == NULL)
-                       current_context.counter_tail = NULL;
+               current_context.monitor_head = monitor->next;
+               if (current_context.monitor_head == NULL)
+                       current_context.monitor_tail = NULL;
 
-               glDeleteQueries (1, &counter->id);
+               glDeletePerfMonitorsAMD (1, &monitor->id);
 
-               free (counter);
-               counter = current_context.counter_head;
+               free (monitor);
+               monitor = current_context.monitor_head;
        }
 
        if (frames % 60 == 0) {
index 45fef33cabc93ae38d788be3950459d29970bede..accafe5f8e91f2660e46687dfc39cbf959b107cc 100644 (file)
--- a/metrics.h
+++ b/metrics.h
@@ -50,6 +50,15 @@ typedef enum
        METRICS_OP_SHADER
 } metrics_op_t;
 
+/* Initialize metrics info
+ *
+ * This queries the names and ranges for all available performance counters.
+ *
+ * This should be called once before any other metrics functions.
+ */
+void
+metrics_info_init (void);
+
 /* Start accumulating GPU time.
  *
  * The time accumulated will be accounted against the