]> git.cworth.org Git - fips/blob - metrics.c
Add collection of (AMD_performance_monitor) performance counters to fips
[fips] / metrics.c
1 /* Copyright © 2013, Intel Corporation
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  */
21
22 #define _GNU_SOURCE
23
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <sys/time.h>
28
29 #include "fips-dispatch-gl.h"
30
31 #include "metrics.h"
32 #include "xmalloc.h"
33
34 /* Timer query */
35 typedef struct timer_query
36 {
37         unsigned id;
38
39         metrics_op_t op;
40         struct timer_query *next;
41 } timer_query_t;
42
43 /* Performance-monitor query */
44 typedef struct monitor
45 {
46         unsigned id;
47
48         metrics_op_t op;
49         struct monitor *next;
50 } monitor_t;
51
52
53 typedef struct op_metrics
54 {
55         /* This happens to also be the index into the
56          * ctx->op_metrics array currently
57          */
58         metrics_op_t op;
59         double time_ns;
60
61         double *counters;
62         unsigned num_counters;
63 } op_metrics_t;
64
65 typedef struct counter_group_info
66 {
67         GLuint id;
68         GLint num_counters;
69         GLint max_active_counters;
70         GLuint *counters;
71 } counter_group_info_t;
72
73 typedef struct metrics_info
74 {
75         int num_groups;
76         int max_counters_per_group;
77         counter_group_info_t *groups;
78 } metrics_info_t;
79
80 typedef struct context
81 {
82         metrics_info_t metrics_info;
83
84         metrics_op_t op;
85
86         timer_query_t *timer_head;
87         timer_query_t *timer_tail;
88
89         monitor_t *monitor_head;
90         monitor_t *monitor_tail;
91
92         unsigned num_op_metrics;
93         op_metrics_t *op_metrics;
94 } context_t;
95
/* FIXME: Need a map from integers to context objects and track the
 * current context with glXMakeContextCurrent, eglMakeCurrent, etc. */

/* The one context that every function in this file operates on. */
context_t current_context;

/* Number of frames completed so far; incremented by
 * metrics_end_frame. */
int frames;

/* Set to 1 by metrics_end_frame when FIPS_VERBOSE is present in the
 * environment; enables extra progress messages on stdout. */
int verbose;
103
104 void
105 metrics_info_init (void)
106 {
107         int i;
108         GLuint *group_ids;
109         metrics_info_t *metrics_info = &current_context.metrics_info;
110
111         glGetPerfMonitorGroupsAMD (&metrics_info->num_groups, 0, NULL);
112
113         group_ids = xmalloc (metrics_info->num_groups * sizeof (GLuint));
114
115         glGetPerfMonitorGroupsAMD (NULL, metrics_info->num_groups, group_ids);
116
117         metrics_info->max_counters_per_group = 0;
118
119         metrics_info->groups = xmalloc (metrics_info->num_groups * sizeof (counter_group_info_t));
120
121         for (i = 0; i < metrics_info->num_groups; i++)
122         {
123                 counter_group_info_t *group;
124
125                 group = &metrics_info->groups[i];
126
127                 group->id = group_ids[i];
128
129                 glGetPerfMonitorCountersAMD (group->id, &group->num_counters,
130                                              &group->max_active_counters, 0, NULL);
131
132                 group->counters = xmalloc (group->num_counters * sizeof (GLuint));
133
134                 glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
135                                              group->num_counters,
136                                              group->counters);
137
138                 if (group->num_counters > metrics_info->max_counters_per_group)
139                         metrics_info->max_counters_per_group = group->num_counters;
140         }
141
142         free (group_ids);
143 }
144
145 static const char *
146 metrics_op_string (metrics_op_t op)
147 {
148         if (op >= METRICS_OP_SHADER)
149                 return "Shader program";
150
151         switch (op)
152         {
153         case METRICS_OP_ACCUM:
154                 return "glAccum*(+)";
155         case METRICS_OP_BUFFER_DATA:
156                 return "glBufferData(+)";
157         case METRICS_OP_BUFFER_SUB_DATA:
158                 return "glCopyBufferSubData*";
159         case METRICS_OP_BITMAP:
160                 return "glBitmap*";
161         case METRICS_OP_BLIT_FRAMEBUFFER:
162                 return "glBlitFramebuffer*";
163         case METRICS_OP_CLEAR:
164                 return "glClear(+)";
165         case METRICS_OP_CLEAR_BUFFER_DATA:
166                 return "glCearBufferData(+)";
167         case METRICS_OP_CLEAR_TEX_IMAGE:
168                 return "glClearTexImage(+)";
169         case METRICS_OP_COPY_PIXELS:
170                 return "glCopyPixels";
171         case METRICS_OP_COPY_TEX_IMAGE:
172                 return "glCopyTexImage(+)";
173         case METRICS_OP_DRAW_PIXELS:
174                 return "glDrawPixels";
175         case METRICS_OP_GET_TEX_IMAGE:
176                 return "glGetTexImage(+)";
177         case METRICS_OP_READ_PIXELS:
178                 return "glReadPixels*";
179         case METRICS_OP_TEX_IMAGE:
180                 return "glTexImage*(+)";
181         default:
182                 fprintf (stderr, "Internal error: "
183                          "Unknown metrics op value: %d\n", op);
184                 exit (1);
185         }
186
187         return "";
188 }
189
190 void
191 metrics_counter_start (void)
192 {
193         context_t *ctx = &current_context;
194         timer_query_t *timer;
195         monitor_t *monitor;
196         int i;
197
198         /* Create new timer query, add to list */
199         timer = xmalloc (sizeof (timer_query_t));
200
201         timer->op = ctx->op;
202         timer->next = NULL;
203
204         if (ctx->timer_tail) {
205                 ctx->timer_tail->next = timer;
206                 ctx->timer_tail = timer;
207         } else {
208                 ctx->timer_tail = timer;
209                 ctx->timer_head = timer;
210         }
211
212         /* Create a new performance-monitor query */
213         monitor = xmalloc (sizeof (monitor_t));
214
215         monitor->op = ctx->op;
216         monitor->next = NULL;
217
218         if (ctx->monitor_tail) {
219                 ctx->monitor_tail->next = monitor;
220                 ctx->monitor_tail = monitor;
221         } else {
222                 ctx->monitor_tail = monitor;
223                 ctx->monitor_head = monitor;
224         }
225
226         /* Initialize the timer_query and monitor objects */
227         glGenQueries (1, &timer->id);
228
229         glGenPerfMonitorsAMD (1, &monitor->id);
230
231         for (i = 0; i < ctx->metrics_info.num_groups; i++)
232         {
233                 counter_group_info_t *group;
234                 int num_counters;
235
236                 group = &ctx->metrics_info.groups[i];
237
238                 num_counters = group->num_counters;
239                 if (group->max_active_counters < group->num_counters)
240                 {
241                         fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
242                                  group->max_active_counters,
243                                  group->num_counters, i);
244                         num_counters = group->max_active_counters;
245
246                 }
247
248                 glSelectPerfMonitorCountersAMD(monitor->id,
249                                                GL_TRUE, group->id,
250                                                num_counters,
251                                                group->counters);
252         }
253
254         /* Start the queries */
255         glBeginQuery (GL_TIME_ELAPSED, timer->id);
256
257         glBeginPerfMonitorAMD (monitor->id);
258 }
259
260 void
261 metrics_counter_stop (void)
262 {
263         glEndQuery (GL_TIME_ELAPSED);
264         glEndPerfMonitorAMD (current_context.monitor_tail->id);
265 }
266
267 void
268 metrics_set_current_op (metrics_op_t op)
269 {
270         current_context.op = op;
271 }
272
273 metrics_op_t
274 metrics_get_current_op (void)
275 {
276         return current_context.op;
277 }
278
279 static void
280 op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
281 {
282         metrics_info_t *info = &ctx->metrics_info;
283         unsigned i;
284
285         metrics->op = op;
286         metrics->time_ns = 0.0;
287
288         metrics->num_counters = info->num_groups * info->max_counters_per_group;
289         metrics->counters = xmalloc (sizeof(double) * metrics->num_counters);
290
291         for (i = 0; i < metrics->num_counters; i++)
292                 metrics->counters[i] = 0.0;
293 }
294
295 static op_metrics_t *
296 ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
297 {
298         unsigned i;
299
300         if (op >= ctx->num_op_metrics)
301         {
302                 ctx->op_metrics = realloc (ctx->op_metrics,
303                                            (op + 1) * sizeof (op_metrics_t));
304                 for (i = ctx->num_op_metrics; i < op + 1; i++)
305                         op_metrics_init (ctx, &ctx->op_metrics[i], i);
306
307                 ctx->num_op_metrics = op + 1;
308         }
309
310         return &ctx->op_metrics[op];
311 }
312
313 static void
314 accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
315 {
316 #define CONSUME(var)                                                    \
317         if (p + sizeof(var) > ((unsigned char *) result) + size)        \
318         {                                                               \
319                 fprintf (stderr, "Unexpected end-of-buffer while "      \
320                          "parsing results\n");                          \
321                 break;                                                  \
322         }                                                               \
323         (var) = *((typeof(var) *) p);                                   \
324         p += sizeof(var);
325
326         context_t *ctx = &current_context;
327         unsigned char *p = (unsigned char *) result;
328
329         while (p < ((unsigned char *) result) + size)
330         {
331                 GLuint group_id, counter_id, counter_type;
332                 uint32_t value;
333                 unsigned i;
334
335                 CONSUME (group_id);
336                 CONSUME (counter_id);
337
338                 glGetPerfMonitorCounterInfoAMD (group_id, counter_id,
339                                                 GL_COUNTER_TYPE_AMD,
340                                                 &counter_type);
341
342                 /* We assume that all peformance counters are made
343                  * available as uint32 values. This code can easily be
344                  * extended as needed. */
345                 if (counter_type != GL_UNSIGNED_INT) {
346                         fprintf (stderr, "Warning: Non-uint counter value. Ignoring remainder of results\n");
347                         break;
348                 }
349
350                 CONSUME (value);
351
352                 i = (group_id * ctx->metrics_info.max_counters_per_group +
353                      counter_id);
354
355                 assert (i < ctx->op_metrics[op].num_counters);
356
357                 /* FIXME: While I'm still occasionally getting bogus
358                  * numbers from the performance counters, I'm simply
359                  * going to discard anything larger than half the
360                  * range, (something that looks like a negative signed
361                  * quantity).
362                  */
363                 if (((int32_t) value) < 0)
364                         fprintf (stderr, ".");
365                 else
366                         ctx->op_metrics[op].counters[i] += value;
367         }
368 }
369
370 static void
371 accumulate_program_time (metrics_op_t op, unsigned time_ns)
372 {
373         op_metrics_t *metrics;
374
375         metrics = ctx_get_op_metrics (&current_context, op);
376
377         metrics->time_ns += time_ns;
378 }
379
380 static int
381 time_compare(const void *in_a, const void *in_b, void *arg)
382 {
383         int a = *(const int *)in_a;
384         int b = *(const int *)in_b;
385         struct op_metrics *metrics = arg;
386
387         if (metrics[a].time_ns < metrics[b].time_ns)
388                 return -1;
389         if (metrics[a].time_ns > metrics[b].time_ns)
390                 return 1;
391         return 0;
392 }
393
394 static void
395 print_program_metrics (void)
396 {
397         context_t *ctx = &current_context;
398         int *sorted; /* Sorted indices into the ctx->op_metrics */
399         double total = 0;
400         unsigned i, j;
401
402         /* Make a sorted list of the operations by time used, and figure
403          * out the total so we can print percentages.
404          */
405         sorted = calloc(ctx->num_op_metrics, sizeof(*sorted));
406         for (i = 0; i < ctx->num_op_metrics; i++) {
407                 sorted[i] = i;
408                 total += ctx->op_metrics[i].time_ns;
409         }
410         qsort_r(sorted, ctx->num_op_metrics, sizeof(*sorted),
411                 time_compare, ctx->op_metrics);
412
413         for (i = 0; i < ctx->num_op_metrics; i++) {
414                 const char *op_string;
415                 op_metrics_t *metric =&ctx->op_metrics[sorted[i]];
416
417                 /* Since we sparsely fill the array based on program
418                  * id, many "programs" have no time.
419                  */
420                 if (metric->time_ns == 0.0)
421                         continue;
422
423                 op_string = metrics_op_string (metric->op);
424
425                 printf ("%s", op_string);
426                 if (metric->op >= METRICS_OP_SHADER) {
427                         printf (" %d:", metric->op - METRICS_OP_SHADER);
428                 } else {
429                         printf (":");
430                         for (j = strlen (op_string); j < 20; j++)
431                                 printf (" ");
432                 }
433                 printf ("\t%7.2f ms (% 2.1f%%)",
434                         metric->time_ns / 1e6,
435                         metric->time_ns / total * 100);
436                 printf ("[");
437                 for (j = 0; j < metric->num_counters; j++) {
438                         if (metric->counters[j] == 0.0)
439                                 continue;
440                         printf ("%d: %.2f ms ", j, metric->counters[j] / 1e6);
441                 }
442                 printf ("]\n");
443         }
444 }
445
446 /* Called at program exit */
447 static void
448 metrics_exit (void)
449 {
450         if (verbose)
451                 printf ("fips: terminating\n");
452 }
453
454
455 void
456 metrics_end_frame (void)
457 {
458         static int initialized = 0;
459         static struct timeval tv_start, tv_now;
460
461         if (! initialized) {
462                 gettimeofday (&tv_start, NULL);
463                 atexit (metrics_exit);
464                 if (getenv ("FIPS_VERBOSE"))
465                         verbose = 1;
466                 initialized = 1;
467         }
468
469         if (verbose)
470                 printf ("fips: frame %d complete\n", frames);
471
472         frames++;
473         gettimeofday (&tv_now, NULL);
474
475         /* Consume all timer queries that are ready. */
476         timer_query_t *timer = current_context.timer_head;
477
478         while (timer) {
479                 GLuint available, elapsed;
480
481                 glGetQueryObjectuiv (timer->id,
482                                      GL_QUERY_RESULT_AVAILABLE, &available);
483                 if (! available)
484                         break;
485
486                 glGetQueryObjectuiv (timer->id,
487                                      GL_QUERY_RESULT, &elapsed);
488
489                 accumulate_program_time (timer->op, elapsed);
490
491                 current_context.timer_head = timer->next;
492                 if (current_context.timer_head == NULL)
493                         current_context.timer_tail = NULL;
494
495                 glDeleteQueries (1, &timer->id);
496
497                 free (timer);
498                 timer = current_context.timer_head;
499         }
500
501         /* And similarly for all performance monitors that are ready. */
502         monitor_t *monitor = current_context.monitor_head;
503
504         while (monitor) {
505                 GLuint available, result_size, *result;
506                 GLint bytes_written;
507
508                 glGetPerfMonitorCounterDataAMD (monitor->id,
509                                                 GL_PERFMON_RESULT_AVAILABLE_AMD,
510                                                 sizeof (available), &available,
511                                                 NULL);
512                 if (! available)
513                         break;
514
515                 glGetPerfMonitorCounterDataAMD (monitor->id,
516                                                 GL_PERFMON_RESULT_SIZE_AMD,
517                                                 sizeof (result_size),
518                                                 &result_size, NULL);
519
520                 result = xmalloc (result_size);
521
522                 glGetPerfMonitorCounterDataAMD (monitor->id,
523                                                 GL_PERFMON_RESULT_AMD,
524                                                 result_size, result,
525                                                 &bytes_written);
526
527                 accumulate_program_metrics (monitor->op, result, result_size);
528
529                 current_context.monitor_head = monitor->next;
530                 if (current_context.monitor_head == NULL)
531                         current_context.monitor_tail = NULL;
532
533                 glDeletePerfMonitorsAMD (1, &monitor->id);
534
535                 free (monitor);
536                 monitor = current_context.monitor_head;
537         }
538
539         if (frames % 60 == 0) {
540                 double fps;
541
542                 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
543                                          (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
544
545                 printf("FPS: %.3f\n", fps);
546
547                 print_program_metrics ();
548         }
549 }