]> git.cworth.org Git - fips/blob - metrics.c
5f054e130adc7b653d062043232c7b85dd82dc17
[fips] / metrics.c
1 /* Copyright © 2013, Intel Corporation
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  */
21
22 #define _GNU_SOURCE
23
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include "fips-dispatch-gl.h"

#include "metrics.h"
#include "xmalloc.h"
33
34 /* Timer query */
35 typedef struct timer_query
36 {
37         unsigned id;
38
39         metrics_op_t op;
40         struct timer_query *next;
41 } timer_query_t;
42
43 /* Performance-monitor query */
44 typedef struct monitor
45 {
46         unsigned id;
47
48         metrics_op_t op;
49         struct monitor *next;
50 } monitor_t;
51
52
53 typedef struct op_metrics
54 {
55         /* This happens to also be the index into the
56          * ctx->op_metrics array currently
57          */
58         metrics_op_t op;
59         double time_ns;
60
61         double **counters;
62 } op_metrics_t;
63
64 typedef struct metrics_group_info
65 {
66         GLuint id;
67         char *name;
68
69         GLuint num_counters;
70         GLuint max_active_counters;
71
72         GLuint *counter_ids;
73         char **counter_names;
74         GLuint *counter_types;
75
76 } metrics_group_info_t;
77
78 typedef struct metrics_info
79 {
80         int initialized;
81
82         unsigned num_groups;
83         metrics_group_info_t *groups;
84 } metrics_info_t;
85
86 typedef struct context
87 {
88         metrics_info_t metrics_info;
89
90         metrics_op_t op;
91
92         timer_query_t *timer_head;
93         timer_query_t *timer_tail;
94
95         monitor_t *monitor_head;
96         monitor_t *monitor_tail;
97
98         unsigned num_op_metrics;
99         op_metrics_t *op_metrics;
100 } context_t;
101
102 /* FIXME: Need a map from integers to context objects and track the
103  * current context with glXMakeContextCurrent, eglMakeCurrent, etc. */
104
105 context_t current_context;
106
107 int frames;
108 int verbose;
109
110 static void
111 metrics_group_info_init (metrics_group_info_t *group, GLuint id)
112 {
113         GLsizei length;
114         unsigned i;
115
116         group->id = id;
117
118         /* Get name */
119         glGetPerfMonitorGroupStringAMD (id, 0, &length, NULL);
120
121         group->name = xmalloc (length + 1);
122
123         glGetPerfMonitorGroupStringAMD (id, length + 1, NULL, group->name);
124
125         /* Get number of counters */
126         group->num_counters = 0;
127         group->max_active_counters = 0;
128         glGetPerfMonitorCountersAMD (group->id,
129                                      (int *) &group->num_counters,
130                                      (int *) &group->max_active_counters,
131                                      0, NULL);
132
133         /* Get counter numbers */
134         group->counter_ids = xmalloc (group->num_counters * sizeof (GLuint));
135
136         glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
137                                      group->num_counters,
138                                      group->counter_ids);
139
140         /* Get counter names */
141         group->counter_names = xmalloc (group->num_counters * sizeof (char *));
142         group->counter_types = xmalloc (group->num_counters * sizeof (GLuint));
143
144         for (i = 0; i < group->num_counters; i++) {
145                 glGetPerfMonitorCounterInfoAMD (group->id,
146                                                 group->counter_ids[i],
147                                                 GL_COUNTER_TYPE_AMD,
148                                                 &group->counter_types[i]);
149
150                 /* We assume that all peformance counters are made
151                  * available as uint32 values. The code calling
152                  * CONSUME in accumulate_program_metrics will need to
153                  * be extended to accomodate other counter values. */
154                 if (group->counter_types[i] != GL_UNSIGNED_INT) {
155                         fprintf (stderr, "fips: Internal error: No support for non-uint counter values\n");
156                         exit (1);
157                 }
158
159                 glGetPerfMonitorCounterStringAMD (group->id,
160                                                   group->counter_ids[i],
161                                                   0, &length, NULL);
162
163                 group->counter_names[i] = xmalloc (length + 1);
164
165                 glGetPerfMonitorCounterStringAMD (group->id,
166                                                   group->counter_ids[i],
167                                                   length + 1, NULL,
168                                                   group->counter_names[i]);
169         }
170 }
171
172 static void
173 metrics_group_info_fini (metrics_group_info_t *group)
174 {
175         unsigned i;
176
177         for (i = 0; i < group->num_counters; i++)
178                 free (group->counter_names[i]);
179
180         free (group->counter_types);
181         free (group->counter_names);
182         free (group->counter_ids);
183
184         free (group->name);
185 }
186
187 static void
188 metrics_info_fini (metrics_info_t *info);
189
190 void
191 metrics_info_init (void)
192 {
193         unsigned i;
194         GLuint *group_ids;
195         metrics_info_t *info = &current_context.metrics_info;
196
197         if (info->initialized)
198                 metrics_info_fini (info);
199
200         glGetPerfMonitorGroupsAMD ((int *) &info->num_groups, 0, NULL);
201
202         group_ids = xmalloc (info->num_groups * sizeof (GLuint));
203
204         glGetPerfMonitorGroupsAMD (NULL, info->num_groups, group_ids);
205
206         info->groups = xmalloc (info->num_groups * sizeof (metrics_group_info_t));
207
208         for (i = 0; i < info->num_groups; i++)
209                 metrics_group_info_init (&info->groups[i], group_ids[i]);
210
211         free (group_ids);
212
213         info->initialized = 1;
214 }
215
216 static void
217 metrics_info_fini (metrics_info_t *info)
218 {
219         unsigned i;
220
221         for (i = 0; i < info->num_groups; i++)
222                 metrics_group_info_fini (&info->groups[i]);
223
224         free (info->groups);
225 }
226
227 static const char *
228 metrics_op_string (metrics_op_t op)
229 {
230         if (op >= METRICS_OP_SHADER)
231                 return "Shader program";
232
233         switch (op)
234         {
235         case METRICS_OP_ACCUM:
236                 return "glAccum*(+)";
237         case METRICS_OP_BUFFER_DATA:
238                 return "glBufferData(+)";
239         case METRICS_OP_BUFFER_SUB_DATA:
240                 return "glCopyBufferSubData*";
241         case METRICS_OP_BITMAP:
242                 return "glBitmap*";
243         case METRICS_OP_BLIT_FRAMEBUFFER:
244                 return "glBlitFramebuffer*";
245         case METRICS_OP_CLEAR:
246                 return "glClear(+)";
247         case METRICS_OP_CLEAR_BUFFER_DATA:
248                 return "glCearBufferData(+)";
249         case METRICS_OP_CLEAR_TEX_IMAGE:
250                 return "glClearTexImage(+)";
251         case METRICS_OP_COPY_PIXELS:
252                 return "glCopyPixels";
253         case METRICS_OP_COPY_TEX_IMAGE:
254                 return "glCopyTexImage(+)";
255         case METRICS_OP_DRAW_PIXELS:
256                 return "glDrawPixels";
257         case METRICS_OP_GET_TEX_IMAGE:
258                 return "glGetTexImage(+)";
259         case METRICS_OP_READ_PIXELS:
260                 return "glReadPixels*";
261         case METRICS_OP_TEX_IMAGE:
262                 return "glTexImage*(+)";
263         default:
264                 fprintf (stderr, "fips: Internal error: "
265                          "Unknown metrics op value: %d\n", op);
266                 exit (1);
267         }
268
269         return "";
270 }
271
272 void
273 metrics_counter_start (void)
274 {
275         context_t *ctx = &current_context;
276         timer_query_t *timer;
277         monitor_t *monitor;
278         unsigned i;
279
280         /* Create new timer query, add to list */
281         timer = xmalloc (sizeof (timer_query_t));
282
283         timer->op = ctx->op;
284         timer->next = NULL;
285
286         if (ctx->timer_tail) {
287                 ctx->timer_tail->next = timer;
288                 ctx->timer_tail = timer;
289         } else {
290                 ctx->timer_tail = timer;
291                 ctx->timer_head = timer;
292         }
293
294         /* Create a new performance-monitor query */
295         monitor = xmalloc (sizeof (monitor_t));
296
297         monitor->op = ctx->op;
298         monitor->next = NULL;
299
300         if (ctx->monitor_tail) {
301                 ctx->monitor_tail->next = monitor;
302                 ctx->monitor_tail = monitor;
303         } else {
304                 ctx->monitor_tail = monitor;
305                 ctx->monitor_head = monitor;
306         }
307
308         /* Initialize the timer_query and monitor objects */
309         glGenQueries (1, &timer->id);
310
311         glGenPerfMonitorsAMD (1, &monitor->id);
312
313         for (i = 0; i < ctx->metrics_info.num_groups; i++)
314         {
315                 metrics_group_info_t *group;
316                 int num_counters;
317
318                 group = &ctx->metrics_info.groups[i];
319
320                 num_counters = group->num_counters;
321                 if (group->max_active_counters < group->num_counters)
322                 {
323                         fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
324                                  group->max_active_counters,
325                                  group->num_counters, i);
326                         num_counters = group->max_active_counters;
327
328                 }
329
330                 glSelectPerfMonitorCountersAMD(monitor->id,
331                                                GL_TRUE, group->id,
332                                                num_counters,
333                                                group->counter_ids);
334         }
335
336         /* Start the queries */
337         glBeginQuery (GL_TIME_ELAPSED, timer->id);
338
339         glBeginPerfMonitorAMD (monitor->id);
340 }
341
342 void
343 metrics_counter_stop (void)
344 {
345         glEndQuery (GL_TIME_ELAPSED);
346         glEndPerfMonitorAMD (current_context.monitor_tail->id);
347 }
348
349 void
350 metrics_set_current_op (metrics_op_t op)
351 {
352         current_context.op = op;
353 }
354
355 metrics_op_t
356 metrics_get_current_op (void)
357 {
358         return current_context.op;
359 }
360
361 static void
362 op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
363 {
364         metrics_info_t *info = &ctx->metrics_info;
365         unsigned i, j;
366
367         metrics->op = op;
368         metrics->time_ns = 0.0;
369
370         metrics->counters = xmalloc (sizeof(double *) * info->num_groups);
371
372         for (i = 0; i < info->num_groups; i++) {
373                 metrics->counters[i] = xmalloc (sizeof (double) *
374                                                 info->groups[i].num_counters);
375                 for (j = 0; j < info->groups[i].num_counters; j++)
376                         metrics->counters[i][j] = 0.0;
377         }
378 }
379
380 static op_metrics_t *
381 ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
382 {
383         unsigned i;
384
385         if (op >= ctx->num_op_metrics)
386         {
387                 ctx->op_metrics = realloc (ctx->op_metrics,
388                                            (op + 1) * sizeof (op_metrics_t));
389                 for (i = ctx->num_op_metrics; i < op + 1; i++)
390                         op_metrics_init (ctx, &ctx->op_metrics[i], i);
391
392                 ctx->num_op_metrics = op + 1;
393         }
394
395         return &ctx->op_metrics[op];
396 }
397
398 static void
399 accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
400 {
401 #define CONSUME(var)                                                    \
402         if (p + sizeof(var) > ((unsigned char *) result) + size)        \
403         {                                                               \
404                 fprintf (stderr, "Unexpected end-of-buffer while "      \
405                          "parsing results\n");                          \
406                 break;                                                  \
407         }                                                               \
408         (var) = *((typeof(var) *) p);                                   \
409         p += sizeof(var);
410
411         context_t *ctx = &current_context;
412         metrics_info_t *info = &ctx->metrics_info;
413         unsigned char *p = (unsigned char *) result;
414
415         while (p < ((unsigned char *) result) + size)
416         {
417                 GLuint group_id, group_index;
418                 GLuint counter_id, counter_index;
419                 metrics_group_info_t *group;
420                 uint32_t value;
421                 unsigned i;
422
423                 CONSUME (group_id);
424                 CONSUME (counter_id);
425                 CONSUME (value);
426
427                 for (i = 0; i < info->num_groups; i++) {
428                         if (info->groups[i].id == i)
429                                 break;
430                 }
431                 group_index = i;
432                 assert (group_index < info->num_groups);
433                 group = &info->groups[group_index];
434
435                 for (i = 0; i < group->num_counters; i++) {
436                         if (group->counter_ids[i] == counter_id)
437                                 break;
438                 }
439                 counter_index = i;
440                 assert (counter_index < group->num_counters);
441
442                 ctx->op_metrics[op].counters[group_index][counter_index] += value;
443         }
444 }
445
446 static void
447 accumulate_program_time (metrics_op_t op, unsigned time_ns)
448 {
449         op_metrics_t *metrics;
450
451         metrics = ctx_get_op_metrics (&current_context, op);
452
453         metrics->time_ns += time_ns;
454 }
455
456 static int
457 time_compare(const void *in_a, const void *in_b, void *arg)
458 {
459         int a = *(const int *)in_a;
460         int b = *(const int *)in_b;
461         struct op_metrics *metrics = arg;
462
463         if (metrics[a].time_ns < metrics[b].time_ns)
464                 return -1;
465         if (metrics[a].time_ns > metrics[b].time_ns)
466                 return 1;
467         return 0;
468 }
469
470 static void
471 print_op_metrics (context_t *ctx, op_metrics_t *metric, double total)
472 {
473         metrics_info_t *info = &ctx->metrics_info;
474         metrics_group_info_t *group;
475         const char *op_string;
476         unsigned i, group_index, counter;
477         double value;
478
479         /* Since we sparsely fill the array based on program
480          * id, many "programs" have no time.
481          */
482         if (metric->time_ns == 0.0)
483                 return;
484
485         op_string = metrics_op_string (metric->op);
486
487         printf ("%s", op_string);
488         if (metric->op >= METRICS_OP_SHADER) {
489                 printf (" %d:", metric->op - METRICS_OP_SHADER);
490         } else {
491                 printf (":");
492                 for (i = strlen (op_string); i < 20; i++)
493                         printf (" ");
494         }
495
496         printf ("\t%7.2f ms (% 2.1f%%)",
497                 metric->time_ns / 1e6,
498                 metric->time_ns / total * 100);
499
500         printf ("[");
501         for (group_index = 0; group_index < info->num_groups; group_index++) {
502                 group = &info->groups[group_index];
503                 for (counter = 0; counter < group->num_counters; counter++) {
504                         value = metric->counters[group_index][counter];
505                         if (value == 0.0)
506                                 continue;
507                         printf ("%s: %.2f ", group->counter_names[counter],
508                                 value / 1e6);
509                 }
510         }
511         printf ("]\n");
512 }
513
514 static void
515 print_program_metrics (void)
516 {
517         context_t *ctx = &current_context;
518         int *sorted; /* Sorted indices into the ctx->op_metrics */
519         double total = 0;
520         unsigned i;
521
522         /* Make a sorted list of the operations by time used, and figure
523          * out the total so we can print percentages.
524          */
525         sorted = calloc(ctx->num_op_metrics, sizeof(*sorted));
526         for (i = 0; i < ctx->num_op_metrics; i++) {
527                 sorted[i] = i;
528                 total += ctx->op_metrics[i].time_ns;
529         }
530         qsort_r(sorted, ctx->num_op_metrics, sizeof(*sorted),
531                 time_compare, ctx->op_metrics);
532
533         for (i = 0; i < ctx->num_op_metrics; i++)
534                 print_op_metrics (ctx, &ctx->op_metrics[sorted[i]], total);
535
536         free (sorted);
537 }
538
539 /* Called at program exit */
540 static void
541 metrics_exit (void)
542 {
543         if (verbose)
544                 printf ("fips: terminating\n");
545
546         metrics_info_fini (&current_context.metrics_info);
547 }
548
549
550 void
551 metrics_end_frame (void)
552 {
553         static int initialized = 0;
554         static struct timeval tv_start, tv_now;
555
556         if (! initialized) {
557                 gettimeofday (&tv_start, NULL);
558                 atexit (metrics_exit);
559                 if (getenv ("FIPS_VERBOSE"))
560                         verbose = 1;
561                 initialized = 1;
562         }
563
564         if (verbose)
565                 printf ("fips: frame %d complete\n", frames);
566
567         frames++;
568         gettimeofday (&tv_now, NULL);
569
570         /* Consume all timer queries that are ready. */
571         timer_query_t *timer = current_context.timer_head;
572
573         while (timer) {
574                 GLuint available, elapsed;
575
576                 glGetQueryObjectuiv (timer->id,
577                                      GL_QUERY_RESULT_AVAILABLE, &available);
578                 if (! available)
579                         break;
580
581                 glGetQueryObjectuiv (timer->id,
582                                      GL_QUERY_RESULT, &elapsed);
583
584                 accumulate_program_time (timer->op, elapsed);
585
586                 current_context.timer_head = timer->next;
587                 if (current_context.timer_head == NULL)
588                         current_context.timer_tail = NULL;
589
590                 glDeleteQueries (1, &timer->id);
591
592                 free (timer);
593                 timer = current_context.timer_head;
594         }
595
596         /* And similarly for all performance monitors that are ready. */
597         monitor_t *monitor = current_context.monitor_head;
598
599         while (monitor) {
600                 GLuint available, result_size, *result;
601                 GLint bytes_written;
602
603                 glGetPerfMonitorCounterDataAMD (monitor->id,
604                                                 GL_PERFMON_RESULT_AVAILABLE_AMD,
605                                                 sizeof (available), &available,
606                                                 NULL);
607                 if (! available)
608                         break;
609
610                 glGetPerfMonitorCounterDataAMD (monitor->id,
611                                                 GL_PERFMON_RESULT_SIZE_AMD,
612                                                 sizeof (result_size),
613                                                 &result_size, NULL);
614
615                 result = xmalloc (result_size);
616
617                 glGetPerfMonitorCounterDataAMD (monitor->id,
618                                                 GL_PERFMON_RESULT_AMD,
619                                                 result_size, result,
620                                                 &bytes_written);
621
622                 accumulate_program_metrics (monitor->op, result, result_size);
623
624                 free (result);
625
626                 current_context.monitor_head = monitor->next;
627                 if (current_context.monitor_head == NULL)
628                         current_context.monitor_tail = NULL;
629
630                 glDeletePerfMonitorsAMD (1, &monitor->id);
631
632                 free (monitor);
633                 monitor = current_context.monitor_head;
634         }
635
636         if (frames % 60 == 0) {
637                 double fps;
638
639                 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
640                                          (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
641
642                 printf("FPS: %.3f\n", fps);
643
644                 print_program_metrics ();
645         }
646 }