]> git.cworth.org Git - fips/blob - metrics.c
6704572533a41b901fa93cf481d731c23f408707
[fips] / metrics.c
1 /* Copyright © 2013, Intel Corporation
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  */
21
22 #define _GNU_SOURCE
23
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
28
29 #include "fips-dispatch-gl.h"
30
31 #include "metrics.h"
32 #include "context.h"
33 #include "metrics-info.h"
34 #include "xmalloc.h"
35
/* Once more than this many performance monitors are awaiting results,
 * metrics_counter_stop forces a collection pass. */
#define MAX_MONITORS_IN_FLIGHT 1000

/* Count of frames seen so far; incremented by metrics_end_frame. */
int frames;

/* Non-zero enables printing of the full set of performance counters;
 * set by metrics_end_frame when FIPS_VERBOSE is in the environment. */
int verbose;
40
41 /* Timer query */
42 typedef struct timer_query
43 {
44         unsigned id;
45
46         metrics_op_t op;
47         struct timer_query *next;
48 } timer_query_t;
49
50 /* Performance-monitor query */
51 typedef struct monitor
52 {
53         unsigned id;
54
55         metrics_op_t op;
56         struct monitor *next;
57 } monitor_t;
58
59 typedef struct op_metrics
60 {
61         /* This happens to also be the index into the
62          * metrics->op_metrics array currently
63          */
64         metrics_op_t op;
65         double time_ns;
66
67         double **counters;
68 } op_metrics_t;
69
70 struct metrics
71 {
72         /* Description of all available peformance counters, counter
73          * groups, their names and IDs, etc. */
74         metrics_info_t *info;
75
76         /* The current operation being measured. */
77         metrics_op_t op;
78
79         /* GL_TIME_ELAPSED query for which glEndQuery has not yet
80          * been called. */
81         unsigned timer_begun_id;
82
83         /* GL_TIME_ELAPSED queries for which glEndQuery has been
84          * called, (but results have not yet been queried). */
85         timer_query_t *timer_head;
86         timer_query_t *timer_tail;
87
88         /* Performance monitor for which glEndPerfMonitorAMD has not
89          * yet been called. */
90         unsigned monitor_begun_id;
91
92         /* Performance monitors for which glEndPerfMonitorAMD has
93          * been called, (but results have not yet been queried). */
94         monitor_t *monitor_head;
95         monitor_t *monitor_tail;
96
97         int monitors_in_flight;
98
99         unsigned num_op_metrics;
100         op_metrics_t *op_metrics;
101 };
102
103 metrics_t *
104 metrics_create (metrics_info_t *info)
105 {
106         metrics_t *metrics;
107
108         metrics = xmalloc (sizeof (metrics_t));
109
110         metrics->info = info;
111
112         metrics->op = 0;
113
114         metrics->timer_begun_id = 0;
115
116         metrics->timer_head = NULL;
117         metrics->timer_tail = NULL;
118
119         metrics->monitor_begun_id = 0;
120
121         metrics->monitor_head = NULL;
122         metrics->monitor_tail = NULL;
123
124         metrics->monitors_in_flight = 0;
125
126         metrics->num_op_metrics = 0;
127         metrics->op_metrics = NULL;
128
129         return metrics;
130 }
131
132 void
133 metrics_fini (metrics_t *metrics)
134 {
135         timer_query_t *timer, *timer_next;
136         monitor_t *monitor, *monitor_next;
137
138         /* Discard and cleanup any outstanding queries. */
139         if (metrics->timer_begun_id) {
140                 glEndQuery (GL_TIME_ELAPSED);
141                 glDeleteQueries (1, &metrics->timer_begun_id);
142                 metrics->timer_begun_id = 0;
143         }
144
145         for (timer = metrics->timer_head;
146              timer;
147              timer = timer_next)
148         {
149                 glDeleteQueries (1, &timer->id);
150                 timer_next = timer->next;
151                 free (timer);
152         }
153         metrics->timer_head = NULL;
154         metrics->timer_tail = NULL;
155
156         if (metrics->monitor_begun_id) {
157                 glEndPerfMonitorAMD (metrics->monitor_begun_id);
158                 glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id);
159                 metrics->monitor_begun_id = 0;
160         }
161
162         for (monitor = metrics->monitor_head;
163              monitor;
164              monitor = monitor_next)
165         {
166                 glDeletePerfMonitorsAMD (1, &monitor->id);
167                 monitor_next = monitor->next;
168                 free (monitor);
169         }
170         metrics->monitor_head = NULL;
171         metrics->monitor_tail = NULL;
172
173         metrics->monitors_in_flight = 0;
174 }
175
176 void
177 metrics_destroy (metrics_t *metrics)
178 {
179         metrics_fini (metrics);
180
181         free (metrics);
182 }
183
184 static const char *
185 metrics_op_string (metrics_op_t op)
186 {
187         if (op >= METRICS_OP_SHADER)
188                 return "Shader program";
189
190         switch (op)
191         {
192         case METRICS_OP_ACCUM:
193                 return "glAccum*(+)";
194         case METRICS_OP_BUFFER_DATA:
195                 return "glBufferData(+)";
196         case METRICS_OP_BUFFER_SUB_DATA:
197                 return "glCopyBufferSubData*";
198         case METRICS_OP_BITMAP:
199                 return "glBitmap*";
200         case METRICS_OP_BLIT_FRAMEBUFFER:
201                 return "glBlitFramebuffer*";
202         case METRICS_OP_CLEAR:
203                 return "glClear(+)";
204         case METRICS_OP_CLEAR_BUFFER_DATA:
205                 return "glCearBufferData(+)";
206         case METRICS_OP_CLEAR_TEX_IMAGE:
207                 return "glClearTexImage(+)";
208         case METRICS_OP_COPY_PIXELS:
209                 return "glCopyPixels";
210         case METRICS_OP_COPY_TEX_IMAGE:
211                 return "glCopyTexImage(+)";
212         case METRICS_OP_DRAW_PIXELS:
213                 return "glDrawPixels";
214         case METRICS_OP_GET_TEX_IMAGE:
215                 return "glGetTexImage(+)";
216         case METRICS_OP_READ_PIXELS:
217                 return "glReadPixels*";
218         case METRICS_OP_TEX_IMAGE:
219                 return "glTexImage*(+)";
220         default:
221                 fprintf (stderr, "fips: Internal error: "
222                          "Unknown metrics op value: %d\n", op);
223                 exit (1);
224         }
225
226         return "";
227 }
228
229 void
230 metrics_counter_start (metrics_t *metrics)
231 {
232         unsigned i;
233
234         /* Initialize the timer_query and monitor objects */
235         glGenQueries (1, &metrics->timer_begun_id);
236
237         glGenPerfMonitorsAMD (1, &metrics->monitor_begun_id);
238
239         for (i = 0; i < metrics->info->num_groups; i++)
240         {
241                 metrics_group_info_t *group;
242                 int num_counters;
243
244                 group = &metrics->info->groups[i];
245
246                 num_counters = group->num_counters;
247                 if (group->max_active_counters < group->num_counters)
248                 {
249                         fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
250                                  group->max_active_counters,
251                                  group->num_counters, i);
252                         num_counters = group->max_active_counters;
253
254                 }
255
256                 glSelectPerfMonitorCountersAMD(metrics->monitor_begun_id,
257                                                GL_TRUE, group->id,
258                                                num_counters,
259                                                group->counter_ids);
260         }
261
262         /* Start the queries */
263         glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id);
264
265         glBeginPerfMonitorAMD (metrics->monitor_begun_id);
266 }
267
268 void
269 metrics_counter_stop (metrics_t *metrics)
270 {
271         timer_query_t *timer;
272         monitor_t *monitor;
273
274         /* Stop the current timer and monitor. */
275         glEndQuery (GL_TIME_ELAPSED);
276         glEndPerfMonitorAMD (metrics->monitor_begun_id);
277
278         /* Add these IDs to our lists of outstanding queries and
279          * monitors so the results can be collected later. */
280         timer = xmalloc (sizeof (timer_query_t));
281
282         timer->op = metrics->op;
283         timer->id = metrics->timer_begun_id;
284         timer->next = NULL;
285
286         if (metrics->timer_tail) {
287                 metrics->timer_tail->next = timer;
288                 metrics->timer_tail = timer;
289         } else {
290                 metrics->timer_tail = timer;
291                 metrics->timer_head = timer;
292         }
293
294         /* Create a new performance-monitor query */
295         monitor = xmalloc (sizeof (monitor_t));
296
297         monitor->op = metrics->op;
298         monitor->id = metrics->monitor_begun_id;
299         monitor->next = NULL;
300
301         if (metrics->monitor_tail) {
302                 metrics->monitor_tail->next = monitor;
303                 metrics->monitor_tail = monitor;
304         } else {
305                 metrics->monitor_tail = monitor;
306                 metrics->monitor_head = monitor;
307         }
308
309         metrics->monitors_in_flight++;
310
311         /* Avoid being a resource hog and collect outstanding results
312          * once we have sent off a large number of
313          * queries. (Presumably, many of the outstanding queries are
314          * available by now.)
315          */
316         if (metrics->monitors_in_flight > MAX_MONITORS_IN_FLIGHT)
317                 metrics_collect_available (metrics);
318 }
319
320 void
321 metrics_set_current_op (metrics_t *metrics, metrics_op_t op)
322 {
323         metrics->op = op;
324 }
325
326 metrics_op_t
327 metrics_get_current_op (metrics_t *metrics)
328 {
329         return metrics->op;
330 }
331
332 static void
333 op_metrics_init (metrics_info_t *info, op_metrics_t *metrics, metrics_op_t op)
334 {
335         unsigned i, j;
336
337         metrics->op = op;
338         metrics->time_ns = 0.0;
339
340         metrics->counters = xmalloc (sizeof(double *) * info->num_groups);
341
342         for (i = 0; i < info->num_groups; i++) {
343                 metrics->counters[i] = xmalloc (sizeof (double) *
344                                                 info->groups[i].num_counters);
345                 for (j = 0; j < info->groups[i].num_counters; j++)
346                         metrics->counters[i][j] = 0.0;
347         }
348 }
349
350 static op_metrics_t *
351 _get_op_metrics (metrics_t *metrics, metrics_op_t op)
352 {
353         unsigned i;
354
355         if (op >= metrics->num_op_metrics)
356         {
357                 metrics->op_metrics = realloc (metrics->op_metrics,
358                                                (op + 1) * sizeof (op_metrics_t));
359                 for (i = metrics->num_op_metrics; i < op + 1; i++)
360                         op_metrics_init (metrics->info, &metrics->op_metrics[i], i);
361
362                 metrics->num_op_metrics = op + 1;
363         }
364
365         return &metrics->op_metrics[op];
366 }
367
368 static void
369 accumulate_program_metrics (metrics_t *metrics, metrics_op_t op,
370                             GLuint *result, GLuint size)
371 {
372 #define CONSUME(var)                                                    \
373         if (p + sizeof(var) > ((unsigned char *) result) + size)        \
374         {                                                               \
375                 fprintf (stderr, "Unexpected end-of-buffer while "      \
376                          "parsing results\n");                          \
377                 break;                                                  \
378         }                                                               \
379         (var) = *((typeof(var) *) p);                                   \
380         p += sizeof(var);
381
382         metrics_info_t *info = metrics->info;
383         op_metrics_t *op_metrics = _get_op_metrics (metrics, op);
384         unsigned char *p = (unsigned char *) result;
385
386         while (p < ((unsigned char *) result) + size)
387         {
388                 GLuint group_id, group_index;
389                 GLuint counter_id, counter_index;
390                 metrics_group_info_t *group;
391                 double value;
392                 unsigned i;
393
394                 CONSUME (group_id);
395                 CONSUME (counter_id);
396
397                 for (i = 0; i < info->num_groups; i++) {
398                         if (info->groups[i].id == group_id)
399                                 break;
400                 }
401                 group_index = i;
402                 assert (group_index < info->num_groups);
403                 group = &info->groups[group_index];
404
405                 for (i = 0; i < group->num_counters; i++) {
406                         if (group->counter_ids[i] == counter_id)
407                                 break;
408                 }
409                 counter_index = i;
410                 assert (counter_index < group->num_counters);
411
412                 switch (group->counter_types[counter_index])
413                 {
414                         uint uint_value;
415                         uint64_t uint64_value;
416                         float float_value;
417                 case GL_UNSIGNED_INT:
418                         CONSUME (uint_value);
419                         value = uint_value;
420                         break;
421                 case GL_UNSIGNED_INT64_AMD:
422                         CONSUME (uint64_value);
423                         value = uint64_value;
424                         break;
425                 case GL_PERCENTAGE_AMD:
426                 case GL_FLOAT:
427                         CONSUME (float_value);
428                         value = float_value;
429                         break;
430                 default:
431                         fprintf (stderr, "fips: Warning: Unknown counter value type (%d)\n",
432                                  group->counter_types[counter_index]);
433                         value = 0.0;
434                         break;
435                 }
436
437                 op_metrics->counters[group_index][counter_index] += value;
438         }
439 }
440
441 static void
442 accumulate_program_time (metrics_t *metrics, metrics_op_t op, unsigned time_ns)
443 {
444         op_metrics_t *op_metrics;
445
446         op_metrics = _get_op_metrics (metrics, op);
447
448         op_metrics->time_ns += time_ns;
449 }
450
451 typedef struct per_stage_metrics
452 {
453         op_metrics_t *metrics;
454         shader_stage_info_t *stage;
455         double time_ns;
456         double active;
457 } per_stage_metrics_t;
458
459 static int
460 _is_shader_stage_counter (metrics_info_t *info,
461                           unsigned group_index,
462                           unsigned counter_index)
463 {
464         shader_stage_info_t *stage;
465         unsigned i;
466
467         for (i = 0; i < info->num_shader_stages; i++) {
468                 stage = &info->stages[i];
469
470                 if (stage->active_group_index == group_index &&
471                     stage->active_counter_index == counter_index)
472                 {
473                         return 1;
474                 }
475
476                 if (stage->stall_group_index == group_index &&
477                     stage->stall_counter_index == counter_index)
478                 {
479                         return 1;
480                 }
481         }
482
483         return 0;
484 }
485
486 static void
487 print_per_stage_metrics (metrics_t *metrics,
488                          per_stage_metrics_t *per_stage,
489                          double total)
490 {
491         metrics_info_t *info = metrics->info;
492         op_metrics_t *op_metrics = per_stage->metrics;
493         metrics_group_info_t *group;
494         const char *op_string;
495         unsigned group_index, counter;
496         double value;
497
498         /* Don't print anything for stages with no alloted time. */
499         if (per_stage->time_ns == 0.0)
500                 return;
501
502         op_string = metrics_op_string (op_metrics->op);
503
504         printf ("%21s", op_string);
505
506         if (op_metrics->op >= METRICS_OP_SHADER) {
507                 printf (" %3d", op_metrics->op - METRICS_OP_SHADER);
508         } else {
509                 printf ("    ");
510
511         }
512
513         if (per_stage->stage)
514                 printf (" %cS:", per_stage->stage->name[0]);
515         else
516                 printf ("   :");
517
518         printf ("\t%7.2f ms (%4.1f%%)",
519                 per_stage->time_ns / 1e6,
520                 per_stage->time_ns / total * 100);
521
522         if (per_stage->active)
523                 printf (", %4.1f%% active", per_stage->active * 100);
524
525         printf ("\n");
526
527         /* I'm not seeing a lot of value printing the rest of these
528          * performance counters by default yet. Use --verbose to get
529          * them for now. */
530         if (! verbose)
531                 return;
532
533         printf ("[");
534         for (group_index = 0; group_index < info->num_groups; group_index++) {
535                 group = &info->groups[group_index];
536                 for (counter = 0; counter < group->num_counters; counter++) {
537
538                         /* Don't print this counter value if it's a
539                          * per-stage cycle counter, (which we have
540                          * already accounted for). */
541                         if (_is_shader_stage_counter (info, group_index, counter))
542                                 continue;
543
544                         value = op_metrics->counters[group_index][counter];
545                         if (value == 0.0)
546                                 continue;
547                         printf ("%s: %.2f ", group->counter_names[counter],
548                                 value / 1e6);
549                 }
550         }
551         printf ("]\n");
552 }
553
554 static int
555 time_compare(const void *in_a, const void *in_b, void *arg unused)
556 {
557         const per_stage_metrics_t *a = in_a;
558         const per_stage_metrics_t *b = in_b;
559
560
561         if (a->time_ns < b->time_ns)
562                 return -1;
563         if (a->time_ns > b->time_ns)
564                 return 1;
565         return 0;
566 }
567
568 static void
569 print_program_metrics (metrics_t *metrics)
570 {
571         metrics_info_t *info = metrics->info;
572         unsigned num_shader_stages = info->num_shader_stages;
573         per_stage_metrics_t *sorted, *per_stage;
574         double total_time, op_cycles;
575         op_metrics_t *op;
576         unsigned group_index, counter_index;
577         unsigned i, j, num_sorted;
578
579         /* Make a sorted list of the per-stage operations by time
580          * used, and figure out the total so we can print percentages.
581          */
582         num_sorted = metrics->num_op_metrics * num_shader_stages;
583
584         sorted = xmalloc (sizeof (*sorted) * num_sorted);
585
586         total_time = 0.0;
587
588         for (i = 0; i < metrics->num_op_metrics; i++) {
589
590                 op = &metrics->op_metrics[i];
591
592                 /* Accumulate total time across all ops. */
593                 total_time += op->time_ns;
594
595                 /* Also, find total cycles in all stages of this op. */
596                 op_cycles = 0.0;
597
598                 for (j = 0; j < num_shader_stages; j++) {
599                         /* Active cycles */
600                         group_index = info->stages[j].active_group_index;
601                         counter_index = info->stages[j].active_counter_index;
602                         op_cycles += op->counters[group_index][counter_index];
603
604                         /* Stall cycles */
605                         group_index = info->stages[j].stall_group_index;
606                         counter_index = info->stages[j].stall_counter_index;
607                         op_cycles += op->counters[group_index][counter_index];
608                 }
609
610                 for (j = 0; j < num_shader_stages; j++) {
611                         double active_cycles, stall_cycles, stage_cycles;
612
613                         /* Active cycles */
614                         group_index = info->stages[j].active_group_index;
615                         counter_index = info->stages[j].active_counter_index;
616                         active_cycles = op->counters[group_index][counter_index];
617
618                         /* Stall cycles */
619                         group_index = info->stages[j].stall_group_index;
620                         counter_index = info->stages[j].stall_counter_index;
621                         stall_cycles = op->counters[group_index][counter_index];
622
623                         stage_cycles = active_cycles + stall_cycles;
624
625                         per_stage = &sorted[i * num_shader_stages + j];
626                         per_stage->metrics = op;
627
628                         if (op_cycles) {
629                                 per_stage->stage = &info->stages[j];
630                                 per_stage->time_ns = op->time_ns * (stage_cycles / op_cycles);
631                         } else {
632                                 /* If we don't have any per-stage cycle counts
633                                  * for this operation, then use the first
634                                  * stage as a placeholder for all the time,
635                                  * but NULL-ify the stage info so that the
636                                  * report doesn't lie about this time being
637                                  * from any particular stage. */
638                                 per_stage->stage = NULL;
639                                 if (j == 0) {
640                                         per_stage->time_ns = op->time_ns;
641                                 } else {
642                                         per_stage->time_ns = 0.0;
643                                 }
644                         }
645
646                         if (stage_cycles) {
647                                 per_stage->active = active_cycles / stage_cycles;
648                         } else {
649                                 per_stage->active = 0.0;
650                         }
651                 }
652         }
653
654         qsort_r (sorted, num_sorted, sizeof (*sorted),
655                  time_compare, metrics->op_metrics);
656
657         for (i = 0; i < num_sorted; i++)
658                 print_per_stage_metrics (metrics, &sorted[i], total_time);
659
660         free (sorted);
661 }
662
663 void
664 metrics_collect_available (metrics_t *metrics)
665 {
666         /* Consume all timer queries that are ready. */
667         timer_query_t *timer = metrics->timer_head;
668
669         while (timer) {
670                 GLuint available, elapsed;
671
672                 glGetQueryObjectuiv (timer->id,
673                                      GL_QUERY_RESULT_AVAILABLE, &available);
674                 if (! available)
675                         break;
676
677                 glGetQueryObjectuiv (timer->id,
678                                      GL_QUERY_RESULT, &elapsed);
679
680                 accumulate_program_time (metrics, timer->op, elapsed);
681
682                 metrics->timer_head = timer->next;
683                 if (metrics->timer_head == NULL)
684                         metrics->timer_tail = NULL;
685
686                 glDeleteQueries (1, &timer->id);
687
688                 free (timer);
689                 timer = metrics->timer_head;
690         }
691
692         /* And similarly for all performance monitors that are ready. */
693         monitor_t *monitor = metrics->monitor_head;
694
695         while (monitor) {
696                 GLuint available, result_size, *result;
697                 GLint bytes_written;
698
699                 glGetPerfMonitorCounterDataAMD (monitor->id,
700                                                 GL_PERFMON_RESULT_AVAILABLE_AMD,
701                                                 sizeof (available), &available,
702                                                 NULL);
703                 if (! available)
704                         break;
705
706                 glGetPerfMonitorCounterDataAMD (monitor->id,
707                                                 GL_PERFMON_RESULT_SIZE_AMD,
708                                                 sizeof (result_size),
709                                                 &result_size, NULL);
710
711                 result = xmalloc (result_size);
712
713                 glGetPerfMonitorCounterDataAMD (monitor->id,
714                                                 GL_PERFMON_RESULT_AMD,
715                                                 result_size, result,
716                                                 &bytes_written);
717
718                 accumulate_program_metrics (metrics, monitor->op, result, result_size);
719
720                 free (result);
721
722                 metrics->monitor_head = monitor->next;
723                 if (metrics->monitor_head == NULL)
724                         metrics->monitor_tail = NULL;
725
726                 glDeletePerfMonitorsAMD (1, &monitor->id);
727
728                 free (monitor);
729
730                 metrics->monitors_in_flight--;
731
732                 monitor = metrics->monitor_head;
733         }
734 }
735
736
737 void
738 metrics_end_frame (metrics_t *metrics)
739 {
740         static int initialized = 0;
741         static struct timeval tv_start, tv_now;
742
743         if (! initialized) {
744                 gettimeofday (&tv_start, NULL);
745                 if (getenv ("FIPS_VERBOSE"))
746                         verbose = 1;
747                 initialized = 1;
748         }
749
750         frames++;
751
752         metrics_collect_available (metrics);
753
754         if (frames % 15 == 0) {
755                 double fps;
756
757                 gettimeofday (&tv_now, NULL);
758
759                 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
760                                          (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
761
762                 printf("FPS: %.3f\n", fps);
763
764                 print_program_metrics (metrics);
765         }
766 }