]> git.cworth.org Git - fips/blob - metrics.c
Use consistent argument order for subtract functions and macros.
[fips] / metrics.c
1 /* Copyright © 2013, Intel Corporation
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  */
21
22 #define _GNU_SOURCE
23
24 #include <inttypes.h>
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <time.h>
30 #include <sys/time.h>
31 #include <string.h>
32 #include <errno.h>
33
34 #include "fips-dispatch-gl.h"
35
36 #include "metrics.h"
37 #include "context.h"
38 #include "metrics-info.h"
39 #include "xmalloc.h"
40
41 int frames;
42 int verbose;
43
44 #define MAX_MONITORS_IN_FLIGHT 1000
45
46 /* Timer query */
47 typedef struct timer_query
48 {
49         unsigned id;
50
51         metrics_op_t op;
52         struct timer_query *next;
53 } timer_query_t;
54
55 /* Performance-monitor query */
56 typedef struct monitor
57 {
58         unsigned id;
59
60         metrics_op_t op;
61         struct monitor *next;
62 } monitor_t;
63
64 typedef struct op_metrics
65 {
66         /* This happens to also be the index into the
67          * metrics->op_metrics array currently
68          */
69         metrics_op_t op;
70         double time_ns;
71
72         double **counters;
73 } op_metrics_t;
74
75 struct metrics
76 {
77         /* Description of all available peformance counters, counter
78          * groups, their names and IDs, etc. */
79         metrics_info_t *info;
80
81         /* The current operation being measured. */
82         metrics_op_t op;
83
84         /* GL_TIME_ELAPSED query for which glEndQuery has not yet
85          * been called. */
86         unsigned timer_begun_id;
87
88         /* GL_TIME_ELAPSED queries for which glEndQuery has been
89          * called, (but results have not yet been queried). */
90         timer_query_t *timer_head;
91         timer_query_t *timer_tail;
92
93         /* Performance monitor for which glEndPerfMonitorAMD has not
94          * yet been called. */
95         unsigned monitor_begun_id;
96
97         /* Performance monitors for which glEndPerfMonitorAMD has
98          * been called, (but results have not yet been queried). */
99         monitor_t *monitor_head;
100         monitor_t *monitor_tail;
101
102         int monitors_in_flight;
103
104         unsigned num_op_metrics;
105         op_metrics_t *op_metrics;
106
107         /* Per-frame time and latency measurement. */
108         GLint64 swap_begin_timestamp;
109         unsigned swap_end_timestamp_id;
110         GLint64 previous_swap_end_timestamp;
111
112         struct timespec previous_cpu_time_ts;
113         struct timespec cpu_time_ts;
114 };
115
116 /* Return (a - b) while accounting for the fact that between a and b,
117  * the counter may have wrapped around (with a maximum number of bits
118  * in the counter of GL_QUERY_COUNTER_BITS). */
119 static uint64_t
120 subtract_timestamp (uint64_t a, uint64_t b)
121 {
122         GLint timestamp_bits;
123
124         glGetQueryiv (GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &timestamp_bits);
125
126         /* Note: "<<" isn't defined for the number of bits in the
127          * type, and the compiler will punish you.
128          */
129         if (timestamp_bits == 64)
130                 return a - b;
131
132         if (b > a)
133                 return (1ull << timestamp_bits) + a - b;
134         else
135                 return a - b;
136 }
137
138 metrics_t *
139 metrics_create (metrics_info_t *info)
140 {
141         metrics_t *metrics;
142
143         metrics = xmalloc (sizeof (metrics_t));
144
145         metrics->info = info;
146
147         metrics->op = 0;
148
149         metrics->timer_begun_id = 0;
150
151         metrics->timer_head = NULL;
152         metrics->timer_tail = NULL;
153
154         metrics->monitor_begun_id = 0;
155
156         metrics->monitor_head = NULL;
157         metrics->monitor_tail = NULL;
158
159         metrics->monitors_in_flight = 0;
160
161         metrics->num_op_metrics = 0;
162         metrics->op_metrics = NULL;
163
164         glGenQueries (1, &metrics->swap_end_timestamp_id);
165
166         /* Get the first frame timestamp started immediately. */
167         glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP);
168
169         /* As well as the initial CPU time. */
170         clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts);
171
172         return metrics;
173 }
174
175 void
176 metrics_fini (metrics_t *metrics)
177 {
178         timer_query_t *timer, *timer_next;
179         monitor_t *monitor, *monitor_next;
180
181         /* Discard and cleanup any outstanding queries. */
182         if (metrics->timer_begun_id) {
183                 glEndQuery (GL_TIME_ELAPSED);
184                 glDeleteQueries (1, &metrics->timer_begun_id);
185                 metrics->timer_begun_id = 0;
186         }
187
188         for (timer = metrics->timer_head;
189              timer;
190              timer = timer_next)
191         {
192                 glDeleteQueries (1, &timer->id);
193                 timer_next = timer->next;
194                 free (timer);
195         }
196         metrics->timer_head = NULL;
197         metrics->timer_tail = NULL;
198
199         if (metrics->info->have_perfmon) {
200
201                 if (metrics->monitor_begun_id) {
202                         glEndPerfMonitorAMD (metrics->monitor_begun_id);
203                         glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id);
204                         metrics->monitor_begun_id = 0;
205                 }
206
207                 for (monitor = metrics->monitor_head;
208                      monitor;
209                      monitor = monitor_next)
210                 {
211                         glDeletePerfMonitorsAMD (1, &monitor->id);
212                         monitor_next = monitor->next;
213                         free (monitor);
214                 }
215                 metrics->monitor_head = NULL;
216                 metrics->monitor_tail = NULL;
217
218         }
219
220         metrics->monitors_in_flight = 0;
221 }
222
223 void
224 metrics_destroy (metrics_t *metrics)
225 {
226         metrics_fini (metrics);
227
228         free (metrics);
229 }
230
231 static const char *
232 metrics_op_string (metrics_op_t op)
233 {
234         if (op >= METRICS_OP_SHADER)
235                 return "Shader program";
236
237         switch (op)
238         {
239         case METRICS_OP_ACCUM:
240                 return "glAccum*(+)";
241         case METRICS_OP_BUFFER_DATA:
242                 return "glBufferData(+)";
243         case METRICS_OP_BUFFER_SUB_DATA:
244                 return "glCopyBufferSubData*";
245         case METRICS_OP_BITMAP:
246                 return "glBitmap*";
247         case METRICS_OP_BLIT_FRAMEBUFFER:
248                 return "glBlitFramebuffer*";
249         case METRICS_OP_CLEAR:
250                 return "glClear(+)";
251         case METRICS_OP_CLEAR_BUFFER_DATA:
252                 return "glCearBufferData(+)";
253         case METRICS_OP_CLEAR_TEX_IMAGE:
254                 return "glClearTexImage(+)";
255         case METRICS_OP_COPY_PIXELS:
256                 return "glCopyPixels";
257         case METRICS_OP_COPY_TEX_IMAGE:
258                 return "glCopyTexImage(+)";
259         case METRICS_OP_DRAW_PIXELS:
260                 return "glDrawPixels";
261         case METRICS_OP_GET_TEX_IMAGE:
262                 return "glGetTexImage(+)";
263         case METRICS_OP_READ_PIXELS:
264                 return "glReadPixels*";
265         case METRICS_OP_TEX_IMAGE:
266                 return "glTexImage*(+)";
267         default:
268                 fprintf (stderr, "fips: Internal error: "
269                          "Unknown metrics op value: %d\n", op);
270                 exit (1);
271         }
272
273         return "";
274 }
275
276 void
277 metrics_counter_start (metrics_t *metrics)
278 {
279         unsigned i;
280
281         /* Initialize the timer_query object. */
282         glGenQueries (1, &metrics->timer_begun_id);
283
284         /* Most everything else in this function is
285          * performance-monitor related. If we don't have that
286          * extension, just start the timer query and be done. */
287         if (! metrics->info->have_perfmon) {
288                 glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id);
289                 return;
290         }
291
292         /* Initialize the performance-monitor object */
293         glGenPerfMonitorsAMD (1, &metrics->monitor_begun_id);
294
295         for (i = 0; i < metrics->info->num_groups; i++)
296         {
297                 metrics_group_info_t *group;
298                 int num_counters;
299
300                 group = &metrics->info->groups[i];
301
302                 num_counters = group->num_counters;
303                 if (group->max_active_counters < group->num_counters)
304                 {
305                         fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
306                                  group->max_active_counters,
307                                  group->num_counters, i);
308                         num_counters = group->max_active_counters;
309
310                 }
311
312                 glSelectPerfMonitorCountersAMD(metrics->monitor_begun_id,
313                                                GL_TRUE, group->id,
314                                                num_counters,
315                                                group->counter_ids);
316         }
317
318         /* Start the queries */
319         glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id);
320
321         glBeginPerfMonitorAMD (metrics->monitor_begun_id);
322 }
323
324 void
325 metrics_counter_stop (metrics_t *metrics)
326 {
327         timer_query_t *timer;
328         monitor_t *monitor;
329
330         /* Stop the current timer and monitor. */
331         glEndQuery (GL_TIME_ELAPSED);
332
333         if (metrics->info->have_perfmon)
334                 glEndPerfMonitorAMD (metrics->monitor_begun_id);
335
336         /* Add these IDs to our lists of outstanding queries and
337          * monitors so the results can be collected later. */
338         timer = xmalloc (sizeof (timer_query_t));
339
340         timer->op = metrics->op;
341         timer->id = metrics->timer_begun_id;
342         timer->next = NULL;
343
344         if (metrics->timer_tail) {
345                 metrics->timer_tail->next = timer;
346                 metrics->timer_tail = timer;
347         } else {
348                 metrics->timer_tail = timer;
349                 metrics->timer_head = timer;
350         }
351
352         if (metrics->info->have_perfmon) {
353                 /* Create a new performance-monitor query */
354                 monitor = xmalloc (sizeof (monitor_t));
355
356                 monitor->op = metrics->op;
357                 monitor->id = metrics->monitor_begun_id;
358                 monitor->next = NULL;
359
360                 if (metrics->monitor_tail) {
361                         metrics->monitor_tail->next = monitor;
362                         metrics->monitor_tail = monitor;
363                 } else {
364                         metrics->monitor_tail = monitor;
365                         metrics->monitor_head = monitor;
366                 }
367         }
368
369         metrics->monitors_in_flight++;
370
371         /* Avoid being a resource hog and collect outstanding results
372          * once we have sent off a large number of
373          * queries. (Presumably, many of the outstanding queries are
374          * available by now.)
375          */
376         if (metrics->monitors_in_flight > MAX_MONITORS_IN_FLIGHT)
377                 metrics_collect_available (metrics);
378 }
379
380 void
381 metrics_set_current_op (metrics_t *metrics, metrics_op_t op)
382 {
383         metrics->op = op;
384 }
385
386 metrics_op_t
387 metrics_get_current_op (metrics_t *metrics)
388 {
389         return metrics->op;
390 }
391
392 static void
393 op_metrics_init (metrics_info_t *info, op_metrics_t *metrics, metrics_op_t op)
394 {
395         unsigned i, j;
396
397         metrics->op = op;
398         metrics->time_ns = 0.0;
399
400         metrics->counters = xmalloc (sizeof(double *) * info->num_groups);
401
402         for (i = 0; i < info->num_groups; i++) {
403                 metrics->counters[i] = xmalloc (sizeof (double) *
404                                                 info->groups[i].num_counters);
405                 for (j = 0; j < info->groups[i].num_counters; j++)
406                         metrics->counters[i][j] = 0.0;
407         }
408 }
409
410 static op_metrics_t *
411 _get_op_metrics (metrics_t *metrics, metrics_op_t op)
412 {
413         unsigned i;
414
415         if (op >= metrics->num_op_metrics)
416         {
417                 metrics->op_metrics = realloc (metrics->op_metrics,
418                                                (op + 1) * sizeof (op_metrics_t));
419                 for (i = metrics->num_op_metrics; i < op + 1; i++)
420                         op_metrics_init (metrics->info, &metrics->op_metrics[i], i);
421
422                 metrics->num_op_metrics = op + 1;
423         }
424
425         return &metrics->op_metrics[op];
426 }
427
428 static void
429 accumulate_program_metrics (metrics_t *metrics, metrics_op_t op,
430                             GLuint *result, GLuint size)
431 {
432 #define CONSUME(var)                                                    \
433         if (p + sizeof(var) > ((unsigned char *) result) + size)        \
434         {                                                               \
435                 fprintf (stderr, "Unexpected end-of-buffer while "      \
436                          "parsing results\n");                          \
437                 value = 0.0;                                            \
438                 break;                                                  \
439         }                                                               \
440         (var) = *((typeof(var) *) p);                                   \
441         p += sizeof(var);
442
443         metrics_info_t *info = metrics->info;
444         op_metrics_t *op_metrics = _get_op_metrics (metrics, op);
445         unsigned char *p = (unsigned char *) result;
446
447         while (p < ((unsigned char *) result) + size)
448         {
449                 GLuint group_id, group_index;
450                 GLuint counter_id, counter_index;
451                 metrics_group_info_t *group;
452                 double value;
453                 unsigned i;
454
455                 CONSUME (group_id);
456                 CONSUME (counter_id);
457
458                 for (i = 0; i < info->num_groups; i++) {
459                         if (info->groups[i].id == group_id)
460                                 break;
461                 }
462                 group_index = i;
463                 assert (group_index < info->num_groups);
464                 group = &info->groups[group_index];
465
466                 for (i = 0; i < group->num_counters; i++) {
467                         if (group->counter_ids[i] == counter_id)
468                                 break;
469                 }
470                 counter_index = i;
471                 assert (counter_index < group->num_counters);
472
473                 switch (group->counter_types[counter_index])
474                 {
475                         uint uint_value;
476                         uint64_t uint64_value;
477                         float float_value;
478                 case GL_UNSIGNED_INT:
479                         CONSUME (uint_value);
480                         value = uint_value;
481                         break;
482                 case GL_UNSIGNED_INT64_AMD:
483                         CONSUME (uint64_value);
484                         value = uint64_value;
485                         break;
486                 case GL_PERCENTAGE_AMD:
487                 case GL_FLOAT:
488                         CONSUME (float_value);
489                         value = float_value;
490                         break;
491                 default:
492                         fprintf (stderr, "fips: Warning: Unknown counter value type (%d)\n",
493                                  group->counter_types[counter_index]);
494                         value = 0.0;
495                         break;
496                 }
497
498                 op_metrics->counters[group_index][counter_index] += value;
499         }
500 }
501
502 static void
503 accumulate_program_time (metrics_t *metrics, metrics_op_t op, unsigned time_ns)
504 {
505         op_metrics_t *op_metrics;
506
507         op_metrics = _get_op_metrics (metrics, op);
508
509         op_metrics->time_ns += time_ns;
510 }
511
512 typedef struct per_stage_metrics
513 {
514         op_metrics_t *metrics;
515         shader_stage_info_t *stage;
516         double time_ns;
517         double active;
518 } per_stage_metrics_t;
519
520 static int
521 _is_shader_stage_counter (metrics_info_t *info,
522                           unsigned group_index,
523                           unsigned counter_index)
524 {
525         shader_stage_info_t *stage;
526         unsigned i;
527
528         for (i = 0; i < info->num_shader_stages; i++) {
529                 stage = &info->stages[i];
530
531                 if (stage->active_group_index == group_index &&
532                     stage->active_counter_index == counter_index)
533                 {
534                         return 1;
535                 }
536
537                 if (stage->stall_group_index == group_index &&
538                     stage->stall_counter_index == counter_index)
539                 {
540                         return 1;
541                 }
542         }
543
544         return 0;
545 }
546
547 static void
548 print_per_stage_metrics (metrics_t *metrics,
549                          per_stage_metrics_t *per_stage,
550                          double total)
551 {
552         metrics_info_t *info = metrics->info;
553         op_metrics_t *op_metrics = per_stage->metrics;
554         metrics_group_info_t *group;
555         const char *op_string;
556         unsigned group_index, counter;
557         double value;
558
559         /* Don't print anything for stages with no alloted time. */
560         if (per_stage->time_ns == 0.0)
561                 return;
562
563         op_string = metrics_op_string (op_metrics->op);
564
565         printf ("%21s", op_string);
566
567         if (op_metrics->op >= METRICS_OP_SHADER) {
568                 printf (" %3d", op_metrics->op - METRICS_OP_SHADER);
569         } else {
570                 printf ("    ");
571
572         }
573
574         if (per_stage->stage)
575                 printf (" %cS:", per_stage->stage->name[0]);
576         else
577                 printf ("   :");
578
579         printf ("\t%7.2f ms (%4.1f%%)",
580                 per_stage->time_ns / 1e6,
581                 per_stage->time_ns / total * 100);
582
583         if (per_stage->active)
584                 printf (", %4.1f%% active", per_stage->active * 100);
585
586         printf ("\n");
587
588         /* I'm not seeing a lot of value printing the rest of these
589          * performance counters by default yet. Use --verbose to get
590          * them for now. */
591         if (! verbose)
592                 return;
593
594         printf ("[");
595         for (group_index = 0; group_index < info->num_groups; group_index++) {
596                 group = &info->groups[group_index];
597                 for (counter = 0; counter < group->num_counters; counter++) {
598
599                         /* Don't print this counter value if it's a
600                          * per-stage cycle counter, (which we have
601                          * already accounted for). */
602                         if (_is_shader_stage_counter (info, group_index, counter))
603                                 continue;
604
605                         value = op_metrics->counters[group_index][counter];
606                         if (value == 0.0)
607                                 continue;
608                         printf ("%s: %.2f ", group->counter_names[counter],
609                                 value / 1e6);
610                 }
611         }
612         printf ("]\n");
613 }
614
615 static int
616 time_compare(const void *in_a, const void *in_b, void *arg unused)
617 {
618         const per_stage_metrics_t *a = in_a;
619         const per_stage_metrics_t *b = in_b;
620
621
622         if (a->time_ns < b->time_ns)
623                 return -1;
624         if (a->time_ns > b->time_ns)
625                 return 1;
626         return 0;
627 }
628
629 static void
630 print_program_metrics (metrics_t *metrics)
631 {
632         metrics_info_t *info = metrics->info;
633         unsigned num_shader_stages = info->num_shader_stages;
634         per_stage_metrics_t *sorted, *per_stage;
635         double total_time, op_cycles;
636         op_metrics_t *op;
637         unsigned group_index, counter_index;
638         unsigned i, j, num_sorted;
639
640         if (! info->have_perfmon &&
641             ! info->printed_missing_perfmon_warning)
642         {
643                 fprintf (stderr,
644                          "Warning: The GL_AMD_performance_monitor extensions is not available.\n"
645                          "         Some fips features, (such as per-shader-stage timings),\n"
646                          "         will not be available.\n");
647
648                 info->printed_missing_perfmon_warning = true;
649         }
650
651         /* Make a sorted list of the per-stage operations by time
652          * used, and figure out the total so we can print percentages.
653          */
654         if (num_shader_stages)
655                 num_sorted = metrics->num_op_metrics * num_shader_stages;
656         else
657                 num_sorted = metrics->num_op_metrics;
658
659         sorted = xmalloc (sizeof (*sorted) * num_sorted);
660
661         total_time = 0.0;
662
663         for (i = 0; i < metrics->num_op_metrics; i++) {
664
665                 op = &metrics->op_metrics[i];
666
667                 /* Accumulate total time across all ops. */
668                 total_time += op->time_ns;
669
670                 /* Also, find total cycles in all stages of this op. */
671                 op_cycles = 0.0;
672
673                 if (num_shader_stages == 0) {
674                         per_stage = &sorted[i];
675                         per_stage->metrics = op;
676                         per_stage->stage = NULL;
677                         per_stage->time_ns = op->time_ns;
678                         per_stage->active = 0.0;
679                 }
680
681                 for (j = 0; j < num_shader_stages; j++) {
682                         /* Active cycles */
683                         group_index = info->stages[j].active_group_index;
684                         counter_index = info->stages[j].active_counter_index;
685                         op_cycles += op->counters[group_index][counter_index];
686
687                         /* Stall cycles */
688                         group_index = info->stages[j].stall_group_index;
689                         counter_index = info->stages[j].stall_counter_index;
690                         op_cycles += op->counters[group_index][counter_index];
691                 }
692
693                 for (j = 0; j < num_shader_stages; j++) {
694                         double active_cycles, stall_cycles, stage_cycles;
695
696                         /* Active cycles */
697                         group_index = info->stages[j].active_group_index;
698                         counter_index = info->stages[j].active_counter_index;
699                         active_cycles = op->counters[group_index][counter_index];
700
701                         /* Stall cycles */
702                         group_index = info->stages[j].stall_group_index;
703                         counter_index = info->stages[j].stall_counter_index;
704                         stall_cycles = op->counters[group_index][counter_index];
705
706                         stage_cycles = active_cycles + stall_cycles;
707
708                         per_stage = &sorted[i * num_shader_stages + j];
709                         per_stage->metrics = op;
710
711                         if (op_cycles) {
712                                 per_stage->stage = &info->stages[j];
713                                 per_stage->time_ns = op->time_ns * (stage_cycles / op_cycles);
714                         } else {
715                                 /* If we don't have any per-stage cycle counts
716                                  * for this operation, then use the first
717                                  * stage as a placeholder for all the time,
718                                  * but NULL-ify the stage info so that the
719                                  * report doesn't lie about this time being
720                                  * from any particular stage. */
721                                 per_stage->stage = NULL;
722                                 if (j == 0) {
723                                         per_stage->time_ns = op->time_ns;
724                                 } else {
725                                         per_stage->time_ns = 0.0;
726                                 }
727                         }
728
729                         if (stage_cycles) {
730                                 per_stage->active = active_cycles / stage_cycles;
731                         } else {
732                                 per_stage->active = 0.0;
733                         }
734                 }
735         }
736
737         qsort_r (sorted, num_sorted, sizeof (*sorted),
738                  time_compare, metrics->op_metrics);
739
740         for (i = 0; i < num_sorted; i++)
741                 print_per_stage_metrics (metrics, &sorted[i], total_time);
742
743         free (sorted);
744 }
745
746 void
747 metrics_collect_available (metrics_t *metrics)
748 {
749         /* Consume all timer queries that are ready. */
750         timer_query_t *timer = metrics->timer_head;
751
752         while (timer) {
753                 GLuint available, elapsed;
754
755                 glGetQueryObjectuiv (timer->id,
756                                      GL_QUERY_RESULT_AVAILABLE, &available);
757                 if (! available)
758                         break;
759
760                 glGetQueryObjectuiv (timer->id,
761                                      GL_QUERY_RESULT, &elapsed);
762
763                 accumulate_program_time (metrics, timer->op, elapsed);
764
765                 metrics->timer_head = timer->next;
766                 if (metrics->timer_head == NULL)
767                         metrics->timer_tail = NULL;
768
769                 glDeleteQueries (1, &timer->id);
770
771                 free (timer);
772                 timer = metrics->timer_head;
773         }
774
775         if (! metrics->info->have_perfmon)
776                 return;
777
778         /* And similarly for all performance monitors that are ready. */
779         monitor_t *monitor = metrics->monitor_head;
780
781         while (monitor) {
782                 GLuint available, result_size, *result;
783                 GLint bytes_written;
784
785                 glGetPerfMonitorCounterDataAMD (monitor->id,
786                                                 GL_PERFMON_RESULT_AVAILABLE_AMD,
787                                                 sizeof (available), &available,
788                                                 NULL);
789                 if (! available)
790                         break;
791
792                 glGetPerfMonitorCounterDataAMD (monitor->id,
793                                                 GL_PERFMON_RESULT_SIZE_AMD,
794                                                 sizeof (result_size),
795                                                 &result_size, NULL);
796
797                 result = xmalloc (result_size);
798
799                 glGetPerfMonitorCounterDataAMD (monitor->id,
800                                                 GL_PERFMON_RESULT_AMD,
801                                                 result_size, result,
802                                                 &bytes_written);
803
804                 accumulate_program_metrics (metrics, monitor->op, result, result_size);
805
806                 free (result);
807
808                 metrics->monitor_head = monitor->next;
809                 if (metrics->monitor_head == NULL)
810                         metrics->monitor_tail = NULL;
811
812                 glDeletePerfMonitorsAMD (1, &monitor->id);
813
814                 free (monitor);
815
816                 metrics->monitors_in_flight--;
817
818                 monitor = metrics->monitor_head;
819         }
820 }
821
822 /* Subtract timespec values: Return (a - b) in seconds as a double. */
823 #define SUBTRACT_TIMESPEC(a, b) (double) (a.tv_sec - b.tv_sec + \
824                                           (a.tv_nsec - b.tv_nsec) / 1e9)
825
826 void
827 metrics_end_frame_pre_swap (metrics_t *metrics)
828 {
829         GLuint64 swap_end_timestamp, frame_time_ns, latency_ns;
830         double cpu_time;
831
832         /* Don't leave any counters running over the end_frame work we
833          * do here. The counters will be started again at the end of
834          * metrics_end_frame_post_swap.  */
835
836         metrics_counter_stop (metrics);
837
838         /* Now that an entire frame's worth of content has gone by, we
839          * can be sure that the timer quiery for the previous frame's
840          * swap is available.
841          *
842          * Note: The only case in which this query isn't immediately
843          * available would be if there was effectively nothing in this
844          * frame. In that case, triggering a block on this query
845          * result is not a concern (since an empty frame is not
846          * interesting, and all of our counters are stopped anyway).
847          *
848          * Accepting this one block is much better than adding a
849          * linked list for these results that would reaslisticly never
850          * have more than one entry anyway.
851          */
852
853         glGetQueryObjectui64v (metrics->swap_end_timestamp_id,
854                                GL_QUERY_RESULT, &swap_end_timestamp);
855
856         if (frames == 0) {
857                 /* Print header */
858                 printf ("# frame: Frame_Number Frame_Time_milliseconds Frame_latency_milliseconds CPU_load GPU_load\n");
859         } else {
860                 /* Subtract previous frame's times to get frame times. */
861                 frame_time_ns = subtract_timestamp (swap_end_timestamp,
862                                                     metrics->previous_swap_end_timestamp);
863
864                 latency_ns = subtract_timestamp (swap_end_timestamp,
865                                                  metrics->swap_begin_timestamp);
866
867                 cpu_time = SUBTRACT_TIMESPEC (metrics->cpu_time_ts,
868                                               metrics->previous_cpu_time_ts);
869
870                 /* We've waited one frame to ensure we have a timestamp
871                  * result. So the time we've actually measured here is
872                  * for the previous frame. */
873                 printf ("frame: %d %g %g %g %g\n",
874                         frames - 1,
875                         (double) frame_time_ns / 1e6,
876                         (double) latency_ns / 1e6,
877                         cpu_time / (frame_time_ns / 1e9),
878                         0.0);
879         }
880
881         metrics->previous_swap_end_timestamp = swap_end_timestamp;
882         metrics->previous_cpu_time_ts = metrics->cpu_time_ts;
883
884         /* Before the actual swap call, we get the current timestamp
885          * value. This is a synchronous get so we can use this as the
886          * baseline for a frame latency measurment. */
887
888         glGetInteger64v (GL_TIMESTAMP, &metrics->swap_begin_timestamp);
889 }
890
891 void
892 metrics_end_frame_post_swap (metrics_t *metrics)
893 {
894         static int initialized = 0;
895         static struct timeval tv_start, tv_now;
896         int err;
897
898         /* Now that the swap command has been queued, we issue an
899          * asynchronous query of the timestamp value. Comparing this
900          * to the synchronous get we just sent in
901          * metrics_end_frame_pre_swap allows us to measure the
902          * per-frame swap latency. */
903
904         glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP);
905
906         /* Query the total amount of CPU time spent by this process. */
907         err = clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts);
908         if (err == -1 ) {
909                 fprintf (stderr, "Failed to query CPU time with clock_gettime: %s\n",
910                          strerror (errno));
911                 exit (1);
912         }
913
914         if (! initialized) {
915                 gettimeofday (&tv_start, NULL);
916                 if (getenv ("FIPS_VERBOSE"))
917                         verbose = 1;
918                 initialized = 1;
919         }
920
921         metrics_collect_available (metrics);
922
923         frames++;
924
925         if (frames % 15 == 0) {
926                 double fps;
927
928                 gettimeofday (&tv_now, NULL);
929
930                 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
931                                          (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
932
933                 printf("FPS: %.3f\n", fps);
934
935                 print_program_metrics (metrics);
936         }
937
938         /* Start the counter up again now that we're done. */
939         metrics_counter_start (metrics);
940 }