]> git.cworth.org Git - fips/blob - metrics.c
stash
[fips] / metrics.c
1 /* Copyright © 2013, Intel Corporation
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  */
21
22 #define _GNU_SOURCE
23
24 #include <inttypes.h>
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <time.h>
30 #include <sys/time.h>
31 #include <string.h>
32 #include <errno.h>
33
34 #include "fips-dispatch-gl.h"
35
36 #include "metrics.h"
37 #include "context.h"
38 #include "metrics-info.h"
39 #include "xmalloc.h"
40
41 int frames;
42 int verbose;
43
44 #define MAX_MONITORS_IN_FLIGHT 1000
45
46 /* Timer query */
47 typedef struct timer_query
48 {
49         unsigned id;
50
51         metrics_op_t op;
52         struct timer_query *next;
53 } timer_query_t;
54
55 /* Performance-monitor query */
56 typedef struct monitor
57 {
58         unsigned id;
59
60         metrics_op_t op;
61         struct monitor *next;
62 } monitor_t;
63
64 typedef struct op_metrics
65 {
66         /* This happens to also be the index into the
67          * metrics->op_metrics array currently
68          */
69         metrics_op_t op;
70         double time_ns;
71
72         double **counters;
73 } op_metrics_t;
74
75 struct metrics
76 {
77         /* Flag to indicate first frame is being processed. */
78         bool first_frame;
79
80         /* Description of all available peformance counters, counter
81          * groups, their names and IDs, etc. */
82         metrics_info_t *info;
83
84         /* The current operation being measured. */
85         metrics_op_t op;
86
87         /* GL_TIME_ELAPSED query for which glEndQuery has not yet
88          * been called. */
89         unsigned timer_begun_id;
90
91         /* GL_TIME_ELAPSED queries for which glEndQuery has been
92          * called, (but results have not yet been queried). */
93         timer_query_t *timer_head;
94         timer_query_t *timer_tail;
95
96         /* Performance monitor for which glEndPerfMonitorAMD has not
97          * yet been called. */
98         unsigned monitor_begun_id;
99
100         /* Performance monitors for which glEndPerfMonitorAMD has
101          * been called, (but results have not yet been queried). */
102         monitor_t *monitor_head;
103         monitor_t *monitor_tail;
104
105         int monitors_in_flight;
106
107         unsigned num_op_metrics;
108         op_metrics_t *op_metrics;
109
110         /* Per-frame time and latency measurement. */
111         GLint64 swap_begin_timestamp;
112         unsigned swap_end_timestamp_id;
113         GLint64 previous_swap_end_timestamp;
114
115         struct timespec previous_cpu_time_ts;
116         struct timespec cpu_time_ts;
117 };
118
119 /* Return (a - b) while accounting for the fact that between a and b,
120  * the counter may have wrapped around (with a maximum number of bits
121  * in the counter of GL_QUERY_COUNTER_BITS). */
122 static uint64_t
123 subtract_timestamp (uint64_t a, uint64_t b)
124 {
125         GLint timestamp_bits;
126
127         glGetQueryiv (GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &timestamp_bits);
128
129         /* Note: "<<" isn't defined for the number of bits in the
130          * type, and the compiler will punish you.
131          */
132         if (timestamp_bits == 64)
133                 return a - b;
134
135         if (b > a)
136                 return (1ull << timestamp_bits) + a - b;
137         else
138                 return a - b;
139 }
140
141 metrics_t *
142 metrics_create (metrics_info_t *info)
143 {
144         metrics_t *metrics;
145
146         metrics = xmalloc (sizeof (metrics_t));
147
148         metrics->info = info;
149
150         metrics->first_frame = true;
151
152         metrics->op = 0;
153
154         metrics->timer_begun_id = 0;
155
156         metrics->timer_head = NULL;
157         metrics->timer_tail = NULL;
158
159         metrics->monitor_begun_id = 0;
160
161         metrics->monitor_head = NULL;
162         metrics->monitor_tail = NULL;
163
164         metrics->monitors_in_flight = 0;
165
166         metrics->num_op_metrics = 0;
167         metrics->op_metrics = NULL;
168
169         glGenQueries (1, &metrics->swap_end_timestamp_id);
170
171         /* Get the first frame timestamp started immediately. */
172         glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP);
173
174         /* As well as the initial CPU time. */
175         clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts);
176
177         return metrics;
178 }
179
180 void
181 metrics_fini (metrics_t *metrics)
182 {
183         timer_query_t *timer, *timer_next;
184         monitor_t *monitor, *monitor_next;
185
186         /* Discard and cleanup any outstanding queries. */
187         if (metrics->timer_begun_id) {
188                 glDeleteQueries (1, &metrics->timer_begun_id);
189                 metrics->timer_begun_id = 0;
190         }
191
192         for (timer = metrics->timer_head;
193              timer;
194              timer = timer_next)
195         {
196                 glDeleteQueries (1, &timer->id);
197                 timer_next = timer->next;
198                 free (timer);
199         }
200         metrics->timer_head = NULL;
201         metrics->timer_tail = NULL;
202
203         if (metrics->info->have_perfmon) {
204
205                 if (metrics->monitor_begun_id) {
206                         glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id);
207                         metrics->monitor_begun_id = 0;
208                 }
209
210                 for (monitor = metrics->monitor_head;
211                      monitor;
212                      monitor = monitor_next)
213                 {
214                         glDeletePerfMonitorsAMD (1, &monitor->id);
215                         monitor_next = monitor->next;
216                         free (monitor);
217                 }
218                 metrics->monitor_head = NULL;
219                 metrics->monitor_tail = NULL;
220
221         }
222
223         metrics->monitors_in_flight = 0;
224 }
225
226 void
227 metrics_destroy (metrics_t *metrics)
228 {
229         metrics_fini (metrics);
230
231         free (metrics);
232 }
233
234 static const char *
235 metrics_op_string (metrics_op_t op)
236 {
237         if (op >= METRICS_OP_SHADER)
238                 return "Shader program";
239
240         switch (op)
241         {
242         case METRICS_OP_ACCUM:
243                 return "glAccum*(+)";
244         case METRICS_OP_BUFFER_DATA:
245                 return "glBufferData(+)";
246         case METRICS_OP_BUFFER_SUB_DATA:
247                 return "glCopyBufferSubData*";
248         case METRICS_OP_BITMAP:
249                 return "glBitmap*";
250         case METRICS_OP_BLIT_FRAMEBUFFER:
251                 return "glBlitFramebuffer*";
252         case METRICS_OP_CLEAR:
253                 return "glClear(+)";
254         case METRICS_OP_CLEAR_BUFFER_DATA:
255                 return "glCearBufferData(+)";
256         case METRICS_OP_CLEAR_TEX_IMAGE:
257                 return "glClearTexImage(+)";
258         case METRICS_OP_COPY_PIXELS:
259                 return "glCopyPixels";
260         case METRICS_OP_COPY_TEX_IMAGE:
261                 return "glCopyTexImage(+)";
262         case METRICS_OP_DRAW_PIXELS:
263                 return "glDrawPixels";
264         case METRICS_OP_GET_TEX_IMAGE:
265                 return "glGetTexImage(+)";
266         case METRICS_OP_READ_PIXELS:
267                 return "glReadPixels*";
268         case METRICS_OP_TEX_IMAGE:
269                 return "glTexImage*(+)";
270         default:
271                 fprintf (stderr, "fips: Internal error: "
272                          "Unknown metrics op value: %d\n", op);
273                 exit (1);
274         }
275
276         return "";
277 }
278
279 void
280 metrics_counter_start (metrics_t *metrics)
281 {
282         unsigned i;
283
284         /* Initialize the timer_query object. */
285         glGenQueries (1, &metrics->timer_begun_id);
286
287         /* Most everything else in this function is
288          * performance-monitor related. If we don't have that
289          * extension, just start the timer query and be done. */
290         if (! metrics->info->have_perfmon) {
291                 glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id);
292                 return;
293         }
294
295         /* Initialize the performance-monitor object */
296         glGenPerfMonitorsAMD (1, &metrics->monitor_begun_id);
297
298         for (i = 0; i < metrics->info->num_groups; i++)
299         {
300                 metrics_group_info_t *group;
301                 int num_counters;
302
303                 group = &metrics->info->groups[i];
304
305                 num_counters = group->num_counters;
306                 if (group->max_active_counters < group->num_counters)
307                 {
308                         fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
309                                  group->max_active_counters,
310                                  group->num_counters, i);
311                         num_counters = group->max_active_counters;
312
313                 }
314
315                 glSelectPerfMonitorCountersAMD(metrics->monitor_begun_id,
316                                                GL_TRUE, group->id,
317                                                num_counters,
318                                                group->counter_ids);
319         }
320
321         /* Start the queries */
322         glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id);
323
324         glBeginPerfMonitorAMD (metrics->monitor_begun_id);
325 }
326
327 void
328 metrics_counter_stop (metrics_t *metrics)
329 {
330         timer_query_t *timer;
331         monitor_t *monitor;
332
333         /* Stop the current timer and monitor. */
334         glEndQuery (GL_TIME_ELAPSED);
335
336         if (metrics->info->have_perfmon)
337                 glEndPerfMonitorAMD (metrics->monitor_begun_id);
338
339         /* Add these IDs to our lists of outstanding queries and
340          * monitors so the results can be collected later. */
341         timer = xmalloc (sizeof (timer_query_t));
342
343         timer->op = metrics->op;
344         timer->id = metrics->timer_begun_id;
345         timer->next = NULL;
346
347         if (metrics->timer_tail) {
348                 metrics->timer_tail->next = timer;
349                 metrics->timer_tail = timer;
350         } else {
351                 metrics->timer_tail = timer;
352                 metrics->timer_head = timer;
353         }
354
355         if (metrics->info->have_perfmon) {
356                 /* Create a new performance-monitor query */
357                 monitor = xmalloc (sizeof (monitor_t));
358
359                 monitor->op = metrics->op;
360                 monitor->id = metrics->monitor_begun_id;
361                 monitor->next = NULL;
362
363                 if (metrics->monitor_tail) {
364                         metrics->monitor_tail->next = monitor;
365                         metrics->monitor_tail = monitor;
366                 } else {
367                         metrics->monitor_tail = monitor;
368                         metrics->monitor_head = monitor;
369                 }
370         }
371
372         metrics->monitors_in_flight++;
373
374         /* Avoid being a resource hog and collect outstanding results
375          * once we have sent off a large number of
376          * queries. (Presumably, many of the outstanding queries are
377          * available by now.)
378          */
379         if (metrics->monitors_in_flight > MAX_MONITORS_IN_FLIGHT)
380                 metrics_collect_available (metrics);
381 }
382
383 void
384 metrics_set_current_op (metrics_t *metrics, metrics_op_t op)
385 {
386         metrics->op = op;
387 }
388
389 metrics_op_t
390 metrics_get_current_op (metrics_t *metrics)
391 {
392         return metrics->op;
393 }
394
395 static void
396 op_metrics_init (metrics_info_t *info, op_metrics_t *metrics, metrics_op_t op)
397 {
398         unsigned i, j;
399
400         metrics->op = op;
401         metrics->time_ns = 0.0;
402
403         metrics->counters = xmalloc (sizeof(double *) * info->num_groups);
404
405         for (i = 0; i < info->num_groups; i++) {
406                 metrics->counters[i] = xmalloc (sizeof (double) *
407                                                 info->groups[i].num_counters);
408                 for (j = 0; j < info->groups[i].num_counters; j++)
409                         metrics->counters[i][j] = 0.0;
410         }
411 }
412
413 static op_metrics_t *
414 _get_op_metrics (metrics_t *metrics, metrics_op_t op)
415 {
416         unsigned i;
417
418         if (op >= metrics->num_op_metrics)
419         {
420                 metrics->op_metrics = realloc (metrics->op_metrics,
421                                                (op + 1) * sizeof (op_metrics_t));
422                 for (i = metrics->num_op_metrics; i < op + 1; i++)
423                         op_metrics_init (metrics->info, &metrics->op_metrics[i], i);
424
425                 metrics->num_op_metrics = op + 1;
426         }
427
428         return &metrics->op_metrics[op];
429 }
430
431 static void
432 accumulate_program_metrics (metrics_t *metrics, metrics_op_t op,
433                             GLuint *result, GLuint size)
434 {
435 #define CONSUME(var)                                                    \
436         if (p + sizeof(var) > ((unsigned char *) result) + size)        \
437         {                                                               \
438                 fprintf (stderr, "Unexpected end-of-buffer while "      \
439                          "parsing results\n");                          \
440                 value = 0.0;                                            \
441                 break;                                                  \
442         }                                                               \
443         (var) = *((typeof(var) *) p);                                   \
444         p += sizeof(var);
445
446         metrics_info_t *info = metrics->info;
447         op_metrics_t *op_metrics = _get_op_metrics (metrics, op);
448         unsigned char *p = (unsigned char *) result;
449
450         while (p < ((unsigned char *) result) + size)
451         {
452                 GLuint group_id, group_index;
453                 GLuint counter_id, counter_index;
454                 metrics_group_info_t *group;
455                 double value;
456                 unsigned i;
457
458                 CONSUME (group_id);
459                 CONSUME (counter_id);
460
461                 for (i = 0; i < info->num_groups; i++) {
462                         if (info->groups[i].id == group_id)
463                                 break;
464                 }
465                 group_index = i;
466                 assert (group_index < info->num_groups);
467                 group = &info->groups[group_index];
468
469                 for (i = 0; i < group->num_counters; i++) {
470                         if (group->counter_ids[i] == counter_id)
471                                 break;
472                 }
473                 counter_index = i;
474                 assert (counter_index < group->num_counters);
475
476                 switch (group->counter_types[counter_index])
477                 {
478                         uint uint_value;
479                         uint64_t uint64_value;
480                         float float_value;
481                 case GL_UNSIGNED_INT:
482                         CONSUME (uint_value);
483                         value = uint_value;
484                         break;
485                 case GL_UNSIGNED_INT64_AMD:
486                         CONSUME (uint64_value);
487                         value = uint64_value;
488                         break;
489                 case GL_PERCENTAGE_AMD:
490                 case GL_FLOAT:
491                         CONSUME (float_value);
492                         value = float_value;
493                         break;
494                 default:
495                         fprintf (stderr, "fips: Warning: Unknown counter value type (%d)\n",
496                                  group->counter_types[counter_index]);
497                         value = 0.0;
498                         break;
499                 }
500
501                 op_metrics->counters[group_index][counter_index] += value;
502         }
503 }
504
505 static void
506 accumulate_program_time (metrics_t *metrics, metrics_op_t op, unsigned time_ns)
507 {
508         op_metrics_t *op_metrics;
509
510         op_metrics = _get_op_metrics (metrics, op);
511
512         op_metrics->time_ns += time_ns;
513 }
514
515 typedef struct per_stage_metrics
516 {
517         op_metrics_t *metrics;
518         shader_stage_info_t *stage;
519         double time_ns;
520         double active;
521 } per_stage_metrics_t;
522
523 static int
524 _is_shader_stage_counter (metrics_info_t *info,
525                           unsigned group_index,
526                           unsigned counter_index)
527 {
528         shader_stage_info_t *stage;
529         unsigned i;
530
531         for (i = 0; i < info->num_shader_stages; i++) {
532                 stage = &info->stages[i];
533
534                 if (stage->active_group_index == group_index &&
535                     stage->active_counter_index == counter_index)
536                 {
537                         return 1;
538                 }
539
540                 if (stage->stall_group_index == group_index &&
541                     stage->stall_counter_index == counter_index)
542                 {
543                         return 1;
544                 }
545         }
546
547         return 0;
548 }
549
550 static void
551 print_per_stage_metrics (metrics_t *metrics,
552                          per_stage_metrics_t *per_stage,
553                          double total)
554 {
555         metrics_info_t *info = metrics->info;
556         op_metrics_t *op_metrics = per_stage->metrics;
557         metrics_group_info_t *group;
558         const char *op_string;
559         unsigned group_index, counter;
560         double value;
561         int program_name_length = 0;
562
563         /* Don't print anything for stages with no alloted time. */
564         if (per_stage->time_ns == 0.0)
565                 return;
566
567         op_string = metrics_op_string (op_metrics->op);
568
569         if (op_metrics->op >= METRICS_OP_SHADER) {
570                 int program = op_metrics->op - METRICS_OP_SHADER;
571                 glGetObjectLabel (GL_PROGRAM, program, 0,
572                                   &program_name_length, NULL);
573                 if (program_name_length) {
574                         char *program_name;
575
576                         program_name = malloc(program_name_length + 1);
577                         if (program_name == 0) {
578                                 fprintf (stderr, "Out of memory.\n");
579                                 exit (1);
580                         }
581                         glGetObjectLabel (GL_PROGRAM, program,
582                                           program_name_length + 1,
583                                           NULL, program_name);
584
585                         printf ("%21s    ", program_name);
586                 } else {
587                         printf ("%21s %3d", op_string, program);
588                 }
589         }
590         else {
591                 printf ("%21s    ", op_string);
592         }
593
594         if (per_stage->stage)
595                 printf (" %cS:", per_stage->stage->name[0]);
596         else
597                 printf ("   :");
598
599         printf ("\t%7.2f ms (%4.1f%%)",
600                 per_stage->time_ns / 1e6,
601                 per_stage->time_ns / total * 100);
602
603         if (per_stage->active)
604                 printf (", %4.1f%% active", per_stage->active * 100);
605
606         printf ("\n");
607
608         /* I'm not seeing a lot of value printing the rest of these
609          * performance counters by default yet. Use --verbose to get
610          * them for now. */
611         if (! verbose)
612                 return;
613
614         printf ("[");
615         for (group_index = 0; group_index < info->num_groups; group_index++) {
616                 group = &info->groups[group_index];
617                 for (counter = 0; counter < group->num_counters; counter++) {
618
619                         /* Don't print this counter value if it's a
620                          * per-stage cycle counter, (which we have
621                          * already accounted for). */
622                         if (_is_shader_stage_counter (info, group_index, counter))
623                                 continue;
624
625                         value = op_metrics->counters[group_index][counter];
626                         if (value == 0.0)
627                                 continue;
628                         printf ("%s: %.2f ", group->counter_names[counter],
629                                 value / 1e6);
630                 }
631         }
632         printf ("]\n");
633 }
634
635 static int
636 time_compare(const void *in_a, const void *in_b, void *arg unused)
637 {
638         const per_stage_metrics_t *a = in_a;
639         const per_stage_metrics_t *b = in_b;
640
641
642         if (a->time_ns < b->time_ns)
643                 return -1;
644         if (a->time_ns > b->time_ns)
645                 return 1;
646         return 0;
647 }
648
649 static void
650 print_program_metrics (metrics_t *metrics)
651 {
652         metrics_info_t *info = metrics->info;
653         unsigned num_shader_stages = info->num_shader_stages;
654         per_stage_metrics_t *sorted, *per_stage;
655         double total_time, op_cycles;
656         op_metrics_t *op;
657         unsigned group_index, counter_index;
658         unsigned i, j, num_sorted;
659
660         if (! info->have_perfmon &&
661             ! info->printed_missing_perfmon_warning)
662         {
663                 fprintf (stderr,
664                          "Warning: The GL_AMD_performance_monitor extensions is not available.\n"
665                          "         Some fips features, (such as per-shader-stage timings),\n"
666                          "         will not be available.\n");
667
668                 info->printed_missing_perfmon_warning = true;
669         }
670
671         /* Make a sorted list of the per-stage operations by time
672          * used, and figure out the total so we can print percentages.
673          */
674         if (num_shader_stages)
675                 num_sorted = metrics->num_op_metrics * num_shader_stages;
676         else
677                 num_sorted = metrics->num_op_metrics;
678
679         sorted = xmalloc (sizeof (*sorted) * num_sorted);
680
681         total_time = 0.0;
682
683         for (i = 0; i < metrics->num_op_metrics; i++) {
684
685                 op = &metrics->op_metrics[i];
686
687                 /* Accumulate total time across all ops. */
688                 total_time += op->time_ns;
689
690                 /* Also, find total cycles in all stages of this op. */
691                 op_cycles = 0.0;
692
693                 if (num_shader_stages == 0) {
694                         per_stage = &sorted[i];
695                         per_stage->metrics = op;
696                         per_stage->stage = NULL;
697                         per_stage->time_ns = op->time_ns;
698                         per_stage->active = 0.0;
699                 }
700
701                 for (j = 0; j < num_shader_stages; j++) {
702                         /* Active cycles */
703                         group_index = info->stages[j].active_group_index;
704                         counter_index = info->stages[j].active_counter_index;
705                         op_cycles += op->counters[group_index][counter_index];
706
707                         /* Stall cycles */
708                         group_index = info->stages[j].stall_group_index;
709                         counter_index = info->stages[j].stall_counter_index;
710                         op_cycles += op->counters[group_index][counter_index];
711                 }
712
713                 for (j = 0; j < num_shader_stages; j++) {
714                         double active_cycles, stall_cycles, stage_cycles;
715
716                         /* Active cycles */
717                         group_index = info->stages[j].active_group_index;
718                         counter_index = info->stages[j].active_counter_index;
719                         active_cycles = op->counters[group_index][counter_index];
720
721                         /* Stall cycles */
722                         group_index = info->stages[j].stall_group_index;
723                         counter_index = info->stages[j].stall_counter_index;
724                         stall_cycles = op->counters[group_index][counter_index];
725
726                         stage_cycles = active_cycles + stall_cycles;
727
728                         per_stage = &sorted[i * num_shader_stages + j];
729                         per_stage->metrics = op;
730
731                         if (op_cycles) {
732                                 per_stage->stage = &info->stages[j];
733                                 per_stage->time_ns = op->time_ns * (stage_cycles / op_cycles);
734                         } else {
735                                 /* If we don't have any per-stage cycle counts
736                                  * for this operation, then use the first
737                                  * stage as a placeholder for all the time,
738                                  * but NULL-ify the stage info so that the
739                                  * report doesn't lie about this time being
740                                  * from any particular stage. */
741                                 per_stage->stage = NULL;
742                                 if (j == 0) {
743                                         per_stage->time_ns = op->time_ns;
744                                 } else {
745                                         per_stage->time_ns = 0.0;
746                                 }
747                         }
748
749                         if (stage_cycles) {
750                                 per_stage->active = active_cycles / stage_cycles;
751                         } else {
752                                 per_stage->active = 0.0;
753                         }
754                 }
755         }
756
757         qsort_r (sorted, num_sorted, sizeof (*sorted),
758                  time_compare, metrics->op_metrics);
759
760         for (i = 0; i < num_sorted; i++)
761                 print_per_stage_metrics (metrics, &sorted[i], total_time);
762
763         free (sorted);
764 }
765
766 void
767 metrics_collect_available (metrics_t *metrics)
768 {
769         /* Consume all timer queries that are ready. */
770         timer_query_t *timer = metrics->timer_head;
771
772         while (timer) {
773                 GLuint available, elapsed;
774
775                 glGetQueryObjectuiv (timer->id,
776                                      GL_QUERY_RESULT_AVAILABLE, &available);
777                 if (! available)
778                         break;
779
780                 glGetQueryObjectuiv (timer->id,
781                                      GL_QUERY_RESULT, &elapsed);
782
783                 accumulate_program_time (metrics, timer->op, elapsed);
784
785                 metrics->timer_head = timer->next;
786                 if (metrics->timer_head == NULL)
787                         metrics->timer_tail = NULL;
788
789                 glDeleteQueries (1, &timer->id);
790
791                 free (timer);
792                 timer = metrics->timer_head;
793         }
794
795         if (! metrics->info->have_perfmon)
796                 return;
797
798         /* And similarly for all performance monitors that are ready. */
799         monitor_t *monitor = metrics->monitor_head;
800
801         while (monitor) {
802                 GLuint available, result_size, *result;
803                 GLint bytes_written;
804
805                 glGetPerfMonitorCounterDataAMD (monitor->id,
806                                                 GL_PERFMON_RESULT_AVAILABLE_AMD,
807                                                 sizeof (available), &available,
808                                                 NULL);
809                 if (! available)
810                         break;
811
812                 glGetPerfMonitorCounterDataAMD (monitor->id,
813                                                 GL_PERFMON_RESULT_SIZE_AMD,
814                                                 sizeof (result_size),
815                                                 &result_size, NULL);
816
817                 result = xmalloc (result_size);
818
819                 glGetPerfMonitorCounterDataAMD (monitor->id,
820                                                 GL_PERFMON_RESULT_AMD,
821                                                 result_size, result,
822                                                 &bytes_written);
823
824                 accumulate_program_metrics (metrics, monitor->op, result, result_size);
825
826                 free (result);
827
828                 metrics->monitor_head = monitor->next;
829                 if (metrics->monitor_head == NULL)
830                         metrics->monitor_tail = NULL;
831
832                 glDeletePerfMonitorsAMD (1, &monitor->id);
833
834                 free (monitor);
835
836                 metrics->monitors_in_flight--;
837
838                 monitor = metrics->monitor_head;
839         }
840 }
841
842 /* Subtract timespec values: Return (a - b) in seconds as a double. */
843 #define SUBTRACT_TIMESPEC(a, b) (double) (a.tv_sec - b.tv_sec + \
844                                           (a.tv_nsec - b.tv_nsec) / 1e9)
845
846 void
847 metrics_end_frame_pre_swap (metrics_t *metrics)
848 {
849         GLuint64 swap_end_timestamp, frame_time_ns, latency_ns;
850         double cpu_time;
851
852         /* Don't leave any counters running over the end_frame work we
853          * do here. The counters will be started again at the end of
854          * metrics_end_frame_post_swap.  */
855
856         metrics_counter_stop (metrics);
857
858         /* Now that an entire frame's worth of content has gone by, we
859          * can be sure that the timer quiery for the previous frame's
860          * swap is available.
861          *
862          * Note: The only case in which this query isn't immediately
863          * available would be if there was effectively nothing in this
864          * frame. In that case, triggering a block on this query
865          * result is not a concern (since an empty frame is not
866          * interesting, and all of our counters are stopped anyway).
867          *
868          * Accepting this one block is much better than adding a
869          * linked list for these results that would reaslisticly never
870          * have more than one entry anyway.
871          */
872
873         glGetQueryObjectui64v (metrics->swap_end_timestamp_id,
874                                GL_QUERY_RESULT, &swap_end_timestamp);
875  
876         if (metrics->first_frame) {
877                 /* Print header */
878                 printf ("# frame: Frame_Number Frame_Time_milliseconds Frame_latency_milliseconds CPU_load GPU_load\n");
879
880                 metrics->first_frame = false;
881         } else {
882                 /* Subtract previous frame's times to get frame times. */
883                 frame_time_ns = subtract_timestamp (swap_end_timestamp,
884                                                     metrics->previous_swap_end_timestamp);
885
886                 latency_ns = subtract_timestamp (swap_end_timestamp,
887                                                  metrics->swap_begin_timestamp);
888
889                 cpu_time = SUBTRACT_TIMESPEC (metrics->cpu_time_ts,
890                                               metrics->previous_cpu_time_ts);
891
892                 /* We've waited one frame to ensure we have a timestamp
893                  * result. So the time we've actually measured here is
894                  * for the previous frame. */
895                 printf ("frame: %d %g %g %g %g\n",
896                         frames - 1,
897                         (double) frame_time_ns / 1e6,
898                         (double) latency_ns / 1e6,
899                         cpu_time / (frame_time_ns / 1e9),
900                         0.0);
901         }
902
903         metrics->previous_swap_end_timestamp = swap_end_timestamp;
904         metrics->previous_cpu_time_ts = metrics->cpu_time_ts;
905
906         /* Before the actual swap call, we get the current timestamp
907          * value. This is a synchronous get so we can use this as the
908          * baseline for a frame latency measurment. */
909
910         glGetInteger64v (GL_TIMESTAMP, &metrics->swap_begin_timestamp);
911 }
912
913 void
914 metrics_end_frame_post_swap (metrics_t *metrics)
915 {
916         static int initialized = 0;
917         static struct timeval tv_start, tv_now;
918         int err;
919
920         /* Now that the swap command has been queued, we issue an
921          * asynchronous query of the timestamp value. Comparing this
922          * to the synchronous get we just sent in
923          * metrics_end_frame_pre_swap allows us to measure the
924          * per-frame swap latency. */
925
926         glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP);
927
928         /* Query the total amount of CPU time spent by this process. */
929         err = clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts);
930         if (err == -1 ) {
931                 fprintf (stderr, "Failed to query CPU time with clock_gettime: %s\n",
932                          strerror (errno));
933                 exit (1);
934         }
935
936         if (! initialized) {
937                 gettimeofday (&tv_start, NULL);
938                 if (getenv ("FIPS_VERBOSE"))
939                         verbose = 1;
940                 initialized = 1;
941         }
942
943         metrics_collect_available (metrics);
944
945         frames++;
946
947         if (frames) {
948                 double fps;
949
950                 gettimeofday (&tv_now, NULL);
951
952                 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
953                                          (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
954
955                 printf("FPS: %.3f\n", fps);
956
957                 print_program_metrics (metrics);
958         }
959
960         /* Start the counter up again now that we're done. */
961         metrics_counter_start (metrics);
962 }