1 /* Copyright © 2013, Intel Corporation
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "fips-dispatch-gl.h"
// Node in a list of GL timer queries; `next` links to another node.
// Other code in this file also accesses `id` and `op` members on this type.
// NOTE(review): this extract has dropped lines (braces, remaining members and
// the closing typedef name are not visible); confirm against the original file.
35 typedef struct timer_query
40 struct timer_query *next;
// Other code in this file accesses `id`, `op` and `next` members on this type.
// NOTE(review): the member list itself is missing from this extract.
43 /* Performance-monitor query */
44 typedef struct monitor
// Accumulated results for one operation. Code in this file reads and writes
// its `op`, `time_ns` and `counters` members (counters is indexed first by
// group, then by counter).
// NOTE(review): the member declarations and the end of the comment below are
// missing from this extract; confirm against the original file.
53 typedef struct op_metrics
55 /* This happens to also be the index into the
56 * ctx->op_metrics array currently
// Description of one performance-monitor group. Besides the two members
// visible here, code in this file also uses `id`, `name`, `num_counters`,
// `counters` and `counter_names` on this type.
// NOTE(review): those other member declarations are missing from this extract.
64 typedef struct metrics_group_info
70 GLuint max_active_counters;
74 GLuint *counter_types;
76 } metrics_group_info_t;
// Array of per-group descriptions; code in this file pairs `groups` with a
// `num_groups` member giving its length.
// NOTE(review): the num_groups declaration is missing from this extract.
78 typedef struct metrics_info
81 metrics_group_info_t *groups;
// Per-context metrics state: the group/counter descriptions, head and tail
// pointers for the list of timer queries and for the list of performance
// monitors, and the op_metrics array together with its length.
// Code in this file also reads and writes an `op` member on this type.
// NOTE(review): the `op` declaration and the braces are missing from this extract.
84 typedef struct context
86 metrics_info_t metrics_info;
90 timer_query_t *timer_head;
91 timer_query_t *timer_tail;
93 monitor_t *monitor_head;
94 monitor_t *monitor_tail;
96 unsigned num_op_metrics;
97 op_metrics_t *op_metrics;
// The one context instance that the functions in this file operate on.
100 /* FIXME: Need a map from integers to context objects and track the
101 * current context with glXMakeContextCurrent, eglMakeCurrent, etc. */
103 context_t current_context;
// Populate `group` for performance-monitor group `id` using the
// glGetPerfMonitor*AMD queries: group name, number of counters and maximum
// number of active counters, the counter ids, and each counter's type and name.
// NOTE(review): this extract has dropped lines (return type, braces, local
// declarations, trailing call arguments); no assignment to group->id is
// visible even though it is read below. Confirm against the original file.
109 metrics_group_info_init (metrics_group_info_t *group, GLuint id)
// Ask for the name length with a zero-sized buffer, then fetch the name
// into a buffer of length + 1 bytes.
117 glGetPerfMonitorGroupStringAMD (id, 0, &length, NULL);
119 group->name = xmalloc (length + 1);
121 glGetPerfMonitorGroupStringAMD (id, length + 1, NULL, group->name);
123 /* Get number of counters */
124 group->num_counters = 0;
125 group->max_active_counters = 0;
126 glGetPerfMonitorCountersAMD (group->id,
127 (int *) &group->num_counters,
128 (int *) &group->max_active_counters,
131 /* Get counter numbers */
132 group->counters = xmalloc (group->num_counters * sizeof (GLuint));
134 glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
138 /* Get counter names */
// Both per-counter arrays have one entry per counter in the group.
139 group->counter_names = xmalloc (group->num_counters * sizeof (char *));
140 group->counter_types = xmalloc (group->num_counters * sizeof (GLuint));
142 for (i = 0; i < group->num_counters; i++) {
143 glGetPerfMonitorCounterInfoAMD (group->id,
146 &group->counter_types[i]);
148 /* We assume that all performance counters are made
149 * available as uint32 values. The code calling
150 * CONSUME in accumulate_program_metrics will need to
151 * be extended to accommodate other counter values. */
// NOTE(review): whatever follows the error message (exit, return, ...) is
// missing from this extract.
152 if (group->counter_types[i] != GL_UNSIGNED_INT) {
153 fprintf (stderr, "fips: Internal error: No support for non-uint counter values\n");
// The counter name is stored in a freshly allocated buffer of length + 1 bytes.
157 glGetPerfMonitorCounterStringAMD (group->id,
161 group->counter_names[i] = xmalloc (length + 1);
163 glGetPerfMonitorCounterStringAMD (group->id,
166 group->counter_names[i]);
// Populate the metrics_info of the current context: query the number of
// performance-monitor groups, fetch their ids into a temporary array,
// allocate one metrics_group_info_t per group and initialize each of them.
// NOTE(review): `¤t_context` below looks like a mis-decoded `&current_context`
// (an `&curren` HTML-entity artifact); confirm against the original file.
// NOTE(review): the loop passes the index `i`, not `group_ids[i]`, to
// metrics_group_info_init; that is only correct if group ids equal their
// position in the list. TODO confirm.
// NOTE(review): no free of group_ids is visible in this extract.
171 metrics_info_init (void)
175 metrics_info_t *metrics_info = ¤t_context.metrics_info;
177 glGetPerfMonitorGroupsAMD ((int *) &metrics_info->num_groups, 0, NULL);
179 group_ids = xmalloc (metrics_info->num_groups * sizeof (GLuint));
181 glGetPerfMonitorGroupsAMD (NULL, metrics_info->num_groups, group_ids);
183 metrics_info->groups = xmalloc (metrics_info->num_groups * sizeof (metrics_group_info_t));
185 for (i = 0; i < metrics_info->num_groups; i++)
186 metrics_group_info_init (&metrics_info->groups[i], i);
// Return a human-readable label for `op`. Every value at or above
// METRICS_OP_SHADER maps to "Shader program"; the remaining ops are matched
// case by case. The fprintf at the end reports an unknown op value on stderr.
// NOTE(review): this extract has dropped lines: the return type, the switch
// statement itself, the return lines for METRICS_OP_BITMAP and
// METRICS_OP_CLEAR, and whatever surrounds the final fprintf (presumably a
// default case). Confirm against the original file.
// NOTE(review): "glCearBufferData(+)" below looks like a typo for
// "glClearBufferData(+)". It is a runtime string, so it is left unchanged here.
192 metrics_op_string (metrics_op_t op)
194 if (op >= METRICS_OP_SHADER)
195 return "Shader program";
199 case METRICS_OP_ACCUM:
200 return "glAccum*(+)";
201 case METRICS_OP_BUFFER_DATA:
202 return "glBufferData(+)";
203 case METRICS_OP_BUFFER_SUB_DATA:
204 return "glCopyBufferSubData*";
205 case METRICS_OP_BITMAP:
207 case METRICS_OP_BLIT_FRAMEBUFFER:
208 return "glBlitFramebuffer*";
209 case METRICS_OP_CLEAR:
211 case METRICS_OP_CLEAR_BUFFER_DATA:
212 return "glCearBufferData(+)";
213 case METRICS_OP_CLEAR_TEX_IMAGE:
214 return "glClearTexImage(+)";
215 case METRICS_OP_COPY_PIXELS:
216 return "glCopyPixels";
217 case METRICS_OP_COPY_TEX_IMAGE:
218 return "glCopyTexImage(+)";
219 case METRICS_OP_DRAW_PIXELS:
220 return "glDrawPixels";
221 case METRICS_OP_GET_TEX_IMAGE:
222 return "glGetTexImage(+)";
223 case METRICS_OP_READ_PIXELS:
224 return "glReadPixels*";
225 case METRICS_OP_TEX_IMAGE:
226 return "glTexImage*(+)";
228 fprintf (stderr, "fips: Internal error: "
229 "Unknown metrics op value: %d\n", op);
// Begin a measurement: allocate a timer_query and a monitor, append each to
// the tail of its list in the current context, generate the GL query and the
// perf-monitor object, select counters for every group, then begin the
// GL_TIME_ELAPSED query and the perf monitor.
// NOTE(review): this extract has dropped lines (return type, braces, local
// declarations, the timer field initialisation, the else keywords, and the
// arguments of glSelectPerfMonitorCountersAMD). `¤t_context` below looks
// like a mis-decoded `&current_context`. Confirm against the original file.
237 metrics_counter_start (void)
239 context_t *ctx = ¤t_context;
240 timer_query_t *timer;
244 /* Create new timer query, add to list */
245 timer = xmalloc (sizeof (timer_query_t));
// Tail append; the second pair of assignments covers the empty-list case
// (presumably the else branch, whose keyword line is missing).
250 if (ctx->timer_tail) {
251 ctx->timer_tail->next = timer;
252 ctx->timer_tail = timer;
254 ctx->timer_tail = timer;
255 ctx->timer_head = timer;
258 /* Create a new performance-monitor query */
259 monitor = xmalloc (sizeof (monitor_t));
// The monitor records the op that is current at the time it is started.
261 monitor->op = ctx->op;
262 monitor->next = NULL;
264 if (ctx->monitor_tail) {
265 ctx->monitor_tail->next = monitor;
266 ctx->monitor_tail = monitor;
268 ctx->monitor_tail = monitor;
269 ctx->monitor_head = monitor;
272 /* Initialize the timer_query and monitor objects */
273 glGenQueries (1, &timer->id);
275 glGenPerfMonitorsAMD (1, &monitor->id);
277 for (i = 0; i < ctx->metrics_info.num_groups; i++)
279 metrics_group_info_t *group;
282 group = &ctx->metrics_info.groups[i];
// Cap the number of counters at the group's max_active_counters and warn
// when that means some counters are not monitored.
284 num_counters = group->num_counters;
285 if (group->max_active_counters < group->num_counters)
287 fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
288 group->max_active_counters,
289 group->num_counters, i);
290 num_counters = group->max_active_counters;
294 glSelectPerfMonitorCountersAMD(monitor->id,
300 /* Start the queries */
301 glBeginQuery (GL_TIME_ELAPSED, timer->id);
303 glBeginPerfMonitorAMD (monitor->id);
// End the GL_TIME_ELAPSED query and the performance monitor at the tail of
// the current context's monitor list.
// NOTE(review): monitor_tail is dereferenced with no visible NULL check;
// presumably this is only called after metrics_counter_start. Verify callers.
307 metrics_counter_stop (void)
309 glEndQuery (GL_TIME_ELAPSED);
310 glEndPerfMonitorAMD (current_context.monitor_tail->id);
// Store `op` as the current operation of the current context.
314 metrics_set_current_op (metrics_op_t op)
316 current_context.op = op;
// Return the current operation stored in the current context.
320 metrics_get_current_op (void)
322 return current_context.op;
// Reset `metrics`: zero the accumulated time and allocate one array of
// doubles per group in ctx->metrics_info (one entry per counter of that
// group), with every counter set to 0.0.
// NOTE(review): no use of the `op` parameter is visible in this extract;
// presumably a dropped line stores it in metrics->op. Confirm.
326 op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
328 metrics_info_t *info = &ctx->metrics_info;
332 metrics->time_ns = 0.0;
334 metrics->counters = xmalloc (sizeof(double *) * info->num_groups);
336 for (i = 0; i < info->num_groups; i++) {
337 metrics->counters[i] = xmalloc (sizeof (double) *
338 info->groups[i].num_counters);
339 for (j = 0; j < info->groups[i].num_counters; j++)
340 metrics->counters[i][j] = 0.0;
// Return the op_metrics_t slot for `op` in ctx->op_metrics. When `op` is at
// or beyond the current array length, the array is grown to op + 1 entries
// and every new entry is initialized with op_metrics_init.
// Because the array may be reallocated, a pointer returned earlier can be
// invalidated by a later call that grows it.
// NOTE(review): the realloc result is assigned straight back with no visible
// failure check, unlike the xmalloc calls used elsewhere in this file.
344 static op_metrics_t *
345 ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
349 if (op >= ctx->num_op_metrics)
351 ctx->op_metrics = realloc (ctx->op_metrics,
352 (op + 1) * sizeof (op_metrics_t));
353 for (i = ctx->num_op_metrics; i < op + 1; i++)
354 op_metrics_init (ctx, &ctx->op_metrics[i], i);
356 ctx->num_op_metrics = op + 1;
359 return &ctx->op_metrics[op];
// Walk `size` bytes of performance-monitor results starting at `result` and
// add each counter value into the counters of ctx->op_metrics[op].
// The CONSUME macro checks that sizeof(var) bytes remain before the end of
// the buffer, prints an error if not, and otherwise loads var from the
// cursor p. It relies on typeof, a GNU extension before C23.
// NOTE(review): this extract has dropped lines (the statements that follow
// the error message and the load inside CONSUME, the reads of group_id and
// value, and the assignment of counter_index). `¤t_context` below looks
// like a mis-decoded `&current_context`. Confirm against the original file.
// NOTE(review): ctx->op_metrics[op] is indexed directly with no visible check
// against num_op_metrics, whereas accumulate_program_time goes through
// ctx_get_op_metrics; presumably the time is always accumulated first. Verify.
363 accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
365 #define CONSUME(var) \
366 if (p + sizeof(var) > ((unsigned char *) result) + size) \
368 fprintf (stderr, "Unexpected end-of-buffer while " \
369 "parsing results\n"); \
372 (var) = *((typeof(var) *) p); \
375 context_t *ctx = ¤t_context;
376 unsigned char *p = (unsigned char *) result;
378 while (p < ((unsigned char *) result) + size)
380 GLuint group_id, counter_id, counter_index;
381 metrics_group_info_t *group;
386 CONSUME (counter_id);
// group_id is used directly as an index into metrics_info.groups.
389 assert (group_id < ctx->metrics_info.num_groups);
390 group = &ctx->metrics_info.groups[group_id];
// Linear search for counter_id among the group's counter ids.
392 for (i = 0; i < group->num_counters; i++) {
393 if (group->counters[i] == counter_id)
397 assert (counter_index < group->num_counters);
399 ctx->op_metrics[op].counters[group_id][counter_index] += value;
// Add `time_ns` to the accumulated time of `op` in the current context,
// obtaining the slot through ctx_get_op_metrics (which grows the array when
// `op` is beyond its current length).
// NOTE(review): `¤t_context` below looks like a mis-decoded
// `&current_context`; confirm against the original file.
404 accumulate_program_time (metrics_op_t op, unsigned time_ns)
406 op_metrics_t *metrics;
408 metrics = ctx_get_op_metrics (¤t_context, op);
410 metrics->time_ns += time_ns;
// Comparison callback passed to qsort_r by print_program_metrics. in_a and
// in_b point at int indices into the op_metrics array supplied as `arg`;
// the two entries are compared by their time_ns values.
// NOTE(review): the return statements are missing from this extract, so the
// resulting sort direction cannot be read from here. Confirm against the
// original file.
414 time_compare(const void *in_a, const void *in_b, void *arg)
416 int a = *(const int *)in_a;
417 int b = *(const int *)in_b;
418 struct op_metrics *metrics = arg;
420 if (metrics[a].time_ns < metrics[b].time_ns)
422 if (metrics[a].time_ns > metrics[b].time_ns)
// Print the report entry for one op: its label (followed by the number
// op - METRICS_OP_SHADER for ops at or above METRICS_OP_SHADER), its time in
// milliseconds and as a percentage of `total`, and then counter name/value
// pairs from every group.
// NOTE(review): this extract has dropped lines (return type, braces, the
// declaration of `value`, the statement taken when time_ns is zero, the body
// of the loop that runs from strlen(op_string) to 20, any filtering of
// counter values, and the end of the counter printf). Confirm against the
// original file.
428 print_op_metrics (context_t *ctx, op_metrics_t *metric, double total)
430 metrics_info_t *info = &ctx->metrics_info;
431 metrics_group_info_t *group;
432 const char *op_string;
433 unsigned i, group_id, counter;
436 /* Since we sparsely fill the array based on program
437 * id, many "programs" have no time.
439 if (metric->time_ns == 0.0)
442 op_string = metrics_op_string (metric->op);
444 printf ("%s", op_string);
445 if (metric->op >= METRICS_OP_SHADER) {
446 printf (" %d:", metric->op - METRICS_OP_SHADER);
449 for (i = strlen (op_string); i < 20; i++)
453 printf ("\t%7.2f ms (% 2.1f%%)",
454 metric->time_ns / 1e6,
455 metric->time_ns / total * 100);
458 for (group_id = 0; group_id < info->num_groups; group_id++) {
459 group = &info->groups[group_id];
460 for (counter = 0; counter < group->num_counters; counter++) {
461 value = metric->counters[group_id][counter];
464 printf ("%s: %.2f ", group->counter_names[counter],
// Print the metrics of every op in the current context: allocate an index
// array `sorted` with one entry per op, sum all time_ns values into `total`,
// sort the indices with qsort_r using time_compare, then call
// print_op_metrics for each op in sorted order.
// NOTE(review): this extract has dropped lines (return type, braces, the
// declarations of `i` and `total`, the statement that fills sorted[i], and
// any free of `sorted`). `¤t_context` below looks like a mis-decoded
// `&current_context`. Confirm against the original file.
// NOTE(review): the calloc result is used with no visible NULL check.
472 print_program_metrics (void)
474 context_t *ctx = ¤t_context;
475 int *sorted; /* Sorted indices into the ctx->op_metrics */
479 /* Make a sorted list of the operations by time used, and figure
480 * out the total so we can print percentages.
482 sorted = calloc(ctx->num_op_metrics, sizeof(*sorted));
483 for (i = 0; i < ctx->num_op_metrics; i++) {
485 total += ctx->op_metrics[i].time_ns;
487 qsort_r(sorted, ctx->num_op_metrics, sizeof(*sorted),
488 time_compare, ctx->op_metrics);
490 for (i = 0; i < ctx->num_op_metrics; i++)
491 print_op_metrics (ctx, &ctx->op_metrics[sorted[i]], total);
// Prints a termination message to stdout.
// NOTE(review): the signature line of this function is missing from this
// extract; presumably it is metrics_exit, which metrics_end_frame registers
// with atexit. Confirm against the original file.
494 /* Called at program exit */
499 printf ("fips: terminating\n");
// End-of-frame hook. It records a start time and registers metrics_exit with
// atexit, prints a per-frame message, then drains completed timer queries and
// performance monitors from the heads of their lists, folding each result
// into the per-op metrics. When the frame count is a multiple of 60 it prints
// the frames-per-second figure and the program metrics.
// NOTE(review): this extract has dropped lines (return type, braces, the
// declarations of `frames`, `verbose` and `fps`, the conditions guarding the
// one-time setup and the verbose message, the loop headers and availability
// checks, trailing call arguments, and any free of the timer, monitor and
// result allocations). Confirm against the original file.
504 metrics_end_frame (void)
506 static int initialized = 0;
507 static struct timeval tv_start, tv_now;
// One-time setup, presumably guarded by `initialized` (guard line missing).
510 gettimeofday (&tv_start, NULL);
511 atexit (metrics_exit);
512 if (getenv ("FIPS_VERBOSE"))
518 printf ("fips: frame %d complete\n", frames);
521 gettimeofday (&tv_now, NULL);
523 /* Consume all timer queries that are ready. */
524 timer_query_t *timer = current_context.timer_head;
527 GLuint available, elapsed;
529 glGetQueryObjectuiv (timer->id,
530 GL_QUERY_RESULT_AVAILABLE, &available);
534 glGetQueryObjectuiv (timer->id,
535 GL_QUERY_RESULT, &elapsed);
537 accumulate_program_time (timer->op, elapsed);
// Unlink the finished query from the head; clear the tail when the list
// becomes empty.
539 current_context.timer_head = timer->next;
540 if (current_context.timer_head == NULL)
541 current_context.timer_tail = NULL;
543 glDeleteQueries (1, &timer->id);
546 timer = current_context.timer_head;
549 /* And similarly for all performance monitors that are ready. */
550 monitor_t *monitor = current_context.monitor_head;
553 GLuint available, result_size, *result;
556 glGetPerfMonitorCounterDataAMD (monitor->id,
557 GL_PERFMON_RESULT_AVAILABLE_AMD,
558 sizeof (available), &available,
563 glGetPerfMonitorCounterDataAMD (monitor->id,
564 GL_PERFMON_RESULT_SIZE_AMD,
565 sizeof (result_size),
// The result buffer is sized from the GL_PERFMON_RESULT_SIZE_AMD query above.
568 result = xmalloc (result_size);
570 glGetPerfMonitorCounterDataAMD (monitor->id,
571 GL_PERFMON_RESULT_AMD,
575 accumulate_program_metrics (monitor->op, result, result_size);
577 current_context.monitor_head = monitor->next;
578 if (current_context.monitor_head == NULL)
579 current_context.monitor_tail = NULL;
581 glDeletePerfMonitorsAMD (1, &monitor->id);
584 monitor = current_context.monitor_head;
587 if (frames % 60 == 0) {
// Frames divided by the seconds elapsed since tv_start was recorded.
590 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
591 (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
593 printf("FPS: %.3f\n", fps);
595 print_program_metrics ();