1 /* Copyright © 2013, Intel Corporation
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "fips-dispatch-gl.h"
/* Node in a singly-linked list of timer queries; `next` points at the
 * following node.
 * NOTE(review): the original line numbers jump from 35 to 40, so the
 * other members are not visible here. */
35 typedef struct timer_query
40 struct timer_query *next;
43 /* Performance-monitor query */
/* Other code in this file keeps these on the context's
 * monitor_head/monitor_tail list and reads their `id`, `op` and `next`
 * members. */
44 typedef struct monitor
/* Accumulated measurements for one operation.  num_counters is set in
 * op_metrics_init to the number of entries allocated for `counters`. */
53 typedef struct op_metrics
55 /* This happens to also be the index into the
56 * ctx->op_metrics array currently
62 unsigned num_counters;
/* Description of one performance-counter group.  max_active_counters
 * caps how many of the group's counters metrics_counter_start selects. */
65 typedef struct counter_group_info
69 GLint max_active_counters;
71 } counter_group_info_t;
/* Counter-group information filled in by metrics_info_init: the
 * `groups` array and the largest num_counters found in any group. */
73 typedef struct metrics_info
76 int max_counters_per_group;
77 counter_group_info_t *groups;
/* State tracked for one GL context.  Timer queries and monitors are
 * appended at the *_tail pointers (metrics_counter_start) and consumed
 * from the *_head pointers (metrics_end_frame).  op_metrics is an array
 * of num_op_metrics entries grown on demand by ctx_get_op_metrics. */
80 typedef struct context
82 metrics_info_t metrics_info;
86 timer_query_t *timer_head;
87 timer_query_t *timer_tail;
89 monitor_t *monitor_head;
90 monitor_t *monitor_tail;
92 unsigned num_op_metrics;
93 op_metrics_t *op_metrics;
96 /* FIXME: Need a map from integers to context objects and track the
97 * current context with glXMakeContextCurrent, eglMakeCurrent, etc. */
/* Until then, this single global instance is the context the functions
 * in this file operate on. */
99 context_t current_context;
/* Query the GL performance-monitor (…AMD) groups and their counters and
 * cache the result in current_context.metrics_info.
 *
 * The group list is fetched in two calls: the first passes a zero-sized
 * buffer to obtain num_groups, the second fills a group_ids array
 * allocated with xmalloc.  For every group the id, num_counters and
 * max_active_counters are recorded and a `counters` array of
 * num_counters GLuints is allocated.  max_counters_per_group ends up as
 * the largest num_counters seen.
 *
 * NOTE(review): `¤t_context` below looks like a mis-encoded
 * `&current_context`; confirm against the upstream file.
 * NOTE(review): no free of group_ids is visible in these lines; confirm
 * whether it is released. */
105 metrics_info_init (void)
109 metrics_info_t *metrics_info = ¤t_context.metrics_info;
111 glGetPerfMonitorGroupsAMD (&metrics_info->num_groups, 0, NULL);
113 group_ids = xmalloc (metrics_info->num_groups * sizeof (GLuint));
115 glGetPerfMonitorGroupsAMD (NULL, metrics_info->num_groups, group_ids);
117 metrics_info->max_counters_per_group = 0;
119 metrics_info->groups = xmalloc (metrics_info->num_groups * sizeof (counter_group_info_t));
121 for (i = 0; i < metrics_info->num_groups; i++)
123 counter_group_info_t *group;
125 group = &metrics_info->groups[i];
127 group->id = group_ids[i];
129 glGetPerfMonitorCountersAMD (group->id, &group->num_counters,
130 &group->max_active_counters, 0, NULL);
132 group->counters = xmalloc (group->num_counters * sizeof (GLuint));
134 glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
138 if (group->num_counters > metrics_info->max_counters_per_group)
139 metrics_info->max_counters_per_group = group->num_counters;
/* Return a human-readable label for a metrics operation.
 *
 * Any value at or above METRICS_OP_SHADER is labelled "Shader program";
 * the remaining values are mapped case by case.  A value that matches
 * no case is reported on stderr as an internal error.
 *
 * NOTE(review): "glCearBufferData(+)" looks like a typo for
 * "glClearBufferData(+)".
 * NOTE(review): METRICS_OP_BUFFER_SUB_DATA is labelled
 * "glCopyBufferSubData*"; confirm that is the intended label.
 * NOTE(review): no return statement is visible for METRICS_OP_BITMAP or
 * METRICS_OP_CLEAR (the original line numbers skip there); confirm they
 * do not fall through to the next case. */
146 metrics_op_string (metrics_op_t op)
148 if (op >= METRICS_OP_SHADER)
149 return "Shader program";
153 case METRICS_OP_ACCUM:
154 return "glAccum*(+)";
155 case METRICS_OP_BUFFER_DATA:
156 return "glBufferData(+)";
157 case METRICS_OP_BUFFER_SUB_DATA:
158 return "glCopyBufferSubData*";
159 case METRICS_OP_BITMAP:
161 case METRICS_OP_BLIT_FRAMEBUFFER:
162 return "glBlitFramebuffer*";
163 case METRICS_OP_CLEAR:
165 case METRICS_OP_CLEAR_BUFFER_DATA:
166 return "glCearBufferData(+)";
167 case METRICS_OP_CLEAR_TEX_IMAGE:
168 return "glClearTexImage(+)";
169 case METRICS_OP_COPY_PIXELS:
170 return "glCopyPixels";
171 case METRICS_OP_COPY_TEX_IMAGE:
172 return "glCopyTexImage(+)";
173 case METRICS_OP_DRAW_PIXELS:
174 return "glDrawPixels";
175 case METRICS_OP_GET_TEX_IMAGE:
176 return "glGetTexImage(+)";
177 case METRICS_OP_READ_PIXELS:
178 return "glReadPixels*";
179 case METRICS_OP_TEX_IMAGE:
180 return "glTexImage*(+)";
182 fprintf (stderr, "fips: Internal error: "
183 "Unknown metrics op value: %d\n", op);
/* Begin measuring the current operation.
 *
 * Allocates a timer_query_t and a monitor_t with xmalloc and appends
 * each to the tail of its list in current_context (the monitor is
 * tagged with ctx->op).  GL objects are then generated for both, the
 * counters of every group are selected on the monitor, and finally the
 * GL_TIME_ELAPSED query and the performance monitor are started.
 *
 * When a group exposes more counters than max_active_counters allows,
 * only max_active_counters of them are requested and a warning is
 * written to stderr.
 *
 * NOTE(review): `¤t_context` below looks like a mis-encoded
 * `&current_context`; confirm against the upstream file.
 * NOTE(review): the initialization of the new timer's fields is not
 * visible in these lines (original line numbers skip after the
 * xmalloc). */
191 metrics_counter_start (void)
193 context_t *ctx = ¤t_context;
194 timer_query_t *timer;
198 /* Create new timer query, add to list */
199 timer = xmalloc (sizeof (timer_query_t));
204 if (ctx->timer_tail) {
205 ctx->timer_tail->next = timer;
206 ctx->timer_tail = timer;
208 ctx->timer_tail = timer;
209 ctx->timer_head = timer;
212 /* Create a new performance-monitor query */
213 monitor = xmalloc (sizeof (monitor_t));
215 monitor->op = ctx->op;
216 monitor->next = NULL;
218 if (ctx->monitor_tail) {
219 ctx->monitor_tail->next = monitor;
220 ctx->monitor_tail = monitor;
222 ctx->monitor_tail = monitor;
223 ctx->monitor_head = monitor;
226 /* Initialize the timer_query and monitor objects */
227 glGenQueries (1, &timer->id);
229 glGenPerfMonitorsAMD (1, &monitor->id);
231 for (i = 0; i < ctx->metrics_info.num_groups; i++)
233 counter_group_info_t *group;
236 group = &ctx->metrics_info.groups[i];
238 num_counters = group->num_counters;
239 if (group->max_active_counters < group->num_counters)
241 fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
242 group->max_active_counters,
243 group->num_counters, i);
244 num_counters = group->max_active_counters;
248 glSelectPerfMonitorCountersAMD(monitor->id,
254 /* Start the queries */
255 glBeginQuery (GL_TIME_ELAPSED, timer->id);
257 glBeginPerfMonitorAMD (monitor->id);
/* End the GL_TIME_ELAPSED query and the performance monitor at the tail
 * of current_context's monitor list.
 * NOTE(review): monitor_tail is dereferenced without a NULL check in
 * the visible lines; confirm callers always pair this with
 * metrics_counter_start. */
261 metrics_counter_stop (void)
263 glEndQuery (GL_TIME_ELAPSED);
264 glEndPerfMonitorAMD (current_context.monitor_tail->id);
/* Record `op` as the current operation in current_context.
 * metrics_counter_start copies this value onto each new monitor. */
268 metrics_set_current_op (metrics_op_t op)
270 current_context.op = op;
/* Return the operation currently recorded in current_context. */
274 metrics_get_current_op (void)
276 return current_context.op;
/* Reset `metrics` to an empty state sized for the context's counters.
 *
 * time_ns is cleared, and `counters` is allocated with xmalloc to hold
 * num_groups * max_counters_per_group doubles, all set to 0.0.
 *
 * NOTE(review): `op` is not used in the lines visible here (original
 * line numbers skip from 282 to 286); presumably it is stored in
 * metrics->op -- confirm. */
280 op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
282 metrics_info_t *info = &ctx->metrics_info;
286 metrics->time_ns = 0.0;
288 metrics->num_counters = info->num_groups * info->max_counters_per_group;
289 metrics->counters = xmalloc (sizeof(double) * metrics->num_counters);
291 for (i = 0; i < metrics->num_counters; i++)
292 metrics->counters[i] = 0.0;
/* Return the op_metrics_t slot for `op` in ctx->op_metrics.
 *
 * When `op` is at or beyond num_op_metrics the array is grown with
 * realloc to op + 1 entries, every new entry is set up with
 * op_metrics_init, and num_op_metrics is updated.
 *
 * NOTE(review): the realloc result is assigned straight back to
 * ctx->op_metrics and is not checked in the visible lines. */
295 static op_metrics_t *
296 ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
300 if (op >= ctx->num_op_metrics)
302 ctx->op_metrics = realloc (ctx->op_metrics,
303 (op + 1) * sizeof (op_metrics_t));
304 for (i = ctx->num_op_metrics; i < op + 1; i++)
305 op_metrics_init (ctx, &ctx->op_metrics[i], i);
307 ctx->num_op_metrics = op + 1;
310 return &ctx->op_metrics[op];
/* Walk a raw performance-monitor result buffer of `size` bytes and add
 * the counter values found to ctx->op_metrics[op].counters.
 *
 * CONSUME(var) copies sizeof(var) bytes from the cursor `p` into `var`
 * after checking that the read stays inside result + size; on overrun
 * it prints "Unexpected end-of-buffer while parsing results" to stderr.
 * The counter type is looked up with glGetPerfMonitorCounterInfoAMD and
 * anything other than GL_UNSIGNED_INT triggers a warning.  The slot
 * index is derived from group_id * max_counters_per_group (the rest of
 * that expression is not visible here) and is asserted to be below
 * num_counters.  A value whose int32_t interpretation is negative
 * causes a "." to be written to stderr.
 *
 * NOTE(review): `¤t_context` below looks like a mis-encoded
 * `&current_context`; confirm against the upstream file.
 * NOTE(review): ctx->op_metrics[op] is indexed directly rather than via
 * ctx_get_op_metrics; confirm the slot always exists by this point.
 * NOTE(review): the branching around the final `+=` is not visible in
 * this listing. */
314 accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
316 #define CONSUME(var) \
317 if (p + sizeof(var) > ((unsigned char *) result) + size) \
319 fprintf (stderr, "Unexpected end-of-buffer while " \
320 "parsing results\n"); \
323 (var) = *((typeof(var) *) p); \
326 context_t *ctx = ¤t_context;
327 unsigned char *p = (unsigned char *) result;
329 while (p < ((unsigned char *) result) + size)
331 GLuint group_id, counter_id, counter_type;
336 CONSUME (counter_id);
338 glGetPerfMonitorCounterInfoAMD (group_id, counter_id,
342 /* We assume that all performance counters are made
343 * available as uint32 values. This code can easily be
344 * extended as needed. */
345 if (counter_type != GL_UNSIGNED_INT) {
346 fprintf (stderr, "Warning: Non-uint counter value. Ignoring remainder of results\n");
352 i = (group_id * ctx->metrics_info.max_counters_per_group +
355 assert (i < ctx->op_metrics[op].num_counters);
357 /* FIXME: While I'm still occasionally getting bogus
358 * numbers from the performance counters, I'm simply
359 * going to discard anything larger than half the
360 * range, (something that looks like a negative signed
363 if (((int32_t) value) < 0)
364 fprintf (stderr, ".");
366 ctx->op_metrics[op].counters[i] += value;
/* Add `time_ns` to the accumulated time of operation `op`.  The slot is
 * obtained through ctx_get_op_metrics, which grows the array if needed.
 * NOTE(review): `¤t_context` below looks like a mis-encoded
 * `&current_context`; confirm against the upstream file. */
371 accumulate_program_time (metrics_op_t op, unsigned time_ns)
373 op_metrics_t *metrics;
375 metrics = ctx_get_op_metrics (¤t_context, op);
377 metrics->time_ns += time_ns;
/* Comparator passed to qsort_r by print_program_metrics.
 *
 * in_a and in_b point at int indices; `arg` is the op_metrics array
 * those indices refer to.  Ordering is decided by comparing the two
 * entries' time_ns.
 * NOTE(review): the values returned by each branch are not visible in
 * this listing, so the sort direction cannot be confirmed from here. */
381 time_compare(const void *in_a, const void *in_b, void *arg)
383 int a = *(const int *)in_a;
384 int b = *(const int *)in_b;
385 struct op_metrics *metrics = arg;
387 if (metrics[a].time_ns < metrics[b].time_ns)
389 if (metrics[a].time_ns > metrics[b].time_ns)
/* Print a per-operation report to stdout.
 *
 * An index array over ctx->op_metrics is ordered with qsort_r and
 * time_compare, and the time_ns values are summed into `total`.  For
 * each entry the label from metrics_op_string is printed (followed by
 * the offset from METRICS_OP_SHADER for shader entries), then the time
 * in milliseconds with its percentage of the total, then the counters;
 * counters equal to 0.0 are tested for before printing.
 *
 * NOTE(review): `¤t_context` below looks like a mis-encoded
 * `&current_context`; confirm against the upstream file.
 * NOTE(review): in the visible lines the calloc result for `sorted` is
 * neither checked nor freed; confirm against the upstream file. */
395 print_program_metrics (void)
397 context_t *ctx = ¤t_context;
398 int *sorted; /* Sorted indices into the ctx->op_metrics */
402 /* Make a sorted list of the operations by time used, and figure
403 * out the total so we can print percentages.
405 sorted = calloc(ctx->num_op_metrics, sizeof(*sorted));
406 for (i = 0; i < ctx->num_op_metrics; i++) {
408 total += ctx->op_metrics[i].time_ns;
410 qsort_r(sorted, ctx->num_op_metrics, sizeof(*sorted),
411 time_compare, ctx->op_metrics);
413 for (i = 0; i < ctx->num_op_metrics; i++) {
414 const char *op_string;
415 op_metrics_t *metric =&ctx->op_metrics[sorted[i]];
417 /* Since we sparsely fill the array based on program
418 * id, many "programs" have no time.
420 if (metric->time_ns == 0.0)
423 op_string = metrics_op_string (metric->op);
425 printf ("%s", op_string);
426 if (metric->op >= METRICS_OP_SHADER) {
427 printf (" %d:", metric->op - METRICS_OP_SHADER);
430 for (j = strlen (op_string); j < 20; j++)
433 printf ("\t%7.2f ms (% 2.1f%%)",
434 metric->time_ns / 1e6,
435 metric->time_ns / total * 100);
437 for (j = 0; j < metric->num_counters; j++) {
438 if (metric->counters[j] == 0.0)
440 printf ("%d: %.2f ms ", j, metric->counters[j] / 1e6);
446 /* Called at program exit */
/* Announces termination on stdout.  NOTE(review): the function header
 * is not visible in this listing; presumably this is the metrics_exit
 * handler that metrics_end_frame registers with atexit -- confirm. */
451 printf ("fips: terminating\n");
/* Per-frame bookkeeping, intended to be called once a frame completes.
 *
 * One-time setup (guarded by the static `initialized`): the start time
 * is captured with gettimeofday, metrics_exit is registered with
 * atexit, and the FIPS_VERBOSE environment variable is consulted.
 *
 * Each call then samples the current time and drains the two pending
 * lists in current_context from the head:
 *  - timer queries: GL_QUERY_RESULT_AVAILABLE is queried, the elapsed
 *    value is read and passed to accumulate_program_time, and the GL
 *    query is deleted;
 *  - performance monitors: availability and result size are queried,
 *    a buffer of result_size bytes is allocated with xmalloc, the data
 *    is passed to accumulate_program_metrics, and the monitor is
 *    deleted.
 * In both cases the tail pointer is reset to NULL once the list
 * becomes empty.
 *
 * Every 60th frame the average frames-per-second since the start time
 * is printed, followed by print_program_metrics.
 *
 * NOTE(review): no free of `result` (or of the dequeued timer/monitor
 * nodes) is visible in these lines; confirm against the upstream file. */
456 metrics_end_frame (void)
458 static int initialized = 0;
459 static struct timeval tv_start, tv_now;
462 gettimeofday (&tv_start, NULL);
463 atexit (metrics_exit);
464 if (getenv ("FIPS_VERBOSE"))
470 printf ("fips: frame %d complete\n", frames);
473 gettimeofday (&tv_now, NULL);
475 /* Consume all timer queries that are ready. */
476 timer_query_t *timer = current_context.timer_head;
479 GLuint available, elapsed;
481 glGetQueryObjectuiv (timer->id,
482 GL_QUERY_RESULT_AVAILABLE, &available);
486 glGetQueryObjectuiv (timer->id,
487 GL_QUERY_RESULT, &elapsed);
489 accumulate_program_time (timer->op, elapsed);
491 current_context.timer_head = timer->next;
492 if (current_context.timer_head == NULL)
493 current_context.timer_tail = NULL;
495 glDeleteQueries (1, &timer->id);
498 timer = current_context.timer_head;
501 /* And similarly for all performance monitors that are ready. */
502 monitor_t *monitor = current_context.monitor_head;
505 GLuint available, result_size, *result;
508 glGetPerfMonitorCounterDataAMD (monitor->id,
509 GL_PERFMON_RESULT_AVAILABLE_AMD,
510 sizeof (available), &available,
515 glGetPerfMonitorCounterDataAMD (monitor->id,
516 GL_PERFMON_RESULT_SIZE_AMD,
517 sizeof (result_size),
520 result = xmalloc (result_size);
522 glGetPerfMonitorCounterDataAMD (monitor->id,
523 GL_PERFMON_RESULT_AMD,
527 accumulate_program_metrics (monitor->op, result, result_size);
529 current_context.monitor_head = monitor->next;
530 if (current_context.monitor_head == NULL)
531 current_context.monitor_tail = NULL;
533 glDeletePerfMonitorsAMD (1, &monitor->id);
536 monitor = current_context.monitor_head;
539 if (frames % 60 == 0) {
542 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
543 (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
545 printf("FPS: %.3f\n", fps);
547 print_program_metrics ();