1 /* Copyright © 2013, Intel Corporation
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "fips-dispatch-gl.h"
/* Timer-query record. Instances are chained into a singly linked list
 * through 'next'. Code in this file also accesses 'id' and 'op' members
 * of this record; their declarations are not part of this excerpt. */
35 typedef struct timer_query
40 struct timer_query *next;
/* Code in this file uses the members 'id', 'op' and 'next' of this record
 * (monitors are kept on a singly linked list); their declarations are not
 * part of this excerpt. */
43 /* Performance-monitor query */
44 typedef struct monitor
/* Accumulated measurements for one operation. Besides 'num_counters', code
 * in this file uses the members 'op', 'time_ns' and 'counters' (an array of
 * 'num_counters' doubles). */
53 typedef struct op_metrics
55 /* This happens to also be the index into the
56 * ctx->op_metrics array currently
62 unsigned num_counters;
/* Description of one performance-counter group. 'max_active_counters' is
 * filled in by glGetPerfMonitorCountersAMD; code in this file also uses the
 * members 'id', 'num_counters' and 'counters' of this record. */
65 typedef struct counter_group_info
69 GLint max_active_counters;
71 } counter_group_info_t;
/* Cached description of all counter groups. 'groups' is an array allocated
 * with one entry per group ('num_groups' entries), and
 * 'max_counters_per_group' holds the largest per-group counter count seen
 * while the groups were enumerated. */
73 typedef struct metrics_info
76 int max_counters_per_group;
77 counter_group_info_t *groups;
/* Per-context state: the cached counter-group information, the lists of
 * pending timer queries and performance monitors (head/tail pointers), and
 * the growable array of per-operation metrics ('num_op_metrics' entries).
 * Code in this file also uses an 'op' member holding the current
 * operation; its declaration is not part of this excerpt. */
80 typedef struct context
82 metrics_info_t metrics_info;
86 timer_query_t *timer_head;
87 timer_query_t *timer_tail;
89 monitor_t *monitor_head;
90 monitor_t *monitor_tail;
92 unsigned num_op_metrics;
93 op_metrics_t *op_metrics;
96 /* FIXME: Need a map from integers to context objects and track the
97 * current context with glXMakeContextCurrent, eglMakeCurrent, etc. */
/* The single context instance currently used by the code in this file. */
99 context_t current_context;
/* Enumerate the performance-monitor groups and their counters and cache
 * the result in current_context.metrics_info.
 *
 * The group count is queried first, then the group ids. For each group the
 * counter count and the maximum number of simultaneously active counters
 * are recorded and a counter-id array is allocated. The largest counter
 * count of any group ends up in max_counters_per_group.
 */
105 metrics_info_init (void)
109 metrics_info_t *metrics_info = &current_context.metrics_info;
111 glGetPerfMonitorGroupsAMD (&metrics_info->num_groups, 0, NULL);
113 group_ids = xmalloc (metrics_info->num_groups * sizeof (GLuint));
115 glGetPerfMonitorGroupsAMD (NULL, metrics_info->num_groups, group_ids);
117 metrics_info->max_counters_per_group = 0;
119 metrics_info->groups = xmalloc (metrics_info->num_groups * sizeof (counter_group_info_t));
121 for (i = 0; i < metrics_info->num_groups; i++)
123 counter_group_info_t *group;
125 group = &metrics_info->groups[i];
127 group->id = group_ids[i];
/* First call: only ask for the counter count and the active-counter limit. */
129 glGetPerfMonitorCountersAMD (group->id, &group->num_counters,
130 &group->max_active_counters, 0, NULL);
132 group->counters = xmalloc (group->num_counters * sizeof (GLuint));
134 glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
138 if (group->num_counters > metrics_info->max_counters_per_group)
139 metrics_info->max_counters_per_group = group->num_counters;
/* Return a human-readable label for OP.
 *
 * Every value at or above METRICS_OP_SHADER is reported as
 * "Shader program"; the fixed operations below it each have their own
 * label. An unrecognized value prints an internal-error message to stderr.
 */
146 metrics_op_string (metrics_op_t op)
148 if (op >= METRICS_OP_SHADER)
149 return "Shader program";
153 case METRICS_OP_ACCUM:
154 return "glAccum*(+)";
155 case METRICS_OP_BUFFER_DATA:
156 return "glBufferData(+)";
157 case METRICS_OP_BUFFER_SUB_DATA:
158 return "glCopyBufferSubData*";
159 case METRICS_OP_BITMAP:
161 case METRICS_OP_BLIT_FRAMEBUFFER:
162 return "glBlitFramebuffer*";
163 case METRICS_OP_CLEAR:
165 case METRICS_OP_CLEAR_BUFFER_DATA:
166 return "glClearBufferData(+)";
167 case METRICS_OP_CLEAR_TEX_IMAGE:
168 return "glClearTexImage(+)";
169 case METRICS_OP_COPY_PIXELS:
170 return "glCopyPixels";
171 case METRICS_OP_COPY_TEX_IMAGE:
172 return "glCopyTexImage(+)";
173 case METRICS_OP_DRAW_PIXELS:
174 return "glDrawPixels";
175 case METRICS_OP_GET_TEX_IMAGE:
176 return "glGetTexImage(+)";
177 case METRICS_OP_READ_PIXELS:
178 return "glReadPixels*";
179 case METRICS_OP_TEX_IMAGE:
180 return "glTexImage*(+)";
182 fprintf (stderr, "fips: Internal error: "
183 "Unknown metrics op value: %d\n", op);
/* Begin measuring the current operation.
 *
 * Appends a new timer query and a new performance monitor to the lists
 * kept in current_context, generates their GL objects, selects counters
 * for the monitor group by group, and then starts both the
 * GL_TIME_ELAPSED query and the performance monitor.
 *
 * When a group has more counters than may be active at once, only
 * max_active_counters of them are used and a warning is printed.
 */
191 metrics_counter_start (void)
193 context_t *ctx = &current_context;
194 timer_query_t *timer;
198 /* Create new timer query, add to list */
199 timer = xmalloc (sizeof (timer_query_t));
204 if (ctx->timer_tail) {
205 ctx->timer_tail->next = timer;
206 ctx->timer_tail = timer;
208 ctx->timer_tail = timer;
209 ctx->timer_head = timer;
212 /* Create a new performance-monitor query */
213 monitor = xmalloc (sizeof (monitor_t));
215 monitor->op = ctx->op;
216 monitor->next = NULL;
218 if (ctx->monitor_tail) {
219 ctx->monitor_tail->next = monitor;
220 ctx->monitor_tail = monitor;
222 ctx->monitor_tail = monitor;
223 ctx->monitor_head = monitor;
226 /* Initialize the timer_query and monitor objects */
227 glGenQueries (1, &timer->id);
229 glGenPerfMonitorsAMD (1, &monitor->id);
231 for (i = 0; i < ctx->metrics_info.num_groups; i++)
233 counter_group_info_t *group;
236 group = &ctx->metrics_info.groups[i];
/* Default to all counters of the group; capped below if the group
 * allows fewer active counters than it has. */
238 num_counters = group->num_counters;
239 if (group->max_active_counters < group->num_counters)
241 fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
242 group->max_active_counters,
243 group->num_counters, i);
244 num_counters = group->max_active_counters;
248 glSelectPerfMonitorCountersAMD(monitor->id,
254 /* Start the queries */
255 glBeginQuery (GL_TIME_ELAPSED, timer->id);
257 glBeginPerfMonitorAMD (monitor->id);
/* Stop measuring: end the GL_TIME_ELAPSED query and the performance
 * monitor at the tail of the monitor list.
 *
 * NOTE(review): monitor_tail is dereferenced without a visible NULL check;
 * this presumably relies on metrics_counter_start having been called
 * first -- confirm against callers. */
261 metrics_counter_stop (void)
263 glEndQuery (GL_TIME_ELAPSED);
264 glEndPerfMonitorAMD (current_context.monitor_tail->id);
/* Record OP as the current operation of the global context. */
268 metrics_set_current_op (metrics_op_t op)
270 current_context.op = op;
/* Return the current operation stored in the global context. */
274 metrics_get_current_op (void)
276 return current_context.op;
/* Reset METRICS to an empty state.
 *
 * The accumulated time is set to zero and a counter array with
 * num_groups * max_counters_per_group entries (taken from CTX's
 * metrics_info) is allocated and zero-filled. */
280 op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
282 metrics_info_t *info = &ctx->metrics_info;
286 metrics->time_ns = 0.0;
288 metrics->num_counters = info->num_groups * info->max_counters_per_group;
289 metrics->counters = xmalloc (sizeof(double) * metrics->num_counters);
291 for (i = 0; i < metrics->num_counters; i++)
292 metrics->counters[i] = 0.0;
/* Return the metrics slot for OP in CTX, growing the op_metrics array on
 * demand.
 *
 * When OP is beyond the current array size the array is resized to hold
 * OP + 1 entries and every newly added entry is initialized with
 * op_metrics_init.
 *
 * NOTE(review): the realloc result is stored straight into
 * ctx->op_metrics with no visible NULL check, unlike the xmalloc calls
 * used elsewhere in this file -- confirm whether a checked wrapper should
 * be used here. */
295 static op_metrics_t *
296 ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
300 if (op >= ctx->num_op_metrics)
302 ctx->op_metrics = realloc (ctx->op_metrics,
303 (op + 1) * sizeof (op_metrics_t));
304 for (i = ctx->num_op_metrics; i < op + 1; i++)
305 op_metrics_init (ctx, &ctx->op_metrics[i], i);
307 ctx->num_op_metrics = op + 1;
310 return &ctx->op_metrics[op];
/* Add the counter values found in a performance-monitor result buffer to
 * the metrics of operation OP.
 *
 * RESULT points to SIZE bytes of raw result data. The buffer is consumed
 * field by field with the CONSUME macro, which prints
 * "Unexpected end-of-buffer while parsing results" when a read would run
 * past the end of the buffer. Only GL_UNSIGNED_INT counters are handled;
 * any other counter type prints a warning.
 *
 * NOTE(review): ctx->op_metrics[op] is indexed directly here; this
 * presumably relies on the slot for OP already existing -- confirm.
 */
314 accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
316 #define CONSUME(var) \
317 if (p + sizeof(var) > ((unsigned char *) result) + size) \
319 fprintf (stderr, "Unexpected end-of-buffer while " \
320 "parsing results\n"); \
323 (var) = *((typeof(var) *) p); \
326 context_t *ctx = &current_context;
327 unsigned char *p = (unsigned char *) result;
329 while (p < ((unsigned char *) result) + size)
331 GLuint group_id, counter_id, counter_type;
336 CONSUME (counter_id);
338 glGetPerfMonitorCounterInfoAMD (group_id, counter_id,
342 /* We assume that all performance counters are made
343 * available as uint32 values. This code can easily be
344 * extended as needed. */
345 if (counter_type != GL_UNSIGNED_INT) {
346 fprintf (stderr, "Warning: Non-uint counter value. Ignoring remainder of results\n");
352 i = (group_id * ctx->metrics_info.max_counters_per_group +
355 assert (i < ctx->op_metrics[op].num_counters);
357 /* FIXME: While I'm still occasionally getting bogus
358 * numbers from the performance counters, I'm simply
359 * going to discard anything larger than half the
360 * range, (something that looks like a negative signed
363 if (((int32_t) value) < 0)
364 fprintf (stderr, ".");
366 ctx->op_metrics[op].counters[i] += value;
/* Add TIME_NS to the accumulated time of operation OP in the global
 * context. The metrics slot for OP is obtained through
 * ctx_get_op_metrics. */
371 accumulate_program_time (metrics_op_t op, unsigned time_ns)
373 op_metrics_t *metrics;
375 metrics = ctx_get_op_metrics (&current_context, op);
377 metrics->time_ns += time_ns;
/* Three-argument comparison callback (passed to qsort_r by
 * print_program_metrics).
 *
 * IN_A and IN_B point to int indices into the op_metrics array passed as
 * ARG; the two entries are compared by their time_ns values. */
381 time_compare(const void *in_a, const void *in_b, void *arg)
383 int a = *(const int *)in_a;
384 int b = *(const int *)in_b;
385 struct op_metrics *metrics = arg;
387 if (metrics[a].time_ns < metrics[b].time_ns)
389 if (metrics[a].time_ns > metrics[b].time_ns)
/* Print one report entry for METRIC.
 *
 * The entry starts with the operation label (followed by the offset from
 * METRICS_OP_SHADER for shader operations), then the accumulated time in
 * milliseconds and as a percentage of TOTAL, then the non-zero counter
 * values. The check on time_ns == 0.0 singles out entries that never
 * accumulated any time.
 *
 * NOTE(review): counter values are printed divided by 1e6 with an "ms"
 * suffix -- confirm that this unit is intended for raw counters. */
395 print_op_metrics (op_metrics_t *metric, double total)
397 const char *op_string;
400 /* Since we sparsely fill the array based on program
401 * id, many "programs" have no time.
403 if (metric->time_ns == 0.0)
406 op_string = metrics_op_string (metric->op);
408 printf ("%s", op_string);
409 if (metric->op >= METRICS_OP_SHADER) {
410 printf (" %d:", metric->op - METRICS_OP_SHADER);
413 for (i = strlen (op_string); i < 20; i++)
417 printf ("\t%7.2f ms (% 2.1f%%)",
418 metric->time_ns / 1e6,
419 metric->time_ns / total * 100);
422 for (i = 0; i < metric->num_counters; i++) {
423 if (metric->counters[i] == 0.0)
425 printf ("%d: %.2f ms ", i, metric->counters[i] / 1e6);
/* Print the metrics of every operation recorded in the global context,
 * ordered by time.
 *
 * An index array with one entry per op_metrics slot is allocated, the
 * total time over all slots is summed, the indices are sorted with
 * qsort_r using time_compare, and each slot is then printed with
 * print_op_metrics (which receives the total for percentages).
 */
431 print_program_metrics (void)
433 context_t *ctx = &current_context;
434 int *sorted; /* Sorted indices into the ctx->op_metrics */
438 /* Make a sorted list of the operations by time used, and figure
439 * out the total so we can print percentages.
441 sorted = calloc(ctx->num_op_metrics, sizeof(*sorted));
442 for (i = 0; i < ctx->num_op_metrics; i++) {
444 total += ctx->op_metrics[i].time_ns;
446 qsort_r(sorted, ctx->num_op_metrics, sizeof(*sorted),
447 time_compare, ctx->op_metrics);
449 for (i = 0; i < ctx->num_op_metrics; i++)
450 print_op_metrics (&ctx->op_metrics[sorted[i]], total);
453 /* Called at program exit */
/* Prints a termination notice to stdout.
 * NOTE(review): the function header is not part of this excerpt;
 * presumably this is the metrics_exit handler that metrics_end_frame
 * registers with atexit -- confirm. */
458 printf ("fips: terminating\n");
/* Per-frame hook.
 *
 * Initialization (guarded by the static 'initialized' flag, presumably run
 * once) records the start time and registers metrics_exit with atexit.
 * After that the function walks the pending timer queries and performance
 * monitors from the head of each list: for entries whose results are
 * available, the result is read and accumulated into the per-operation
 * metrics, the entry is unlinked, and its GL object is deleted.
 * On frames where frames % 60 == 0 the average FPS since the recorded
 * start time is printed; print_program_metrics is called as well. */
463 metrics_end_frame (void)
465 static int initialized = 0;
466 static struct timeval tv_start, tv_now;
469 gettimeofday (&tv_start, NULL);
470 atexit (metrics_exit);
471 if (getenv ("FIPS_VERBOSE"))
477 printf ("fips: frame %d complete\n", frames);
480 gettimeofday (&tv_now, NULL);
482 /* Consume all timer queries that are ready. */
483 timer_query_t *timer = current_context.timer_head;
486 GLuint available, elapsed;
488 glGetQueryObjectuiv (timer->id,
489 GL_QUERY_RESULT_AVAILABLE, &available);
493 glGetQueryObjectuiv (timer->id,
494 GL_QUERY_RESULT, &elapsed);
496 accumulate_program_time (timer->op, elapsed);
/* Unlink the consumed query; clear the tail too when the list empties. */
498 current_context.timer_head = timer->next;
499 if (current_context.timer_head == NULL)
500 current_context.timer_tail = NULL;
502 glDeleteQueries (1, &timer->id);
505 timer = current_context.timer_head;
508 /* And similarly for all performance monitors that are ready. */
509 monitor_t *monitor = current_context.monitor_head;
512 GLuint available, result_size, *result;
515 glGetPerfMonitorCounterDataAMD (monitor->id,
516 GL_PERFMON_RESULT_AVAILABLE_AMD,
517 sizeof (available), &available,
/* Ask for the size of the result first, then fetch it into a buffer of
 * that size. */
522 glGetPerfMonitorCounterDataAMD (monitor->id,
523 GL_PERFMON_RESULT_SIZE_AMD,
524 sizeof (result_size),
527 result = xmalloc (result_size);
529 glGetPerfMonitorCounterDataAMD (monitor->id,
530 GL_PERFMON_RESULT_AMD,
534 accumulate_program_metrics (monitor->op, result, result_size);
/* Unlink the consumed monitor; clear the tail too when the list empties. */
536 current_context.monitor_head = monitor->next;
537 if (current_context.monitor_head == NULL)
538 current_context.monitor_tail = NULL;
540 glDeletePerfMonitorsAMD (1, &monitor->id);
543 monitor = current_context.monitor_head;
546 if (frames % 60 == 0) {
/* Average FPS since tv_start: frame count over elapsed seconds. */
549 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
550 (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
552 printf("FPS: %.3f\n", fps);
554 print_program_metrics ();