1 /* Copyright © 2013, Intel Corporation
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "fips-dispatch-gl.h"
// One node of the singly linked list of pending timer queries; other
// code in this file also reads its 'id' and 'op' members.
35 typedef struct timer_query
// Next query in the list; metrics_counter_start links new nodes through it.
40 struct timer_query *next;
43 /* Performance-monitor query: one node of the list of pending monitors */
44 typedef struct monitor
/* Accumulated results for a single operation: elsewhere in this file
 * its 'time_ns' total and per-group 'counters' arrays are updated. */
53 typedef struct op_metrics
55 /* This happens to also be the index into the
56 * ctx->op_metrics array currently
// Description of one performance-monitor group, filled in by
// metrics_group_info_init.
64 typedef struct metrics_group_info
// Written by glGetPerfMonitorCountersAMD; metrics_counter_start caps the
// number of counters it requests from the group at this value.
70 GLuint max_active_counters;
// One type value per counter (num_counters entries).
74 GLuint *counter_types;
76 } metrics_group_info_t;
// Table of the performance-monitor groups known to a context.
78 typedef struct metrics_info
// Array of num_groups entries, allocated in metrics_info_init.
83 metrics_group_info_t *groups;
// Per-context bookkeeping: the group/counter descriptions, the lists of
// outstanding queries, and the accumulated per-operation results.
86 typedef struct context
88 metrics_info_t metrics_info;
// Pending timer queries: metrics_counter_start appends at the tail,
// metrics_end_frame consumes from the head.
92 timer_query_t *timer_head;
93 timer_query_t *timer_tail;
// Pending performance monitors, queued the same way.
95 monitor_t *monitor_head;
96 monitor_t *monitor_tail;
// op_metrics holds num_op_metrics entries and is grown on demand by
// ctx_get_op_metrics.
98 unsigned num_op_metrics;
99 op_metrics_t *op_metrics;
102 /* FIXME: Need a map from integers to context objects and track the
103 * current context with glXMakeContextCurrent, eglMakeCurrent, etc.
 * Until then, every function in this file uses this one global. */
105 context_t current_context;
/* Fill in 'group' with the name of performance-monitor group 'id' and
 * the ids, types and names of its counters, all queried through the
 * AMD_performance_monitor entry points. The counter arrays and the
 * counter-name strings allocated here are released again by
 * metrics_group_info_fini. */
111 metrics_group_info_init (metrics_group_info_t *group, GLuint id)
/* Query the length of the group name first, then fetch the name into
 * a buffer of length + 1 bytes. */
119 glGetPerfMonitorGroupStringAMD (id, 0, &length, NULL);
121 group->name = xmalloc (length + 1);
123 glGetPerfMonitorGroupStringAMD (id, length + 1, NULL, group->name);
125 /* Get number of counters */
126 group->num_counters = 0;
127 group->max_active_counters = 0;
128 glGetPerfMonitorCountersAMD (group->id,
129 (int *) &group->num_counters,
130 (int *) &group->max_active_counters,
133 /* Get counter numbers */
134 group->counter_ids = xmalloc (group->num_counters * sizeof (GLuint));
136 glGetPerfMonitorCountersAMD (group->id, NULL, NULL,
140 /* Get counter names and types */
141 group->counter_names = xmalloc (group->num_counters * sizeof (char *));
142 group->counter_types = xmalloc (group->num_counters * sizeof (GLuint));
144 for (i = 0; i < group->num_counters; i++) {
145 glGetPerfMonitorCounterInfoAMD (group->id,
146 group->counter_ids[i],
148 &group->counter_types[i]);
150 /* We assume that all performance counters are made
151 * available as uint32 values. The code calling
152 * CONSUME in accumulate_program_metrics will need to
153 * be extended to accommodate other counter values. */
154 if (group->counter_types[i] != GL_UNSIGNED_INT) {
155 fprintf (stderr, "fips: Internal error: No support for non-uint counter values\n");
/* Fetch the counter's name into a freshly allocated buffer of
 * length + 1 bytes. */
159 glGetPerfMonitorCounterStringAMD (group->id,
160 group->counter_ids[i],
163 group->counter_names[i] = xmalloc (length + 1);
165 glGetPerfMonitorCounterStringAMD (group->id,
166 group->counter_ids[i],
168 group->counter_names[i]);
/* Release the per-counter storage of 'group': every counter-name
 * string, then the counter_types, counter_names and counter_ids
 * arrays. */
173 metrics_group_info_fini (metrics_group_info_t *group)
/* The strings must go before the array that holds their pointers. */
177 for (i = 0; i < group->num_counters; i++)
178 free (group->counter_names[i]);
180 free (group->counter_types);
181 free (group->counter_names);
182 free (group->counter_ids);
/* Forward declaration: metrics_info_init calls this function before
 * its definition appears. */
188 metrics_info_fini (metrics_info_t *info);
/* (Re)build the group table of the current context: query the list of
 * performance-monitor group ids and initialize one
 * metrics_group_info_t per group. A table that was already
 * initialized is torn down first. */
191 metrics_info_init (void)
/* NOTE(review): "¤t_context" looks like a mis-decoded
 * "&current_context" - confirm against the pristine file. */
195 metrics_info_t *metrics_info = ¤t_context.metrics_info;
197 if (metrics_info->initialized)
198 metrics_info_fini (metrics_info);
/* First call only asks how many groups exist. */
200 glGetPerfMonitorGroupsAMD ((int *) &metrics_info->num_groups, 0, NULL);
202 group_ids = xmalloc (metrics_info->num_groups * sizeof (GLuint));
/* Second call fetches that many group ids. */
204 glGetPerfMonitorGroupsAMD (NULL, metrics_info->num_groups, group_ids);
206 metrics_info->groups = xmalloc (metrics_info->num_groups * sizeof (metrics_group_info_t));
208 for (i = 0; i < metrics_info->num_groups; i++)
209 metrics_group_info_init (&metrics_info->groups[i], group_ids[i]);
213 metrics_info->initialized = 1;
/* Tear down every group description held in 'info' by calling
 * metrics_group_info_fini on each of its num_groups entries. */
217 metrics_info_fini (metrics_info_t *info)
221 for (i = 0; i < info->num_groups; i++)
222 metrics_group_info_fini (&info->groups[i]);
/* Return a human-readable label for 'op'. Every value at or above
 * METRICS_OP_SHADER is reported as "Shader program"; the remaining
 * operations map to fixed labels. */
228 metrics_op_string (metrics_op_t op)
230 if (op >= METRICS_OP_SHADER)
231 return "Shader program";
235 case METRICS_OP_ACCUM:
236 return "glAccum*(+)";
237 case METRICS_OP_BUFFER_DATA:
238 return "glBufferData(+)";
239 case METRICS_OP_BUFFER_SUB_DATA:
240 return "glCopyBufferSubData*";
241 case METRICS_OP_BITMAP:
243 case METRICS_OP_BLIT_FRAMEBUFFER:
244 return "glBlitFramebuffer*";
245 case METRICS_OP_CLEAR:
247 case METRICS_OP_CLEAR_BUFFER_DATA:
/* NOTE(review): the label below looks like a typo for
 * "glClearBufferData(+)". */
248 return "glCearBufferData(+)";
249 case METRICS_OP_CLEAR_TEX_IMAGE:
250 return "glClearTexImage(+)";
251 case METRICS_OP_COPY_PIXELS:
252 return "glCopyPixels";
253 case METRICS_OP_COPY_TEX_IMAGE:
254 return "glCopyTexImage(+)";
255 case METRICS_OP_DRAW_PIXELS:
256 return "glDrawPixels";
257 case METRICS_OP_GET_TEX_IMAGE:
258 return "glGetTexImage(+)";
259 case METRICS_OP_READ_PIXELS:
260 return "glReadPixels*";
261 case METRICS_OP_TEX_IMAGE:
262 return "glTexImage*(+)";
/* Not one of the operations handled above: report it on stderr as an
 * internal error. */
264 fprintf (stderr, "fips: Internal error: "
265 "Unknown metrics op value: %d\n", op);
/* Begin measuring the operation currently recorded in the context:
 * queue a new timer query and a new performance monitor, select
 * counters from every group on the monitor, then start both. */
273 metrics_counter_start (void)
/* NOTE(review): "¤t_context" looks like a mis-decoded
 * "&current_context" - confirm against the pristine file. */
275 context_t *ctx = ¤t_context;
276 timer_query_t *timer;
280 /* Create new timer query, add to list */
281 timer = xmalloc (sizeof (timer_query_t));
/* Append at the tail; an empty list gets the node as head and tail. */
286 if (ctx->timer_tail) {
287 ctx->timer_tail->next = timer;
288 ctx->timer_tail = timer;
290 ctx->timer_tail = timer;
291 ctx->timer_head = timer;
294 /* Create a new performance-monitor query */
295 monitor = xmalloc (sizeof (monitor_t));
/* Remember which operation this monitor measures. */
297 monitor->op = ctx->op;
298 monitor->next = NULL;
300 if (ctx->monitor_tail) {
301 ctx->monitor_tail->next = monitor;
302 ctx->monitor_tail = monitor;
304 ctx->monitor_tail = monitor;
305 ctx->monitor_head = monitor;
308 /* Initialize the timer_query and monitor objects */
309 glGenQueries (1, &timer->id);
311 glGenPerfMonitorsAMD (1, &monitor->id);
313 for (i = 0; i < ctx->metrics_info.num_groups; i++)
315 metrics_group_info_t *group;
318 group = &ctx->metrics_info.groups[i];
/* A group may expose more counters than can be active at once; in
 * that case the request is capped at max_active_counters and a
 * warning is printed. */
320 num_counters = group->num_counters;
321 if (group->max_active_counters < group->num_counters)
323 fprintf (stderr, "Warning: Only monitoring %d/%d counters from group %d\n",
324 group->max_active_counters,
325 group->num_counters, i);
326 num_counters = group->max_active_counters;
330 glSelectPerfMonitorCountersAMD(monitor->id,
336 /* Start the queries */
337 glBeginQuery (GL_TIME_ELAPSED, timer->id);
339 glBeginPerfMonitorAMD (monitor->id);
/* End the GL_TIME_ELAPSED query and the performance monitor at the
 * tail of the monitor list. */
343 metrics_counter_stop (void)
345 glEndQuery (GL_TIME_ELAPSED);
/* NOTE(review): monitor_tail is dereferenced without a NULL check, so
 * this relies on metrics_counter_start having queued a monitor. */
346 glEndPerfMonitorAMD (current_context.monitor_tail->id);
/* Record 'op' as the current operation of the global context;
 * metrics_counter_start copies this value into each new monitor. */
350 metrics_set_current_op (metrics_op_t op)
352 current_context.op = op;
/* Return the operation last stored in the global context. */
356 metrics_get_current_op (void)
358 return current_context.op;
/* Reset 'metrics' to an empty record: zero accumulated time and one
 * zero-filled counter array per group of 'ctx', each sized by that
 * group's num_counters. */
362 op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op)
364 metrics_info_t *info = &ctx->metrics_info;
368 metrics->time_ns = 0.0;
/* counters[group][counter]: an array of per-group arrays. */
370 metrics->counters = xmalloc (sizeof(double *) * info->num_groups);
372 for (i = 0; i < info->num_groups; i++) {
373 metrics->counters[i] = xmalloc (sizeof (double) *
374 info->groups[i].num_counters);
375 for (j = 0; j < info->groups[i].num_counters; j++)
376 metrics->counters[i][j] = 0.0;
380 static op_metrics_t *
381 ctx_get_op_metrics (context_t *ctx, metrics_op_t op)
385 if (op >= ctx->num_op_metrics)
387 ctx->op_metrics = realloc (ctx->op_metrics,
388 (op + 1) * sizeof (op_metrics_t));
389 for (i = ctx->num_op_metrics; i < op + 1; i++)
390 op_metrics_init (ctx, &ctx->op_metrics[i], i);
392 ctx->num_op_metrics = op + 1;
395 return &ctx->op_metrics[op];
399 accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size)
401 #define CONSUME(var) \
402 if (p + sizeof(var) > ((unsigned char *) result) + size) \
404 fprintf (stderr, "Unexpected end-of-buffer while " \
405 "parsing results\n"); \
408 (var) = *((typeof(var) *) p); \
411 context_t *ctx = ¤t_context;
412 metrics_info_t *info = &ctx->metrics_info;
413 unsigned char *p = (unsigned char *) result;
415 while (p < ((unsigned char *) result) + size)
417 GLuint group_id, group_index;
418 GLuint counter_id, counter_index;
419 metrics_group_info_t *group;
424 CONSUME (counter_id);
427 for (i = 0; i < info->num_groups; i++) {
428 if (info->groups[i].id == i)
432 assert (group_index < info->num_groups);
433 group = &info->groups[group_index];
435 for (i = 0; i < group->num_counters; i++) {
436 if (group->counter_ids[i] == counter_id)
440 assert (counter_index < group->num_counters);
442 ctx->op_metrics[op].counters[group_index][counter_index] += value;
/* Add 'time_ns' to the running time total of operation 'op' in the
 * global context; ctx_get_op_metrics creates the record if needed. */
447 accumulate_program_time (metrics_op_t op, unsigned time_ns)
449 op_metrics_t *metrics;
/* NOTE(review): "¤t_context" looks like a mis-decoded
 * "&current_context" - confirm against the pristine file. */
451 metrics = ctx_get_op_metrics (¤t_context, op);
453 metrics->time_ns += time_ns;
/* Comparison callback handed to qsort_r by print_program_metrics.
 * 'in_a' and 'in_b' point to int indices into the op_metrics array
 * passed as 'arg'; the two entries are compared by their time_ns. */
457 time_compare(const void *in_a, const void *in_b, void *arg)
459 int a = *(const int *)in_a;
460 int b = *(const int *)in_b;
461 struct op_metrics *metrics = arg;
463 if (metrics[a].time_ns < metrics[b].time_ns)
465 if (metrics[a].time_ns > metrics[b].time_ns)
/* Print the results for 'metric': the operation label (plus the
 * offset from METRICS_OP_SHADER for shader operations), its time in
 * milliseconds and as a percentage of 'total', and the accumulated
 * counter values of every group in 'ctx'. */
471 print_op_metrics (context_t *ctx, op_metrics_t *metric, double total)
473 metrics_info_t *info = &ctx->metrics_info;
474 metrics_group_info_t *group;
475 const char *op_string;
476 unsigned i, group_index, counter;
479 /* Since we sparsely fill the array based on program
480 * id, many "programs" have no time.
482 if (metric->time_ns == 0.0)
485 op_string = metrics_op_string (metric->op);
487 printf ("%s", op_string);
488 if (metric->op >= METRICS_OP_SHADER) {
489 printf (" %d:", metric->op - METRICS_OP_SHADER);
492 for (i = strlen (op_string); i < 20; i++)
496 printf ("\t%7.2f ms (% 2.1f%%)",
497 metric->time_ns / 1e6,
498 metric->time_ns / total * 100);
501 for (group_index = 0; group_index < info->num_groups; group_index++) {
502 group = &info->groups[group_index];
503 for (counter = 0; counter < group->num_counters; counter++) {
504 value = metric->counters[group_index][counter];
507 printf ("%s: %.2f ", group->counter_names[counter],
// Print the results of every operation recorded in the global context:
// sum the per-operation times, sort an index array with time_compare,
// and hand each entry to print_op_metrics together with the total.
515 print_program_metrics (void)
// NOTE(review): "¤t_context" looks like a mis-decoded
// "&current_context" - confirm against the pristine file.
517 context_t *ctx = ¤t_context;
518 int *sorted; /* Sorted indices into the ctx->op_metrics */
522 /* Make a sorted list of the operations by time used, and figure
523 * out the total so we can print percentages.
525 sorted = calloc(ctx->num_op_metrics, sizeof(*sorted));
526 for (i = 0; i < ctx->num_op_metrics; i++) {
528 total += ctx->op_metrics[i].time_ns;
530 qsort_r(sorted, ctx->num_op_metrics, sizeof(*sorted),
531 time_compare, ctx->op_metrics);
533 for (i = 0; i < ctx->num_op_metrics; i++)
534 print_op_metrics (ctx, &ctx->op_metrics[sorted[i]], total);
539 /* Called at program exit (registered with atexit in metrics_end_frame) */
544 printf ("fips: terminating\n");
/* NOTE(review): "¤t_context" looks like a mis-decoded
 * "&current_context" - confirm against the pristine file. */
546 metrics_info_fini (¤t_context.metrics_info);
/* Per-frame hook. One-time setup records the start time and registers
 * metrics_exit with atexit. Each call then walks the timer-query list
 * and the performance-monitor list from the head, accumulating results
 * and deleting the GL objects as it goes. Every 60th frame the average
 * frame rate since the start time is printed; print_program_metrics is
 * also invoked from here. */
551 metrics_end_frame (void)
/* NOTE(review): 'initialized' presumably guards the one-time setup
 * below; the guarding line is not visible here - confirm. */
553 static int initialized = 0;
554 static struct timeval tv_start, tv_now;
557 gettimeofday (&tv_start, NULL);
558 atexit (metrics_exit);
559 if (getenv ("FIPS_VERBOSE"))
565 printf ("fips: frame %d complete\n", frames);
568 gettimeofday (&tv_now, NULL);
570 /* Consume all timer queries that are ready. */
571 timer_query_t *timer = current_context.timer_head;
574 GLuint available, elapsed;
576 glGetQueryObjectuiv (timer->id,
577 GL_QUERY_RESULT_AVAILABLE, &available);
581 glGetQueryObjectuiv (timer->id,
582 GL_QUERY_RESULT, &elapsed);
584 accumulate_program_time (timer->op, elapsed);
/* Unlink the consumed query; an emptied list also clears the tail. */
586 current_context.timer_head = timer->next;
587 if (current_context.timer_head == NULL)
588 current_context.timer_tail = NULL;
590 glDeleteQueries (1, &timer->id);
593 timer = current_context.timer_head;
596 /* And similarly for all performance monitors that are ready. */
597 monitor_t *monitor = current_context.monitor_head;
600 GLuint available, result_size, *result;
603 glGetPerfMonitorCounterDataAMD (monitor->id,
604 GL_PERFMON_RESULT_AVAILABLE_AMD,
605 sizeof (available), &available,
/* Ask for the size of the result first, then fetch the result into a
 * buffer of that size. */
610 glGetPerfMonitorCounterDataAMD (monitor->id,
611 GL_PERFMON_RESULT_SIZE_AMD,
612 sizeof (result_size),
615 result = xmalloc (result_size);
617 glGetPerfMonitorCounterDataAMD (monitor->id,
618 GL_PERFMON_RESULT_AMD,
622 accumulate_program_metrics (monitor->op, result, result_size);
626 current_context.monitor_head = monitor->next;
627 if (current_context.monitor_head == NULL)
628 current_context.monitor_tail = NULL;
630 glDeletePerfMonitorsAMD (1, &monitor->id);
633 monitor = current_context.monitor_head;
636 if (frames % 60 == 0) {
/* Frames divided by the seconds elapsed since tv_start. */
639 fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec +
640 (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);
642 printf("FPS: %.3f\n", fps);
644 print_program_metrics ();