X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=metrics.c;h=ad7ea3a330cc27b6354e13a8070209b0b406bd13;hb=refs%2Fheads%2Fframe-timings;hp=87da77834854e915dadb557e46d0a45c1dc10bb8;hpb=402fa1d53531bd911eaacddca109d5d551ac7c2d;p=fips diff --git a/metrics.c b/metrics.c index 87da778..ad7ea3a 100644 --- a/metrics.c +++ b/metrics.c @@ -21,16 +21,28 @@ #define _GNU_SOURCE +#include + #include #include #include +#include #include +#include +#include #include "fips-dispatch-gl.h" #include "metrics.h" +#include "context.h" +#include "metrics-info.h" #include "xmalloc.h" +int frames; +int verbose; + +#define MAX_MONITORS_IN_FLIGHT 1000 + /* Timer query */ typedef struct timer_query { @@ -49,11 +61,10 @@ typedef struct monitor struct monitor *next; } monitor_t; - typedef struct op_metrics { /* This happens to also be the index into the - * ctx->op_metrics array currently + * metrics->op_metrics array currently */ metrics_op_t op; double time_ns; @@ -61,48 +72,16 @@ typedef struct op_metrics double **counters; } op_metrics_t; -typedef struct metrics_group_info +struct metrics { - GLuint id; - char *name; - - GLuint num_counters; - GLuint max_active_counters; - - GLuint *counter_ids; - char **counter_names; - GLuint *counter_types; - -} metrics_group_info_t; - -typedef struct shader_stage_info -{ - char *name; - - GLuint active_group_index; - GLuint active_counter_index; - - GLuint stall_group_index; - GLuint stall_counter_index; - -} shader_stage_info_t; - -typedef struct metrics_info -{ - int initialized; - - unsigned num_groups; - metrics_group_info_t *groups; - - unsigned num_shader_stages; - shader_stage_info_t *stages; + /* Flag to indicate first frame is being processed. */ + bool first_frame; -} metrics_info_t; - -typedef struct context -{ - metrics_info_t metrics_info; + /* Description of all available peformance counters, counter + * groups, their names and IDs, etc. */ + metrics_info_t *info; + /* The current operation being measured. */ metrics_op_t op; /* GL_TIME_ELAPSED query for which glEndQuery has not yet @@ -127,217 +106,90 @@ typedef struct context unsigned num_op_metrics; op_metrics_t *op_metrics; -} context_t; - -/* FIXME: Need a map from integers to context objects and track the - * current context with glXMakeContextCurrent, eglMakeCurrent, etc. */ - -context_t current_context; - -int frames; -int verbose; - -#define MAX_MONITORS_IN_FLIGHT 1000 - -void -metrics_collect_available (void); - -static void -metrics_group_info_init (metrics_group_info_t *group, GLuint id) -{ - GLsizei length; - unsigned i; - - group->id = id; - - /* Get name */ - glGetPerfMonitorGroupStringAMD (id, 0, &length, NULL); - - group->name = xmalloc (length + 1); - glGetPerfMonitorGroupStringAMD (id, length + 1, NULL, group->name); + /* Per-frame time and latency measurement. */ + GLint64 swap_begin_timestamp; + unsigned swap_end_timestamp_id; + GLint64 previous_swap_end_timestamp; - /* Get number of counters */ - group->num_counters = 0; - group->max_active_counters = 0; - glGetPerfMonitorCountersAMD (group->id, - (int *) &group->num_counters, - (int *) &group->max_active_counters, - 0, NULL); + struct timespec previous_cpu_time_ts; + struct timespec cpu_time_ts; +}; - /* Get counter numbers */ - group->counter_ids = xmalloc (group->num_counters * sizeof (GLuint)); - - glGetPerfMonitorCountersAMD (group->id, NULL, NULL, - group->num_counters, - group->counter_ids); - - /* Get counter names */ - group->counter_names = xmalloc (group->num_counters * sizeof (char *)); - group->counter_types = xmalloc (group->num_counters * sizeof (GLuint)); - - for (i = 0; i < group->num_counters; i++) { - glGetPerfMonitorCounterInfoAMD (group->id, - group->counter_ids[i], - GL_COUNTER_TYPE_AMD, - &group->counter_types[i]); - - glGetPerfMonitorCounterStringAMD (group->id, - group->counter_ids[i], - 0, &length, NULL); - - group->counter_names[i] = xmalloc (length + 1); - - glGetPerfMonitorCounterStringAMD (group->id, - group->counter_ids[i], - length + 1, NULL, - group->counter_names[i]); - } -} - -static void -metrics_group_info_fini (metrics_group_info_t *group) +/* Return (a - b) while accounting for the fact that between a and b, + * the counter may have wrapped around (with a maximum number of bits + * in the counter of GL_QUERY_COUNTER_BITS). */ +static uint64_t +subtract_timestamp (uint64_t a, uint64_t b) { - unsigned i; + GLint timestamp_bits; - for (i = 0; i < group->num_counters; i++) - free (group->counter_names[i]); + glGetQueryiv (GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, ×tamp_bits); - free (group->counter_types); - free (group->counter_names); - free (group->counter_ids); + /* Note: "<<" isn't defined for the number of bits in the + * type, and the compiler will punish you. + */ + if (timestamp_bits == 64) + return a - b; - free (group->name); + if (b > a) + return (1ull << timestamp_bits) + a - b; + else + return a - b; } -/* A helper function, part of metrics_info_init below. */ - -typedef enum { - SHADER_ACTIVE, - SHADER_STALL -} shader_phase_t; - -static void -_add_shader_stage (metrics_info_t *info, const char *name, - GLuint group_index, GLuint counter_index, - shader_phase_t phase) +metrics_t * +metrics_create (metrics_info_t *info) { - shader_stage_info_t *stage; - char *stage_name, *space; - unsigned i; + metrics_t *metrics; - stage_name = xstrdup (name); + metrics = xmalloc (sizeof (metrics_t)); - /* Terminate the stage name at the first space. - * - * This is valid for counter names such as: - * - * "Vertex Shader Active Time" - * or - * "Vertex Shader Stall Time - Core Stall" - */ - space = strchr (stage_name, ' '); - if (space) - *space = '\0'; - - /* Look for an existing stage of the given name. */ - stage = NULL; + metrics->info = info; - for (i = 0; i < info->num_shader_stages; i++) { - if (strcmp (info->stages[i].name, stage_name) == 0) { - stage = &info->stages[i]; - break; - } - } - - if (stage == NULL) { - info->num_shader_stages++; - info->stages = xrealloc (info->stages, - info->num_shader_stages * - sizeof (shader_stage_info_t)); - stage = &info->stages[info->num_shader_stages - 1]; - stage->name = xstrdup (stage_name); - stage->active_group_index = 0; - stage->active_counter_index = 0; - stage->stall_group_index = 0; - stage->stall_counter_index = 0; - } + metrics->first_frame = true; - if (phase == SHADER_ACTIVE) { - stage->active_group_index = group_index; - stage->active_counter_index = counter_index; - } else { - stage->stall_group_index = group_index; - stage->stall_counter_index = counter_index; - } + metrics->op = 0; - free (stage_name); -} + metrics->timer_begun_id = 0; -void -metrics_info_init (void) -{ - unsigned i, j; - GLuint *group_ids; - metrics_info_t *info = ¤t_context.metrics_info; + metrics->timer_head = NULL; + metrics->timer_tail = NULL; - glGetPerfMonitorGroupsAMD ((int *) &info->num_groups, 0, NULL); + metrics->monitor_begun_id = 0; - group_ids = xmalloc (info->num_groups * sizeof (GLuint)); + metrics->monitor_head = NULL; + metrics->monitor_tail = NULL; - glGetPerfMonitorGroupsAMD (NULL, info->num_groups, group_ids); + metrics->monitors_in_flight = 0; - info->groups = xmalloc (info->num_groups * sizeof (metrics_group_info_t)); + metrics->num_op_metrics = 0; + metrics->op_metrics = NULL; - for (i = 0; i < info->num_groups; i++) - metrics_group_info_init (&info->groups[i], group_ids[i]); + glGenQueries (1, &metrics->swap_end_timestamp_id); - free (group_ids); + /* Get the first frame timestamp started immediately. */ + glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP); - /* Identify each shader stage (by looking at - * performance-counter names for specific patterns) and - * initialize structures referring to the corresponding - * counter numbers for each stage. */ - info->num_shader_stages = 0; - info->stages = NULL; + /* As well as the initial CPU time. */ + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts); - for (i = 0; i < info->num_groups; i++) { - metrics_group_info_t *group = &info->groups[i]; - for (j = 0; j < group->num_counters; j++) { - char *name = group->counter_names[j]; - if (strstr (name, "Shader Active Time")) { - _add_shader_stage (info, name, i, j, - SHADER_ACTIVE); - } - if (strstr (name, "Shader Stall Time")) { - _add_shader_stage (info, name, i, j, - SHADER_STALL); - } - } - } - - info->initialized = 1; + return metrics; } void -metrics_info_fini (void) +metrics_fini (metrics_t *metrics) { - context_t *ctx = ¤t_context; - metrics_info_t *info = &ctx->metrics_info; - unsigned i; timer_query_t *timer, *timer_next; monitor_t *monitor, *monitor_next; - if (! info->initialized) - return; - - if (ctx->timer_begun_id) { - glEndQuery (GL_TIME_ELAPSED); - glDeleteQueries (1, &ctx->timer_begun_id); - ctx->timer_begun_id = 0; + /* Discard and cleanup any outstanding queries. */ + if (metrics->timer_begun_id) { + glDeleteQueries (1, &metrics->timer_begun_id); + metrics->timer_begun_id = 0; } - for (timer = ctx->timer_head; + for (timer = metrics->timer_head; timer; timer = timer_next) { @@ -345,41 +197,38 @@ metrics_info_fini (void) timer_next = timer->next; free (timer); } - ctx->timer_head = NULL; - ctx->timer_tail = NULL; + metrics->timer_head = NULL; + metrics->timer_tail = NULL; - if (ctx->monitor_begun_id) { - glEndPerfMonitorAMD (ctx->monitor_begun_id); - glDeletePerfMonitorsAMD (1, &ctx->monitor_begun_id); - ctx->monitor_begun_id = 0; - } + if (metrics->info->have_perfmon) { - for (monitor = ctx->monitor_head; - monitor; - monitor = monitor_next) - { - glDeletePerfMonitorsAMD (1, &monitor->id); - monitor_next = monitor->next; - free (monitor); - } - ctx->monitor_head = NULL; - ctx->monitor_tail = NULL; - - current_context.monitors_in_flight = 0; + if (metrics->monitor_begun_id) { + glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id); + metrics->monitor_begun_id = 0; + } - for (i = 0; i < info->num_groups; i++) - metrics_group_info_fini (&info->groups[i]); + for (monitor = metrics->monitor_head; + monitor; + monitor = monitor_next) + { + glDeletePerfMonitorsAMD (1, &monitor->id); + monitor_next = monitor->next; + free (monitor); + } + metrics->monitor_head = NULL; + metrics->monitor_tail = NULL; - free (info->groups); - info->groups = NULL; + } - for (i = 0; i < info->num_shader_stages; i++) - free (info->stages[i].name); + metrics->monitors_in_flight = 0; +} - free (info->stages); - info->stages = NULL; +void +metrics_destroy (metrics_t *metrics) +{ + metrics_fini (metrics); - info->initialized = 0; + free (metrics); } static const char * @@ -428,22 +277,30 @@ metrics_op_string (metrics_op_t op) } void -metrics_counter_start (void) +metrics_counter_start (metrics_t *metrics) { - context_t *ctx = ¤t_context; unsigned i; - /* Initialize the timer_query and monitor objects */ - glGenQueries (1, &ctx->timer_begun_id); + /* Initialize the timer_query object. */ + glGenQueries (1, &metrics->timer_begun_id); - glGenPerfMonitorsAMD (1, &ctx->monitor_begun_id); + /* Most everything else in this function is + * performance-monitor related. If we don't have that + * extension, just start the timer query and be done. */ + if (! metrics->info->have_perfmon) { + glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id); + return; + } + + /* Initialize the performance-monitor object */ + glGenPerfMonitorsAMD (1, &metrics->monitor_begun_id); - for (i = 0; i < ctx->metrics_info.num_groups; i++) + for (i = 0; i < metrics->info->num_groups; i++) { metrics_group_info_t *group; int num_counters; - group = &ctx->metrics_info.groups[i]; + group = &metrics->info->groups[i]; num_counters = group->num_counters; if (group->max_active_counters < group->num_counters) @@ -455,87 +312,89 @@ metrics_counter_start (void) } - glSelectPerfMonitorCountersAMD(ctx->monitor_begun_id, + glSelectPerfMonitorCountersAMD(metrics->monitor_begun_id, GL_TRUE, group->id, num_counters, group->counter_ids); } /* Start the queries */ - glBeginQuery (GL_TIME_ELAPSED, ctx->timer_begun_id); + glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id); - glBeginPerfMonitorAMD (ctx->monitor_begun_id); + glBeginPerfMonitorAMD (metrics->monitor_begun_id); } void -metrics_counter_stop (void) +metrics_counter_stop (metrics_t *metrics) { - context_t *ctx = ¤t_context; timer_query_t *timer; monitor_t *monitor; /* Stop the current timer and monitor. */ glEndQuery (GL_TIME_ELAPSED); - glEndPerfMonitorAMD (ctx->monitor_begun_id); + + if (metrics->info->have_perfmon) + glEndPerfMonitorAMD (metrics->monitor_begun_id); /* Add these IDs to our lists of outstanding queries and * monitors so the results can be collected later. */ timer = xmalloc (sizeof (timer_query_t)); - timer->op = ctx->op; - timer->id = ctx->timer_begun_id; + timer->op = metrics->op; + timer->id = metrics->timer_begun_id; timer->next = NULL; - if (ctx->timer_tail) { - ctx->timer_tail->next = timer; - ctx->timer_tail = timer; + if (metrics->timer_tail) { + metrics->timer_tail->next = timer; + metrics->timer_tail = timer; } else { - ctx->timer_tail = timer; - ctx->timer_head = timer; + metrics->timer_tail = timer; + metrics->timer_head = timer; } - /* Create a new performance-monitor query */ - monitor = xmalloc (sizeof (monitor_t)); + if (metrics->info->have_perfmon) { + /* Create a new performance-monitor query */ + monitor = xmalloc (sizeof (monitor_t)); - monitor->op = ctx->op; - monitor->id = ctx->monitor_begun_id; - monitor->next = NULL; + monitor->op = metrics->op; + monitor->id = metrics->monitor_begun_id; + monitor->next = NULL; - if (ctx->monitor_tail) { - ctx->monitor_tail->next = monitor; - ctx->monitor_tail = monitor; - } else { - ctx->monitor_tail = monitor; - ctx->monitor_head = monitor; + if (metrics->monitor_tail) { + metrics->monitor_tail->next = monitor; + metrics->monitor_tail = monitor; + } else { + metrics->monitor_tail = monitor; + metrics->monitor_head = monitor; + } } - ctx->monitors_in_flight++; + metrics->monitors_in_flight++; /* Avoid being a resource hog and collect outstanding results * once we have sent off a large number of * queries. (Presumably, many of the outstanding queries are * available by now.) */ - if (ctx->monitors_in_flight > MAX_MONITORS_IN_FLIGHT) - metrics_collect_available (); + if (metrics->monitors_in_flight > MAX_MONITORS_IN_FLIGHT) + metrics_collect_available (metrics); } void -metrics_set_current_op (metrics_op_t op) +metrics_set_current_op (metrics_t *metrics, metrics_op_t op) { - current_context.op = op; + metrics->op = op; } metrics_op_t -metrics_get_current_op (void) +metrics_get_current_op (metrics_t *metrics) { - return current_context.op; + return metrics->op; } static void -op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op) +op_metrics_init (metrics_info_t *info, op_metrics_t *metrics, metrics_op_t op) { - metrics_info_t *info = &ctx->metrics_info; unsigned i, j; metrics->op = op; @@ -552,39 +411,40 @@ op_metrics_init (context_t *ctx, op_metrics_t *metrics, metrics_op_t op) } static op_metrics_t * -ctx_get_op_metrics (context_t *ctx, metrics_op_t op) +_get_op_metrics (metrics_t *metrics, metrics_op_t op) { unsigned i; - if (op >= ctx->num_op_metrics) + if (op >= metrics->num_op_metrics) { - ctx->op_metrics = realloc (ctx->op_metrics, - (op + 1) * sizeof (op_metrics_t)); - for (i = ctx->num_op_metrics; i < op + 1; i++) - op_metrics_init (ctx, &ctx->op_metrics[i], i); + metrics->op_metrics = realloc (metrics->op_metrics, + (op + 1) * sizeof (op_metrics_t)); + for (i = metrics->num_op_metrics; i < op + 1; i++) + op_metrics_init (metrics->info, &metrics->op_metrics[i], i); - ctx->num_op_metrics = op + 1; + metrics->num_op_metrics = op + 1; } - return &ctx->op_metrics[op]; + return &metrics->op_metrics[op]; } static void -accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size) +accumulate_program_metrics (metrics_t *metrics, metrics_op_t op, + GLuint *result, GLuint size) { #define CONSUME(var) \ if (p + sizeof(var) > ((unsigned char *) result) + size) \ { \ fprintf (stderr, "Unexpected end-of-buffer while " \ "parsing results\n"); \ + value = 0.0; \ break; \ } \ (var) = *((typeof(var) *) p); \ p += sizeof(var); - context_t *ctx = ¤t_context; - metrics_info_t *info = &ctx->metrics_info; - op_metrics_t *metrics = ctx_get_op_metrics (ctx, op); + metrics_info_t *info = metrics->info; + op_metrics_t *op_metrics = _get_op_metrics (metrics, op); unsigned char *p = (unsigned char *) result; while (p < ((unsigned char *) result) + size) @@ -638,18 +498,18 @@ accumulate_program_metrics (metrics_op_t op, GLuint *result, GLuint size) break; } - metrics->counters[group_index][counter_index] += value; + op_metrics->counters[group_index][counter_index] += value; } } static void -accumulate_program_time (metrics_op_t op, unsigned time_ns) +accumulate_program_time (metrics_t *metrics, metrics_op_t op, unsigned time_ns) { - op_metrics_t *metrics; + op_metrics_t *op_metrics; - metrics = ctx_get_op_metrics (¤t_context, op); + op_metrics = _get_op_metrics (metrics, op); - metrics->time_ns += time_ns; + op_metrics->time_ns += time_ns; } typedef struct per_stage_metrics @@ -688,30 +548,47 @@ _is_shader_stage_counter (metrics_info_t *info, } static void -print_per_stage_metrics (context_t *ctx, +print_per_stage_metrics (metrics_t *metrics, per_stage_metrics_t *per_stage, double total) { - metrics_info_t *info = &ctx->metrics_info; - op_metrics_t *metric = per_stage->metrics; + metrics_info_t *info = metrics->info; + op_metrics_t *op_metrics = per_stage->metrics; metrics_group_info_t *group; const char *op_string; unsigned group_index, counter; double value; + int program_name_length = 0; /* Don't print anything for stages with no alloted time. */ if (per_stage->time_ns == 0.0) return; - op_string = metrics_op_string (metric->op); + op_string = metrics_op_string (op_metrics->op); - printf ("%21s", op_string); + if (op_metrics->op >= METRICS_OP_SHADER) { + int program = op_metrics->op - METRICS_OP_SHADER; + glGetObjectLabel (GL_PROGRAM, program, 0, + &program_name_length, NULL); + if (program_name_length) { + char *program_name; - if (metric->op >= METRICS_OP_SHADER) { - printf (" %3d", metric->op - METRICS_OP_SHADER); - } else { - printf (" "); + program_name = malloc(program_name_length + 1); + if (program_name == 0) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + glGetObjectLabel (GL_PROGRAM, program, + program_name_length + 1, + NULL, program_name); + printf ("%21s ", program_name); + } else { + printf ("%21s %3d", op_string, program); + } + } + else { + printf ("%21s ", op_string); } if (per_stage->stage) @@ -745,7 +622,7 @@ print_per_stage_metrics (context_t *ctx, if (_is_shader_stage_counter (info, group_index, counter)) continue; - value = metric->counters[group_index][counter]; + value = op_metrics->counters[group_index][counter]; if (value == 0.0) continue; printf ("%s: %.2f ", group->counter_names[counter], @@ -770,10 +647,9 @@ time_compare(const void *in_a, const void *in_b, void *arg unused) } static void -print_program_metrics (void) +print_program_metrics (metrics_t *metrics) { - context_t *ctx = ¤t_context; - metrics_info_t *info = &ctx->metrics_info; + metrics_info_t *info = metrics->info; unsigned num_shader_stages = info->num_shader_stages; per_stage_metrics_t *sorted, *per_stage; double total_time, op_cycles; @@ -781,18 +657,32 @@ print_program_metrics (void) unsigned group_index, counter_index; unsigned i, j, num_sorted; + if (! info->have_perfmon && + ! info->printed_missing_perfmon_warning) + { + fprintf (stderr, + "Warning: The GL_AMD_performance_monitor extensions is not available.\n" + " Some fips features, (such as per-shader-stage timings),\n" + " will not be available.\n"); + + info->printed_missing_perfmon_warning = true; + } + /* Make a sorted list of the per-stage operations by time * used, and figure out the total so we can print percentages. */ - num_sorted = ctx->num_op_metrics * num_shader_stages; + if (num_shader_stages) + num_sorted = metrics->num_op_metrics * num_shader_stages; + else + num_sorted = metrics->num_op_metrics; sorted = xmalloc (sizeof (*sorted) * num_sorted); total_time = 0.0; - for (i = 0; i < ctx->num_op_metrics; i++) { + for (i = 0; i < metrics->num_op_metrics; i++) { - op = &ctx->op_metrics[i]; + op = &metrics->op_metrics[i]; /* Accumulate total time across all ops. */ total_time += op->time_ns; @@ -800,6 +690,14 @@ print_program_metrics (void) /* Also, find total cycles in all stages of this op. */ op_cycles = 0.0; + if (num_shader_stages == 0) { + per_stage = &sorted[i]; + per_stage->metrics = op; + per_stage->stage = NULL; + per_stage->time_ns = op->time_ns; + per_stage->active = 0.0; + } + for (j = 0; j < num_shader_stages; j++) { /* Active cycles */ group_index = info->stages[j].active_group_index; @@ -857,31 +755,19 @@ print_program_metrics (void) } qsort_r (sorted, num_sorted, sizeof (*sorted), - time_compare, ctx->op_metrics); + time_compare, metrics->op_metrics); for (i = 0; i < num_sorted; i++) - print_per_stage_metrics (ctx, &sorted[i], total_time); + print_per_stage_metrics (metrics, &sorted[i], total_time); free (sorted); } -/* Called at program exit */ -static void -metrics_exit (void) -{ - if (verbose) - printf ("fips: terminating\n"); - - metrics_info_fini (); -} - void -metrics_collect_available (void) +metrics_collect_available (metrics_t *metrics) { - context_t *ctx = ¤t_context; - /* Consume all timer queries that are ready. */ - timer_query_t *timer = ctx->timer_head; + timer_query_t *timer = metrics->timer_head; while (timer) { GLuint available, elapsed; @@ -894,20 +780,23 @@ metrics_collect_available (void) glGetQueryObjectuiv (timer->id, GL_QUERY_RESULT, &elapsed); - accumulate_program_time (timer->op, elapsed); + accumulate_program_time (metrics, timer->op, elapsed); - ctx->timer_head = timer->next; - if (ctx->timer_head == NULL) - ctx->timer_tail = NULL; + metrics->timer_head = timer->next; + if (metrics->timer_head == NULL) + metrics->timer_tail = NULL; glDeleteQueries (1, &timer->id); free (timer); - timer = ctx->timer_head; + timer = metrics->timer_head; } + if (! metrics->info->have_perfmon) + return; + /* And similarly for all performance monitors that are ready. */ - monitor_t *monitor = ctx->monitor_head; + monitor_t *monitor = metrics->monitor_head; while (monitor) { GLuint available, result_size, *result; @@ -932,44 +821,130 @@ metrics_collect_available (void) result_size, result, &bytes_written); - accumulate_program_metrics (monitor->op, result, result_size); + accumulate_program_metrics (metrics, monitor->op, result, result_size); free (result); - ctx->monitor_head = monitor->next; - if (ctx->monitor_head == NULL) - ctx->monitor_tail = NULL; + metrics->monitor_head = monitor->next; + if (metrics->monitor_head == NULL) + metrics->monitor_tail = NULL; glDeletePerfMonitorsAMD (1, &monitor->id); free (monitor); - ctx->monitors_in_flight--; + metrics->monitors_in_flight--; - monitor = ctx->monitor_head; + monitor = metrics->monitor_head; } } +/* Subtract timespec values: Return (a - b) in seconds as a double. */ +#define SUBTRACT_TIMESPEC(a, b) (double) (a.tv_sec - b.tv_sec + \ + (a.tv_nsec - b.tv_nsec) / 1e9) + +void +metrics_end_frame_pre_swap (metrics_t *metrics) +{ + GLuint64 swap_end_timestamp, frame_time_ns, latency_ns; + double cpu_time; + + /* Don't leave any counters running over the end_frame work we + * do here. The counters will be started again at the end of + * metrics_end_frame_post_swap. */ + + metrics_counter_stop (metrics); + + /* Now that an entire frame's worth of content has gone by, we + * can be sure that the timer quiery for the previous frame's + * swap is available. + * + * Note: The only case in which this query isn't immediately + * available would be if there was effectively nothing in this + * frame. In that case, triggering a block on this query + * result is not a concern (since an empty frame is not + * interesting, and all of our counters are stopped anyway). + * + * Accepting this one block is much better than adding a + * linked list for these results that would reaslisticly never + * have more than one entry anyway. + */ + + glGetQueryObjectui64v (metrics->swap_end_timestamp_id, + GL_QUERY_RESULT, &swap_end_timestamp); + + if (metrics->first_frame) { + /* Print header */ + printf ("# frame: Frame_Number Frame_Time_milliseconds Frame_latency_milliseconds CPU_load GPU_load\n"); + + metrics->first_frame = false; + } else { + /* Subtract previous frame's times to get frame times. */ + frame_time_ns = subtract_timestamp (swap_end_timestamp, + metrics->previous_swap_end_timestamp); + + latency_ns = subtract_timestamp (swap_end_timestamp, + metrics->swap_begin_timestamp); + + cpu_time = SUBTRACT_TIMESPEC (metrics->cpu_time_ts, + metrics->previous_cpu_time_ts); + + /* We've waited one frame to ensure we have a timestamp + * result. So the time we've actually measured here is + * for the previous frame. */ + printf ("frame: %d %g %g %g %g\n", + frames - 1, + (double) frame_time_ns / 1e6, + (double) latency_ns / 1e6, + cpu_time / (frame_time_ns / 1e9), + 0.0); + } + + metrics->previous_swap_end_timestamp = swap_end_timestamp; + metrics->previous_cpu_time_ts = metrics->cpu_time_ts; + + /* Before the actual swap call, we get the current timestamp + * value. This is a synchronous get so we can use this as the + * baseline for a frame latency measurment. */ + + glGetInteger64v (GL_TIMESTAMP, &metrics->swap_begin_timestamp); +} void -metrics_end_frame (void) +metrics_end_frame_post_swap (metrics_t *metrics) { static int initialized = 0; static struct timeval tv_start, tv_now; + int err; + + /* Now that the swap command has been queued, we issue an + * asynchronous query of the timestamp value. Comparing this + * to the synchronous get we just sent in + * metrics_end_frame_pre_swap allows us to measure the + * per-frame swap latency. */ + + glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP); + + /* Query the total amount of CPU time spent by this process. */ + err = clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts); + if (err == -1 ) { + fprintf (stderr, "Failed to query CPU time with clock_gettime: %s\n", + strerror (errno)); + exit (1); + } if (! initialized) { gettimeofday (&tv_start, NULL); - atexit (metrics_exit); if (getenv ("FIPS_VERBOSE")) verbose = 1; initialized = 1; } - frames++; + metrics_collect_available (metrics); - metrics_collect_available (); + frames++; - if (frames % 15 == 0) { + if (frames) { double fps; gettimeofday (&tv_now, NULL); @@ -979,6 +954,9 @@ metrics_end_frame (void) printf("FPS: %.3f\n", fps); - print_program_metrics (); + print_program_metrics (metrics); } + + /* Start the counter up again now that we're done. */ + metrics_counter_start (metrics); }