X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=metrics.c;h=ad7ea3a330cc27b6354e13a8070209b0b406bd13;hb=refs%2Fheads%2Fframe-timings;hp=004c3399522ce907533a73f0a75d297f895316a4;hpb=0fee0398ec378498675d0d4fd5c0ec04ef84961a;p=fips diff --git a/metrics.c b/metrics.c index 004c339..ad7ea3a 100644 --- a/metrics.c +++ b/metrics.c @@ -21,10 +21,15 @@ #define _GNU_SOURCE +#include + #include #include #include +#include #include +#include +#include #include "fips-dispatch-gl.h" @@ -69,6 +74,9 @@ typedef struct op_metrics struct metrics { + /* Flag to indicate first frame is being processed. */ + bool first_frame; + /* Description of all available peformance counters, counter * groups, their names and IDs, etc. */ metrics_info_t *info; @@ -98,8 +106,38 @@ struct metrics unsigned num_op_metrics; op_metrics_t *op_metrics; + + /* Per-frame time and latency measurement. */ + GLint64 swap_begin_timestamp; + unsigned swap_end_timestamp_id; + GLint64 previous_swap_end_timestamp; + + struct timespec previous_cpu_time_ts; + struct timespec cpu_time_ts; }; +/* Return (a - b) while accounting for the fact that between a and b, + * the counter may have wrapped around (with a maximum number of bits + * in the counter of GL_QUERY_COUNTER_BITS). */ +static uint64_t +subtract_timestamp (uint64_t a, uint64_t b) +{ + GLint timestamp_bits; + + glGetQueryiv (GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, ×tamp_bits); + + /* Note: "<<" isn't defined for the number of bits in the + * type, and the compiler will punish you. + */ + if (timestamp_bits == 64) + return a - b; + + if (b > a) + return (1ull << timestamp_bits) + a - b; + else + return a - b; +} + metrics_t * metrics_create (metrics_info_t *info) { @@ -109,6 +147,8 @@ metrics_create (metrics_info_t *info) metrics->info = info; + metrics->first_frame = true; + metrics->op = 0; metrics->timer_begun_id = 0; @@ -126,6 +166,14 @@ metrics_create (metrics_info_t *info) metrics->num_op_metrics = 0; metrics->op_metrics = NULL; + glGenQueries (1, &metrics->swap_end_timestamp_id); + + /* Get the first frame timestamp started immediately. */ + glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP); + + /* As well as the initial CPU time. */ + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts); + return metrics; } @@ -137,7 +185,6 @@ metrics_fini (metrics_t *metrics) /* Discard and cleanup any outstanding queries. */ if (metrics->timer_begun_id) { - glEndQuery (GL_TIME_ELAPSED); glDeleteQueries (1, &metrics->timer_begun_id); metrics->timer_begun_id = 0; } @@ -153,22 +200,25 @@ metrics_fini (metrics_t *metrics) metrics->timer_head = NULL; metrics->timer_tail = NULL; - if (metrics->monitor_begun_id) { - glEndPerfMonitorAMD (metrics->monitor_begun_id); - glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id); - metrics->monitor_begun_id = 0; - } + if (metrics->info->have_perfmon) { + + if (metrics->monitor_begun_id) { + glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id); + metrics->monitor_begun_id = 0; + } + + for (monitor = metrics->monitor_head; + monitor; + monitor = monitor_next) + { + glDeletePerfMonitorsAMD (1, &monitor->id); + monitor_next = monitor->next; + free (monitor); + } + metrics->monitor_head = NULL; + metrics->monitor_tail = NULL; - for (monitor = metrics->monitor_head; - monitor; - monitor = monitor_next) - { - glDeletePerfMonitorsAMD (1, &monitor->id); - monitor_next = monitor->next; - free (monitor); } - metrics->monitor_head = NULL; - metrics->monitor_tail = NULL; metrics->monitors_in_flight = 0; } @@ -231,9 +281,18 @@ metrics_counter_start (metrics_t *metrics) { unsigned i; - /* Initialize the timer_query and monitor objects */ + /* Initialize the timer_query object. */ glGenQueries (1, &metrics->timer_begun_id); + /* Most everything else in this function is + * performance-monitor related. If we don't have that + * extension, just start the timer query and be done. */ + if (! metrics->info->have_perfmon) { + glBeginQuery (GL_TIME_ELAPSED, metrics->timer_begun_id); + return; + } + + /* Initialize the performance-monitor object */ glGenPerfMonitorsAMD (1, &metrics->monitor_begun_id); for (i = 0; i < metrics->info->num_groups; i++) @@ -273,7 +332,9 @@ metrics_counter_stop (metrics_t *metrics) /* Stop the current timer and monitor. */ glEndQuery (GL_TIME_ELAPSED); - glEndPerfMonitorAMD (metrics->monitor_begun_id); + + if (metrics->info->have_perfmon) + glEndPerfMonitorAMD (metrics->monitor_begun_id); /* Add these IDs to our lists of outstanding queries and * monitors so the results can be collected later. */ @@ -291,19 +352,21 @@ metrics_counter_stop (metrics_t *metrics) metrics->timer_head = timer; } - /* Create a new performance-monitor query */ - monitor = xmalloc (sizeof (monitor_t)); + if (metrics->info->have_perfmon) { + /* Create a new performance-monitor query */ + monitor = xmalloc (sizeof (monitor_t)); - monitor->op = metrics->op; - monitor->id = metrics->monitor_begun_id; - monitor->next = NULL; + monitor->op = metrics->op; + monitor->id = metrics->monitor_begun_id; + monitor->next = NULL; - if (metrics->monitor_tail) { - metrics->monitor_tail->next = monitor; - metrics->monitor_tail = monitor; - } else { - metrics->monitor_tail = monitor; - metrics->monitor_head = monitor; + if (metrics->monitor_tail) { + metrics->monitor_tail->next = monitor; + metrics->monitor_tail = monitor; + } else { + metrics->monitor_tail = monitor; + metrics->monitor_head = monitor; + } } metrics->monitors_in_flight++; @@ -374,6 +437,7 @@ accumulate_program_metrics (metrics_t *metrics, metrics_op_t op, { \ fprintf (stderr, "Unexpected end-of-buffer while " \ "parsing results\n"); \ + value = 0.0; \ break; \ } \ (var) = *((typeof(var) *) p); \ @@ -494,6 +558,7 @@ print_per_stage_metrics (metrics_t *metrics, const char *op_string; unsigned group_index, counter; double value; + int program_name_length = 0; /* Don't print anything for stages with no alloted time. */ if (per_stage->time_ns == 0.0) @@ -501,13 +566,29 @@ print_per_stage_metrics (metrics_t *metrics, op_string = metrics_op_string (op_metrics->op); - printf ("%21s", op_string); - if (op_metrics->op >= METRICS_OP_SHADER) { - printf (" %3d", op_metrics->op - METRICS_OP_SHADER); - } else { - printf (" "); + int program = op_metrics->op - METRICS_OP_SHADER; + glGetObjectLabel (GL_PROGRAM, program, 0, + &program_name_length, NULL); + if (program_name_length) { + char *program_name; + + program_name = malloc(program_name_length + 1); + if (program_name == 0) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + glGetObjectLabel (GL_PROGRAM, program, + program_name_length + 1, + NULL, program_name); + printf ("%21s ", program_name); + } else { + printf ("%21s %3d", op_string, program); + } + } + else { + printf ("%21s ", op_string); } if (per_stage->stage) @@ -576,6 +657,17 @@ print_program_metrics (metrics_t *metrics) unsigned group_index, counter_index; unsigned i, j, num_sorted; + if (! info->have_perfmon && + ! info->printed_missing_perfmon_warning) + { + fprintf (stderr, + "Warning: The GL_AMD_performance_monitor extensions is not available.\n" + " Some fips features, (such as per-shader-stage timings),\n" + " will not be available.\n"); + + info->printed_missing_perfmon_warning = true; + } + /* Make a sorted list of the per-stage operations by time * used, and figure out the total so we can print percentages. */ @@ -700,6 +792,9 @@ metrics_collect_available (metrics_t *metrics) timer = metrics->timer_head; } + if (! metrics->info->have_perfmon) + return; + /* And similarly for all performance monitors that are ready. */ monitor_t *monitor = metrics->monitor_head; @@ -744,12 +839,99 @@ metrics_collect_available (metrics_t *metrics) } } +/* Subtract timespec values: Return (a - b) in seconds as a double. */ +#define SUBTRACT_TIMESPEC(a, b) (double) (a.tv_sec - b.tv_sec + \ + (a.tv_nsec - b.tv_nsec) / 1e9) + +void +metrics_end_frame_pre_swap (metrics_t *metrics) +{ + GLuint64 swap_end_timestamp, frame_time_ns, latency_ns; + double cpu_time; + + /* Don't leave any counters running over the end_frame work we + * do here. The counters will be started again at the end of + * metrics_end_frame_post_swap. */ + + metrics_counter_stop (metrics); + + /* Now that an entire frame's worth of content has gone by, we + * can be sure that the timer quiery for the previous frame's + * swap is available. + * + * Note: The only case in which this query isn't immediately + * available would be if there was effectively nothing in this + * frame. In that case, triggering a block on this query + * result is not a concern (since an empty frame is not + * interesting, and all of our counters are stopped anyway). + * + * Accepting this one block is much better than adding a + * linked list for these results that would reaslisticly never + * have more than one entry anyway. + */ + + glGetQueryObjectui64v (metrics->swap_end_timestamp_id, + GL_QUERY_RESULT, &swap_end_timestamp); + + if (metrics->first_frame) { + /* Print header */ + printf ("# frame: Frame_Number Frame_Time_milliseconds Frame_latency_milliseconds CPU_load GPU_load\n"); + + metrics->first_frame = false; + } else { + /* Subtract previous frame's times to get frame times. */ + frame_time_ns = subtract_timestamp (swap_end_timestamp, + metrics->previous_swap_end_timestamp); + + latency_ns = subtract_timestamp (swap_end_timestamp, + metrics->swap_begin_timestamp); + + cpu_time = SUBTRACT_TIMESPEC (metrics->cpu_time_ts, + metrics->previous_cpu_time_ts); + + /* We've waited one frame to ensure we have a timestamp + * result. So the time we've actually measured here is + * for the previous frame. */ + printf ("frame: %d %g %g %g %g\n", + frames - 1, + (double) frame_time_ns / 1e6, + (double) latency_ns / 1e6, + cpu_time / (frame_time_ns / 1e9), + 0.0); + } + + metrics->previous_swap_end_timestamp = swap_end_timestamp; + metrics->previous_cpu_time_ts = metrics->cpu_time_ts; + + /* Before the actual swap call, we get the current timestamp + * value. This is a synchronous get so we can use this as the + * baseline for a frame latency measurment. */ + + glGetInteger64v (GL_TIMESTAMP, &metrics->swap_begin_timestamp); +} void -metrics_end_frame (metrics_t *metrics) +metrics_end_frame_post_swap (metrics_t *metrics) { static int initialized = 0; static struct timeval tv_start, tv_now; + int err; + + /* Now that the swap command has been queued, we issue an + * asynchronous query of the timestamp value. Comparing this + * to the synchronous get we just sent in + * metrics_end_frame_pre_swap allows us to measure the + * per-frame swap latency. */ + + glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP); + + /* Query the total amount of CPU time spent by this process. */ + err = clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts); + if (err == -1 ) { + fprintf (stderr, "Failed to query CPU time with clock_gettime: %s\n", + strerror (errno)); + exit (1); + } if (! initialized) { gettimeofday (&tv_start, NULL); @@ -758,11 +940,11 @@ metrics_end_frame (metrics_t *metrics) initialized = 1; } - frames++; - metrics_collect_available (metrics); - if (frames % 15 == 0) { + frames++; + + if (frames) { double fps; gettimeofday (&tv_now, NULL); @@ -774,4 +956,7 @@ metrics_end_frame (metrics_t *metrics) print_program_metrics (metrics); } + + /* Start the counter up again now that we're done. */ + metrics_counter_start (metrics); }