From: Carl Worth Date: Wed, 30 Oct 2013 21:24:52 +0000 (-0700) Subject: Collect timer/monitor results whenever there are >1000 outstanding X-Git-Url: https://git.cworth.org/git?p=fips;a=commitdiff_plain;h=9e6a41d89661ce7e983bac1b747755380604e447 Collect timer/monitor results whenever there are >1000 outstanding Previously, fips always waited for a frame boundary before collecting timer and monitor results. Now, whenever more than a maximum (set to 1000 here) number of monitors have been fired off, but no results collected, fips will check and collect results for all timers/monitors that have results available. Here's some background on the debugging that led to this change: With an apitrace collected from "DOTA 2" we ran into crashes, always on the first frame of the game proper (after the opening menus, etc.). This frame is unusually large, (roughly half a million OpenGL calls). With that large frame, and the resulting large number of outstanding queries waiting to be collected, we were running into a resource limit and Mesa's performance-monitor code was crashing on an unexpectedly NULL bo->virtual pointer. A little digging determined that a DRM map ioctl was failing due to the map_count resource in the kernel being larger than the configured default (roughly 65530). After checking that neither fips nor Mesa was leaking any large number of buffer objects, (nor keeping many mapped), we decided to attempt this more aggressive collection of results in fips. As far as resource consumption in general, this does seem like a reasonable thing to do. If we have hundreds of outstanding queries, surely the oldest of them have completed, and we can free some resources by collecting those. On the other hand, it still seems wrong that the kernel is imposing an arbitrary limit on how many outstanding queries an application can have. The AMD_performance_monitor specification and implementation are not intended to have any such limitation. So, there's still some investigation to be done on what resource is causing the kernel's map_count to grow so large and to see if there's a bug there to be fixed. --- diff --git a/metrics.c b/metrics.c index eede09b..9b94eb6 100644 --- a/metrics.c +++ b/metrics.c @@ -105,12 +105,26 @@ typedef struct context metrics_op_t op; + /* GL_TIME_ELAPSED query for which glEndQuery has not yet + * been called. */ + unsigned timer_begun_id; + + /* GL_TIME_ELAPSED queries for which glEndQuery has been + * called, (but results have not yet been queried). */ timer_query_t *timer_head; timer_query_t *timer_tail; + /* Performance monitor for which glEndPerfMonitorAMD has not + * yet been called. */ + unsigned monitor_begun_id; + + /* Performance monitors for which glEndPerfMonitorAMD has + * been called, (but results have not yet been queried). */ monitor_t *monitor_head; monitor_t *monitor_tail; + int monitors_in_flight; + unsigned num_op_metrics; op_metrics_t *op_metrics; } context_t; @@ -123,6 +137,11 @@ context_t current_context; int frames; int verbose; +#define MAX_MONITORS_IN_FLIGHT 1000 + +void +metrics_collect_available (void); + static void metrics_group_info_init (metrics_group_info_t *group, GLuint id) { @@ -312,6 +331,10 @@ metrics_info_fini (void) if (! info->initialized) return; + if (ctx->timer_begun_id) { + ctx->timer_begun_id = 0; + } + for (timer = ctx->timer_head; timer; timer = timer_next) @@ -322,6 +345,10 @@ metrics_info_fini (void) ctx->timer_head = NULL; ctx->timer_tail = NULL; + if (ctx->monitor_begun_id) { + ctx->monitor_begun_id = 0; + } + for (monitor = ctx->monitor_head; monitor; monitor = monitor_next) @@ -332,6 +359,8 @@ metrics_info_fini (void) ctx->monitor_head = NULL; ctx->monitor_tail = NULL; + current_context.monitors_in_flight = 0; + for (i = 0; i < info->num_groups; i++) metrics_group_info_fini (&info->groups[i]); @@ -396,42 +425,12 @@ void metrics_counter_start (void) { context_t *ctx = ¤t_context; - timer_query_t *timer; - monitor_t *monitor; unsigned i; - /* Create new timer query, add to list */ - timer = xmalloc (sizeof (timer_query_t)); - - timer->op = ctx->op; - timer->next = NULL; - - if (ctx->timer_tail) { - ctx->timer_tail->next = timer; - ctx->timer_tail = timer; - } else { - ctx->timer_tail = timer; - ctx->timer_head = timer; - } - - /* Create a new performance-monitor query */ - monitor = xmalloc (sizeof (monitor_t)); - - monitor->op = ctx->op; - monitor->next = NULL; - - if (ctx->monitor_tail) { - ctx->monitor_tail->next = monitor; - ctx->monitor_tail = monitor; - } else { - ctx->monitor_tail = monitor; - ctx->monitor_head = monitor; - } - /* Initialize the timer_query and monitor objects */ - glGenQueries (1, &timer->id); + glGenQueries (1, &ctx->timer_begun_id); - glGenPerfMonitorsAMD (1, &monitor->id); + glGenPerfMonitorsAMD (1, &ctx->monitor_begun_id); for (i = 0; i < ctx->metrics_info.num_groups; i++) { @@ -450,23 +449,69 @@ metrics_counter_start (void) } - glSelectPerfMonitorCountersAMD(monitor->id, + glSelectPerfMonitorCountersAMD(ctx->monitor_begun_id, GL_TRUE, group->id, num_counters, group->counter_ids); } /* Start the queries */ - glBeginQuery (GL_TIME_ELAPSED, timer->id); + glBeginQuery (GL_TIME_ELAPSED, ctx->timer_begun_id); - glBeginPerfMonitorAMD (monitor->id); + glBeginPerfMonitorAMD (ctx->monitor_begun_id); } void metrics_counter_stop (void) { + context_t *ctx = ¤t_context; + timer_query_t *timer; + monitor_t *monitor; + + /* Stop the current timer and monitor. */ glEndQuery (GL_TIME_ELAPSED); - glEndPerfMonitorAMD (current_context.monitor_tail->id); + glEndPerfMonitorAMD (ctx->monitor_begun_id); + + /* Add these IDs to our lists of outstanding queries and + * monitors so the results can be collected later. */ + timer = xmalloc (sizeof (timer_query_t)); + + timer->op = ctx->op; + timer->id = ctx->timer_begun_id; + timer->next = NULL; + + if (ctx->timer_tail) { + ctx->timer_tail->next = timer; + ctx->timer_tail = timer; + } else { + ctx->timer_tail = timer; + ctx->timer_head = timer; + } + + /* Create a new performance-monitor query */ + monitor = xmalloc (sizeof (monitor_t)); + + monitor->op = ctx->op; + monitor->id = ctx->monitor_begun_id; + monitor->next = NULL; + + if (ctx->monitor_tail) { + ctx->monitor_tail->next = monitor; + ctx->monitor_tail = monitor; + } else { + ctx->monitor_tail = monitor; + ctx->monitor_head = monitor; + } + + ctx->monitors_in_flight++; + + /* Avoid being a resource hog and collect outstanding results + * once we have sent off a large number of + * queries. (Presumably, many of the outstanding queries are + * available by now.) + */ + if (ctx->monitors_in_flight > MAX_MONITORS_IN_FLIGHT) + metrics_collect_available (); } void @@ -824,24 +869,10 @@ metrics_exit (void) metrics_info_fini (); } - void -metrics_end_frame (void) +metrics_collect_available (void) { context_t *ctx = ¤t_context; - static int initialized = 0; - static struct timeval tv_start, tv_now; - - if (! initialized) { - gettimeofday (&tv_start, NULL); - atexit (metrics_exit); - if (getenv ("FIPS_VERBOSE")) - verbose = 1; - initialized = 1; - } - - frames++; - gettimeofday (&tv_now, NULL); /* Consume all timer queries that are ready. */ timer_query_t *timer = ctx->timer_head; @@ -906,12 +937,37 @@ metrics_end_frame (void) glDeletePerfMonitorsAMD (1, &monitor->id); free (monitor); + + ctx->monitors_in_flight--; + monitor = ctx->monitor_head; } +} + + +void +metrics_end_frame (void) +{ + static int initialized = 0; + static struct timeval tv_start, tv_now; + + if (! initialized) { + gettimeofday (&tv_start, NULL); + atexit (metrics_exit); + if (getenv ("FIPS_VERBOSE")) + verbose = 1; + initialized = 1; + } + + frames++; + + metrics_collect_available (); if (frames % 15 == 0) { double fps; + gettimeofday (&tv_now, NULL); + fps = (double) frames / (tv_now.tv_sec - tv_start.tv_sec + (tv_now.tv_usec - tv_start.tv_usec) / 1.0e6);