#define _GNU_SOURCE
+#include <inttypes.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
+#include <time.h>
#include <sys/time.h>
+#include <string.h>
+#include <errno.h>
#include "fips-dispatch-gl.h"
struct metrics
{
+ /* Flag to indicate first frame is being processed. */
+ bool first_frame;
+
/* Description of all available peformance counters, counter
* groups, their names and IDs, etc. */
metrics_info_t *info;
unsigned num_op_metrics;
op_metrics_t *op_metrics;
+
+ /* Per-frame time and latency measurement. */
+ GLint64 swap_begin_timestamp;
+ unsigned swap_end_timestamp_id;
+ GLint64 previous_swap_end_timestamp;
+
+ struct timespec previous_cpu_time_ts;
+ struct timespec cpu_time_ts;
};
+/* Return (a - b) while accounting for the fact that between a and b,
+ * the counter may have wrapped around (with a maximum number of bits
+ * in the counter of GL_QUERY_COUNTER_BITS). */
+static uint64_t
+subtract_timestamp (uint64_t a, uint64_t b)
+{
+ GLint timestamp_bits;
+
+ glGetQueryiv (GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, ×tamp_bits);
+
+ /* Note: "<<" isn't defined for the number of bits in the
+ * type, and the compiler will punish you.
+ */
+ if (timestamp_bits == 64)
+ return a - b;
+
+ if (b > a)
+ return (1ull << timestamp_bits) + a - b;
+ else
+ return a - b;
+}
+
metrics_t *
metrics_create (metrics_info_t *info)
{
metrics->info = info;
+ metrics->first_frame = true;
+
metrics->op = 0;
metrics->timer_begun_id = 0;
metrics->num_op_metrics = 0;
metrics->op_metrics = NULL;
+ glGenQueries (1, &metrics->swap_end_timestamp_id);
+
+ /* Get the first frame timestamp started immediately. */
+ glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP);
+
+ /* As well as the initial CPU time. */
+ clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts);
+
return metrics;
}
/* Discard and cleanup any outstanding queries. */
if (metrics->timer_begun_id) {
- glEndQuery (GL_TIME_ELAPSED);
glDeleteQueries (1, &metrics->timer_begun_id);
metrics->timer_begun_id = 0;
}
if (metrics->info->have_perfmon) {
if (metrics->monitor_begun_id) {
- glEndPerfMonitorAMD (metrics->monitor_begun_id);
glDeletePerfMonitorsAMD (1, &metrics->monitor_begun_id);
metrics->monitor_begun_id = 0;
}
{ \
fprintf (stderr, "Unexpected end-of-buffer while " \
"parsing results\n"); \
+ value = 0.0; \
break; \
} \
(var) = *((typeof(var) *) p); \
const char *op_string;
unsigned group_index, counter;
double value;
+ int program_name_length = 0;
/* Don't print anything for stages with no alloted time. */
if (per_stage->time_ns == 0.0)
op_string = metrics_op_string (op_metrics->op);
- printf ("%21s", op_string);
-
if (op_metrics->op >= METRICS_OP_SHADER) {
- printf (" %3d", op_metrics->op - METRICS_OP_SHADER);
- } else {
- printf (" ");
+ int program = op_metrics->op - METRICS_OP_SHADER;
+ glGetObjectLabel (GL_PROGRAM, program, 0,
+ &program_name_length, NULL);
+ if (program_name_length) {
+ char *program_name;
+
+ program_name = malloc(program_name_length + 1);
+ if (program_name == 0) {
+ fprintf (stderr, "Out of memory.\n");
+ exit (1);
+ }
+ glGetObjectLabel (GL_PROGRAM, program,
+ program_name_length + 1,
+ NULL, program_name);
+ printf ("%21s ", program_name);
+ } else {
+ printf ("%21s %3d", op_string, program);
+ }
+ }
+ else {
+ printf ("%21s ", op_string);
}
if (per_stage->stage)
}
}
+/* Subtract timespec values: Return (a - b) in seconds as a double. */
+#define SUBTRACT_TIMESPEC(a, b) (double) (a.tv_sec - b.tv_sec + \
+ (a.tv_nsec - b.tv_nsec) / 1e9)
void
-metrics_end_frame (metrics_t *metrics)
+metrics_end_frame_pre_swap (metrics_t *metrics)
+{
+ GLuint64 swap_end_timestamp, frame_time_ns, latency_ns;
+ double cpu_time;
+
+ /* Don't leave any counters running over the end_frame work we
+ * do here. The counters will be started again at the end of
+ * metrics_end_frame_post_swap. */
+
+ metrics_counter_stop (metrics);
+
+ /* Now that an entire frame's worth of content has gone by, we
+ * can be sure that the timer quiery for the previous frame's
+ * swap is available.
+ *
+ * Note: The only case in which this query isn't immediately
+ * available would be if there was effectively nothing in this
+ * frame. In that case, triggering a block on this query
+ * result is not a concern (since an empty frame is not
+ * interesting, and all of our counters are stopped anyway).
+ *
+ * Accepting this one block is much better than adding a
+ * linked list for these results that would reaslisticly never
+ * have more than one entry anyway.
+ */
+
+ glGetQueryObjectui64v (metrics->swap_end_timestamp_id,
+ GL_QUERY_RESULT, &swap_end_timestamp);
+
+ if (metrics->first_frame) {
+ /* Print header */
+ printf ("# frame: Frame_Number Frame_Time_milliseconds Frame_latency_milliseconds CPU_load GPU_load\n");
+
+ metrics->first_frame = false;
+ } else {
+ /* Subtract previous frame's times to get frame times. */
+ frame_time_ns = subtract_timestamp (swap_end_timestamp,
+ metrics->previous_swap_end_timestamp);
+
+ latency_ns = subtract_timestamp (swap_end_timestamp,
+ metrics->swap_begin_timestamp);
+
+ cpu_time = SUBTRACT_TIMESPEC (metrics->cpu_time_ts,
+ metrics->previous_cpu_time_ts);
+
+ /* We've waited one frame to ensure we have a timestamp
+ * result. So the time we've actually measured here is
+ * for the previous frame. */
+ printf ("frame: %d %g %g %g %g\n",
+ frames - 1,
+ (double) frame_time_ns / 1e6,
+ (double) latency_ns / 1e6,
+ cpu_time / (frame_time_ns / 1e9),
+ 0.0);
+ }
+
+ metrics->previous_swap_end_timestamp = swap_end_timestamp;
+ metrics->previous_cpu_time_ts = metrics->cpu_time_ts;
+
+ /* Before the actual swap call, we get the current timestamp
+ * value. This is a synchronous get so we can use this as the
+ * baseline for a frame latency measurment. */
+
+ glGetInteger64v (GL_TIMESTAMP, &metrics->swap_begin_timestamp);
+}
+
+void
+metrics_end_frame_post_swap (metrics_t *metrics)
{
static int initialized = 0;
static struct timeval tv_start, tv_now;
+ int err;
- /* Don't leave any counters running over work we do here. */
- metrics_counter_stop (metrics);
+ /* Now that the swap command has been queued, we issue an
+ * asynchronous query of the timestamp value. Comparing this
+ * to the synchronous get we just sent in
+ * metrics_end_frame_pre_swap allows us to measure the
+ * per-frame swap latency. */
+
+ glQueryCounter (metrics->swap_end_timestamp_id, GL_TIMESTAMP);
+
+ /* Query the total amount of CPU time spent by this process. */
+ err = clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &metrics->cpu_time_ts);
+ if (err == -1 ) {
+ fprintf (stderr, "Failed to query CPU time with clock_gettime: %s\n",
+ strerror (errno));
+ exit (1);
+ }
if (! initialized) {
gettimeofday (&tv_start, NULL);
initialized = 1;
}
- frames++;
-
metrics_collect_available (metrics);
- if (frames % 15 == 0) {
+ frames++;
+
+ if (frames) {
double fps;
gettimeofday (&tv_now, NULL);