From 1da53248ab29d711df5602f3fdff90c45298ec77 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 11 Sep 2017 00:27:27 +0200 Subject: vo_opengl: refactor/fix mp_pass_perf code This was needlessly complicated and prone to breakage, because even the references to the ring buffer could end up getting invalidated and containing garbage data on e.g. shader cache flush. For much the same reason why we can't keep around the *timer_pool, we're also forced to hard-copy the entire sample buffer per pass per frame. Not a huge deal, though. This is, what, a few kB per frame? We have more pressing CPU performance concerns anyway. Also simplified/fixed some other code. --- player/command.c | 8 ++------ video/out/opengl/utils.c | 28 +++++++++++++--------------- video/out/opengl/utils.h | 1 + video/out/vo.h | 9 +++------ 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/player/command.c b/player/command.c index 6dc619d0eb..87709e119c 100644 --- a/player/command.c +++ b/player/command.c @@ -2882,12 +2882,8 @@ static void get_frame_perf(struct mpv_node *node, struct mp_frame_perf *perf) node_map_add(pass, "peak", MPV_FORMAT_INT64)->u.int64 = data->peak; node_map_add(pass, "count", MPV_FORMAT_INT64)->u.int64 = data->count; struct mpv_node *samples = node_map_add(pass, "samples", MPV_FORMAT_NODE_ARRAY); - - int idx = data->index; - for (int n = 0; n < data->count; n++) { - node_array_add(samples, MPV_FORMAT_INT64)->u.int64 = data->samples[idx]; - idx = (idx + 1) % PERF_SAMPLE_COUNT; - } + for (int n = 0; n < data->count; n++) + node_array_add(samples, MPV_FORMAT_INT64)->u.int64 = data->samples[n]; } } diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index cdd8903bc5..13f183f7a0 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -211,11 +211,11 @@ struct timer_pool { ra_timer *timer; bool running; // detect invalid usage - uint64_t samples[PERF_SAMPLE_COUNT]; + uint64_t samples[VO_PERF_SAMPLE_COUNT]; int sample_idx; int sample_count; - uint64_t avg_sum; + uint64_t sum; uint64_t peak; }; @@ -269,12 +269,10 @@ void timer_pool_stop(struct timer_pool *pool) if (res) { // Input res into the buffer and grab the previous value uint64_t old = pool->samples[pool->sample_idx]; + pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT); pool->samples[pool->sample_idx++] = res; - pool->sample_idx %= PERF_SAMPLE_COUNT; - - // Update average and sum - pool->avg_sum = pool->avg_sum + res - old; - pool->sample_count = MPMIN(pool->sample_count + 1, PERF_SAMPLE_COUNT); + pool->sample_idx %= VO_PERF_SAMPLE_COUNT; + pool->sum = pool->sum + res - old; // Update peak if necessary if (res >= pool->peak) { @@ -283,7 +281,7 @@ void timer_pool_stop(struct timer_pool *pool) // It's possible that the last peak was the value we just removed, // if so we need to scan for the new peak uint64_t peak = res; - for (int i = 0; i < PERF_SAMPLE_COUNT; i++) + for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++) peak = MPMAX(peak, pool->samples[i]); pool->peak = peak; } @@ -296,16 +294,16 @@ struct mp_pass_perf timer_pool_measure(struct timer_pool *pool) return (struct mp_pass_perf){0}; struct mp_pass_perf res = { - .count = pool->sample_count, - .index = (pool->sample_idx - pool->sample_count) % PERF_SAMPLE_COUNT, + .last = pool->samples[(pool->sample_idx - 1) % VO_PERF_SAMPLE_COUNT], + .avg = pool->sample_count > 0 ? pool->sum / pool->sample_count : 0, .peak = pool->peak, - .samples = pool->samples, + .count = pool->sample_count, }; - res.last = pool->samples[(pool->sample_idx - 1) % PERF_SAMPLE_COUNT]; - - if (pool->sample_count > 0) { - res.avg = pool->avg_sum / pool->sample_count; + int idx = (pool->sample_idx - pool->sample_count); + for (int i = 0; i < res.count; i++) { + idx %= VO_PERF_SAMPLE_COUNT; + res.samples[i] = pool->samples[idx++]; } return res; diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 8c52680f14..34e459f34c 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -3,6 +3,7 @@ #include #include +#include "video/out/vo.h" #include "ra.h" // A 3x2 matrix, with the translation part separate. diff --git a/video/out/vo.h b/video/out/vo.h index c6751c7524..36591cb00d 100644 --- a/video/out/vo.h +++ b/video/out/vo.h @@ -143,16 +143,13 @@ struct voctrl_playback_state { }; // VOCTRL_PERFORMANCE_DATA -#define PERF_SAMPLE_COUNT 256u +#define VO_PERF_SAMPLE_COUNT 256u struct mp_pass_perf { // times are all in nanoseconds uint64_t last, avg, peak; - // this is a ring buffer, indices are relative to index and modulo - // PERF_SAMPLE_COUNT - uint64_t *samples; - int count; - int index; + uint64_t samples[VO_PERF_SAMPLE_COUNT]; + uint64_t count; }; #define VO_PASS_PERF_MAX 128 -- cgit v1.2.3