summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.xyz> 2017-09-11 00:27:27 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-09-11 00:35:23 +0200
commit: 1da53248ab29d711df5602f3fdff90c45298ec77 (patch)
tree: 749585e28538567588cdf2c83590ae3d6b96eb51
parent: d0c87dd57918cab1afc4d5968355b6253377c0b1 (diff)
download: mpv-1da53248ab29d711df5602f3fdff90c45298ec77.tar.bz2
download: mpv-1da53248ab29d711df5602f3fdff90c45298ec77.tar.xz
vo_opengl: refactor/fix mp_pass_perf code
This was needlessly complicated and prone to breakage, because even the references to the ring buffer could end up getting invalidated and containing garbage data on e.g. shader cache flush.

For much the same reason that we can't keep around the *timer_pool, we're also forced to hard-copy the entire sample buffer per pass per frame. Not a huge deal, though. This is, what, a few kB per frame? We have more pressing CPU performance concerns anyway.

Also simplified/fixed some other code.
-rw-r--r-- player/command.c | 8
-rw-r--r-- video/out/opengl/utils.c | 28
-rw-r--r-- video/out/opengl/utils.h | 1
-rw-r--r-- video/out/vo.h | 9
4 files changed, 19 insertions, 27 deletions
diff --git a/player/command.c b/player/command.c
index 6dc619d0eb..87709e119c 100644
--- a/player/command.c
+++ b/player/command.c
@@ -2882,12 +2882,8 @@ static void get_frame_perf(struct mpv_node *node, struct mp_frame_perf *perf)
node_map_add(pass, "peak", MPV_FORMAT_INT64)->u.int64 = data->peak;
node_map_add(pass, "count", MPV_FORMAT_INT64)->u.int64 = data->count;
struct mpv_node *samples = node_map_add(pass, "samples", MPV_FORMAT_NODE_ARRAY);
-
- int idx = data->index;
- for (int n = 0; n < data->count; n++) {
- node_array_add(samples, MPV_FORMAT_INT64)->u.int64 = data->samples[idx];
- idx = (idx + 1) % PERF_SAMPLE_COUNT;
- }
+ for (int n = 0; n < data->count; n++)
+ node_array_add(samples, MPV_FORMAT_INT64)->u.int64 = data->samples[n];
}
}
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index cdd8903bc5..13f183f7a0 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -211,11 +211,11 @@ struct timer_pool {
ra_timer *timer;
bool running; // detect invalid usage
- uint64_t samples[PERF_SAMPLE_COUNT];
+ uint64_t samples[VO_PERF_SAMPLE_COUNT];
int sample_idx;
int sample_count;
- uint64_t avg_sum;
+ uint64_t sum;
uint64_t peak;
};
@@ -269,12 +269,10 @@ void timer_pool_stop(struct timer_pool *pool)
if (res) {
// Input res into the buffer and grab the previous value
uint64_t old = pool->samples[pool->sample_idx];
+ pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT);
pool->samples[pool->sample_idx++] = res;
- pool->sample_idx %= PERF_SAMPLE_COUNT;
-
- // Update average and sum
- pool->avg_sum = pool->avg_sum + res - old;
- pool->sample_count = MPMIN(pool->sample_count + 1, PERF_SAMPLE_COUNT);
+ pool->sample_idx %= VO_PERF_SAMPLE_COUNT;
+ pool->sum = pool->sum + res - old;
// Update peak if necessary
if (res >= pool->peak) {
@@ -283,7 +281,7 @@ void timer_pool_stop(struct timer_pool *pool)
// It's possible that the last peak was the value we just removed,
// if so we need to scan for the new peak
uint64_t peak = res;
- for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
+ for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++)
peak = MPMAX(peak, pool->samples[i]);
pool->peak = peak;
}
@@ -296,16 +294,16 @@ struct mp_pass_perf timer_pool_measure(struct timer_pool *pool)
return (struct mp_pass_perf){0};
struct mp_pass_perf res = {
- .count = pool->sample_count,
- .index = (pool->sample_idx - pool->sample_count) % PERF_SAMPLE_COUNT,
+ .last = pool->samples[(pool->sample_idx - 1) % VO_PERF_SAMPLE_COUNT],
+ .avg = pool->sample_count > 0 ? pool->sum / pool->sample_count : 0,
.peak = pool->peak,
- .samples = pool->samples,
+ .count = pool->sample_count,
};
- res.last = pool->samples[(pool->sample_idx - 1) % PERF_SAMPLE_COUNT];
-
- if (pool->sample_count > 0) {
- res.avg = pool->avg_sum / pool->sample_count;
+ int idx = (pool->sample_idx - pool->sample_count);
+ for (int i = 0; i < res.count; i++) {
+ idx %= VO_PERF_SAMPLE_COUNT;
+ res.samples[i] = pool->samples[idx++];
}
return res;
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 8c52680f14..34e459f34c 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -3,6 +3,7 @@
#include <stdbool.h>
#include <math.h>
+#include "video/out/vo.h"
#include "ra.h"
// A 3x2 matrix, with the translation part separate.
diff --git a/video/out/vo.h b/video/out/vo.h
index c6751c7524..36591cb00d 100644
--- a/video/out/vo.h
+++ b/video/out/vo.h
@@ -143,16 +143,13 @@ struct voctrl_playback_state {
};
// VOCTRL_PERFORMANCE_DATA
-#define PERF_SAMPLE_COUNT 256u
+#define VO_PERF_SAMPLE_COUNT 256u
struct mp_pass_perf {
// times are all in nanoseconds
uint64_t last, avg, peak;
- // this is a ring buffer, indices are relative to index and modulo
- // PERF_SAMPLE_COUNT
- uint64_t *samples;
- int count;
- int index;
+ uint64_t samples[VO_PERF_SAMPLE_COUNT];
+ uint64_t count;
};
#define VO_PASS_PERF_MAX 128