summary | refs | log | tree | commit | diff | stats
path: root/video/out/opengl
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.xyz> 2017-08-27 09:15:50 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-09-04 13:53:14 +0200
commit: 8cf5799ab1e1cb2de22636eadf3119a319161aec (patch)
tree: 891a1917891078c003a788597662fe1c9ae3e318 /video/out/opengl
parent: f589a3bd78efbc16e8025bff0809ac3c16b8ea2b (diff)
download: mpv-8cf5799ab1e1cb2de22636eadf3119a319161aec.tar.bz2
mpv-8cf5799ab1e1cb2de22636eadf3119a319161aec.tar.xz
vo_opengl: refactor scaler LUT weight packing/loading
This is mostly done so we can support using textures with more components than the scaler LUTs have entries. But while we're at it, also change the way the weights are packed so that they're always sequential with no gaps. This allows us to simplify pass_sample_separated_get_weights as well.
Diffstat (limited to 'video/out/opengl')
-rw-r--r-- video/out/opengl/video.c 21
-rw-r--r-- video/out/opengl/video_shaders.c 27
2 files changed, 16 insertions, 32 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index f0a8635c56..09b05fd688 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1597,23 +1597,18 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
int size = scaler->kernel->size;
- int elems_per_pixel = 4;
- if (size == 1) {
- elems_per_pixel = 1;
- } else if (size == 2) {
- elems_per_pixel = 2;
- } else if (size == 6) {
- elems_per_pixel = 3;
- }
- int width = size / elems_per_pixel;
- assert(size == width * elems_per_pixel);
- const struct ra_format *fmt = ra_find_float16_format(p->ra, elems_per_pixel);
+ int num_components = size > 2 ? 4 : size;
+ const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
assert(fmt);
+ int width = (size + num_components - 1) / num_components; // round up
+ int stride = width * num_components;
+ assert(size <= stride);
+
scaler->lut_size = 1 << p->opts.scaler_lut_size;
- float *weights = talloc_array(NULL, float, scaler->lut_size * size);
- mp_compute_lut(scaler->kernel, scaler->lut_size, weights);
+ float *weights = talloc_array(NULL, float, scaler->lut_size * stride);
+ mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights);
bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 40c5e98729..b73f13434b 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -41,27 +41,16 @@ static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
struct scaler *scaler)
{
gl_sc_uniform_texture(sc, "lut", scaler->lut);
- // Define a new variable to cache the corrected fcoord.
- GLSLF("float fcoord_lut = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);
+ GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);
int N = scaler->kernel->size;
- if (N == 2) {
- GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord_lut)).rg;)
- GLSL(float weights[2] = float[](c1.r, c1.g);)
- } else if (N == 6) {
- GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord_lut));)
- GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord_lut));)
- GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
- } else {
- GLSLF("float weights[%d];\n", N);
- for (int n = 0; n < N / 4; n++) {
- GLSLF("c = texture(lut, vec2(1.0 / %d.0 + %d.0 / %d.0, fcoord_lut));\n",
- N / 2, n, N / 4);
- GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
- GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
- GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
- GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
- }
+ int width = (N + 3) / 4; // round up
+
+ GLSLF("float weights[%d];\n", N);
+ for (int i = 0; i < N; i++) {
+ if (i % 4 == 0)
+ GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width);
+ GLSLF("weights[%d] = c[%d];\n", i, i % 4);
}
}