summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--video/out/filter_kernels.c9
-rw-r--r--video/out/filter_kernels.h3
-rw-r--r--video/out/opengl/video.c21
-rw-r--r--video/out/opengl/video_shaders.c27
4 files changed, 24 insertions, 36 deletions
diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c
index 87fd129714..bfbd4e9465 100644
--- a/video/out/filter_kernels.c
+++ b/video/out/filter_kernels.c
@@ -142,14 +142,17 @@ static void mp_compute_weights(struct filter_kernel *filter, double f,
}
// Fill the given array with weights for the range [0.0, 1.0]. The array is
-// interpreted as rectangular array of count * filter->size items.
+// interpreted as a rectangular array of count rows of filter->size items each,
+// with consecutive rows placed `stride` floats apart. (For polar filters,
+// `count` is the row size and filter->size/stride are ignored.)
//
// There will be slight sampling error if these weights are used in a OpenGL
// texture as LUT directly. The sampling point of a texel is located at its
// center, so out_array[0] will end up at 0.5 / count instead of 0.0.
// Correct lookup requires a linear coordinate mapping from [0.0, 1.0] to
// [0.5 / count, 1.0 - 0.5 / count].
-void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
+void mp_compute_lut(struct filter_kernel *filter, int count, int stride,
+ float *out_array)
{
if (filter->polar) {
filter->radius_cutoff = 0.0;
@@ -165,7 +168,7 @@ void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
// Compute a 2D array indexed by subpixel position
for (int n = 0; n < count; n++) {
mp_compute_weights(filter, n / (double)(count - 1),
- out_array + filter->size * n);
+ out_array + stride * n);
}
}
}
diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h
index ac9b7fd39a..dd9672a256 100644
--- a/video/out/filter_kernels.h
+++ b/video/out/filter_kernels.h
@@ -50,6 +50,7 @@ const struct filter_kernel *mp_find_filter_kernel(const char *name);
bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
double scale);
-void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array);
+void mp_compute_lut(struct filter_kernel *filter, int count, int stride,
+ float *out_array);
#endif /* MPLAYER_FILTER_KERNELS_H */
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index f0a8635c56..09b05fd688 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1597,23 +1597,18 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
int size = scaler->kernel->size;
- int elems_per_pixel = 4;
- if (size == 1) {
- elems_per_pixel = 1;
- } else if (size == 2) {
- elems_per_pixel = 2;
- } else if (size == 6) {
- elems_per_pixel = 3;
- }
- int width = size / elems_per_pixel;
- assert(size == width * elems_per_pixel);
- const struct ra_format *fmt = ra_find_float16_format(p->ra, elems_per_pixel);
+ int num_components = size > 2 ? 4 : size;
+ const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
assert(fmt);
+ int width = (size + num_components - 1) / num_components; // round up
+ int stride = width * num_components;
+ assert(size <= stride);
+
scaler->lut_size = 1 << p->opts.scaler_lut_size;
- float *weights = talloc_array(NULL, float, scaler->lut_size * size);
- mp_compute_lut(scaler->kernel, scaler->lut_size, weights);
+ float *weights = talloc_array(NULL, float, scaler->lut_size * stride);
+ mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights);
bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 40c5e98729..b73f13434b 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -41,27 +41,16 @@ static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
struct scaler *scaler)
{
gl_sc_uniform_texture(sc, "lut", scaler->lut);
- // Define a new variable to cache the corrected fcoord.
- GLSLF("float fcoord_lut = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);
+ GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);
int N = scaler->kernel->size;
- if (N == 2) {
- GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord_lut)).rg;)
- GLSL(float weights[2] = float[](c1.r, c1.g);)
- } else if (N == 6) {
- GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord_lut));)
- GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord_lut));)
- GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
- } else {
- GLSLF("float weights[%d];\n", N);
- for (int n = 0; n < N / 4; n++) {
- GLSLF("c = texture(lut, vec2(1.0 / %d.0 + %d.0 / %d.0, fcoord_lut));\n",
- N / 2, n, N / 4);
- GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
- GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
- GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
- GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
- }
+ int width = (N + 3) / 4; // round up
+
+ GLSLF("float weights[%d];\n", N);
+ for (int i = 0; i < N; i++) {
+ if (i % 4 == 0)
+ GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width);
+ GLSLF("weights[%d] = c[%d];\n", i, i % 4);
}
}