summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
author Niklas Haas <git@nand.wakku.to> 2016-03-07 17:34:47 +0100
committer wm4 <wm4@nowhere> 2016-03-07 22:31:15 +0100
commit 9f91bc4b75eda0395ed6bc8b3ae3b58e663489a6 (patch)
tree d703631b04060567c278b743eda029449e9272b9 /video
parent 5fa45fb5641c95a5dc65faac2087d842333b5425 (diff)
download mpv-9f91bc4b75eda0395ed6bc8b3ae3b58e663489a6.tar.bz2
mpv-9f91bc4b75eda0395ed6bc8b3ae3b58e663489a6.tar.xz
vo_opengl: refactor superxbr algorithm
This is a fresh implementation from scratch that carries with it significantly less baggage and verbosity from the previous (ported) version. The actual values for the masks and such were copied from the current code. Behavior and performance should be unaffected. An important difference between the old code and the new code is that the new code always explicitly samples from the first component, rather than being able to process multiple planes at once. Since prescale-luma only affects luma, I deemed this unnecessary. May change in the future, if prescale-chroma ever gets implemented. But prescaling multiple planes would be slow to do this way. (Better would be to generalize it to differently-sized vectors)
Diffstat (limited to 'video')
-rw-r--r-- video/out/opengl/superxbr.c 276
-rw-r--r-- video/out/opengl/superxbr.h 4
-rw-r--r-- video/out/opengl/video.c 3
3 files changed, 150 insertions, 133 deletions
diff --git a/video/out/opengl/superxbr.c b/video/out/opengl/superxbr.c
index 87319aab99..c2d308bfdb 100644
--- a/video/out/opengl/superxbr.c
+++ b/video/out/opengl/superxbr.c
@@ -71,73 +71,145 @@ const struct m_sub_options superxbr_conf = {
*/
-void pass_superxbr(struct gl_shader_cache *sc, int planes, int tex_num,
- int step, float tex_mul, const struct superxbr_opts *conf,
- struct gl_transform *transform)
-{
- assert(0 <= step && step < 2);
- GLSLF("// superxbr (step %d)\n", step);
+struct step_params {
+ const float dstr, ostr; // multiplied by conf->sharpness: dstr -> diagonal kernel, ostr -> orthogonal kernel
+ const int d1[3][3]; // 1-distance diagonal mask: weight per abs() difference term, indexed [x][y]
+ const int d2[2][2]; // 2-distance diagonal mask: indexed [x][y]; a zero entry emits no term
+ const int o1[3]; // 1-distance orthogonal mask: weight per abs() difference term, indexed [y]
+ const int o2[3]; // 2-distance orthogonal mask: three slots declared, the tables in this file set two
+};
- if (!conf)
- conf = &superxbr_opts_def;
+static const struct step_params params[2] = { // indexed by 'step' (0 or 1); file-local
+ { .dstr = 0.129633, // step 0
+ .ostr = 0.175068,
+ .d1 = {{1, 2, 1},
+ {2, 4, 2},
+ {1, 2, 1}},
+ .d2 = {{-1, -1},
+ {-1, -1}},
+
+ .o1 = {1, 2, 1},
+ .o2 = {-1, -1},
+ }, { // step 1: only the weight-2 1-distance terms are enabled
+ .dstr = 0.175068,
+ .ostr = 0.129633,
+ .d1 = {{0, 2, 0},
+ {2, 0, 2},
+ {0, 2, 0}},
+ .d2 = {{ 0, 0},
+ { 0, 0}},
+
+ .o1 = {2, 0, 2},
+ .o2 = { 0, 0},
+ }
+};
- if (step == 0) {
- *transform = (struct gl_transform){{{2.0,0.0}, {0.0,2.0}}, {-0.5,-0.5}};
+// Emit GLSL for one superxbr step: reads the 4x4 taps through i(x,y), weights
+// the edge terms with 'mask', and stores the result in 'res' (set up by caller)
+static void superxbr_step_h(struct gl_shader_cache *sc,
+ const struct superxbr_opts *conf,
+ const struct step_params *mask)
+{
+ GLSLHF("{ // step\n");
+
+ // Convolve along the diagonal and orthogonal lines
+ GLSLH(vec4 d1 = vec4( i(0,0), i(1,1), i(2,2), i(3,3) );)
+ GLSLH(vec4 d2 = vec4( i(0,3), i(1,2), i(2,1), i(3,0) );)
+ GLSLH(vec4 h1 = vec4( i(0,1), i(1,1), i(2,1), i(3,1) );)
+ GLSLH(vec4 h2 = vec4( i(0,2), i(1,2), i(2,2), i(3,2) );)
+ GLSLH(vec4 v1 = vec4( i(1,0), i(1,1), i(1,2), i(1,3) );)
+ GLSLH(vec4 v2 = vec4( i(2,0), i(2,1), i(2,2), i(2,3) );)
+
+ GLSLHF("float dw = %f;\n", conf->sharpness * mask->dstr);
+ GLSLHF("float ow = %f;\n", conf->sharpness * mask->ostr);
+ GLSLH(vec4 dk = vec4(-dw, dw+0.5, dw+0.5, -dw);) // diagonal kernel
+ GLSLH(vec4 ok = vec4(-ow, ow+0.5, ow+0.5, -ow);) // ortho kernel
+
+ // Convolved results (orthogonal ones average the two centre rows/columns)
+ GLSLH(float d1c = dot(d1, dk);)
+ GLSLH(float d2c = dot(d2, dk);)
+ GLSLH(float vc = dot(v1+v2, ok)/2.0;)
+ GLSLH(float hc = dot(h1+h2, ok)/2.0;)
+
+ // Compute diagonal edge strength using diagonal mask
+ GLSLH(float d_edge = 0.0;) // float literal: GLSL ES has no implicit int->float
+ for (int x = 0; x < 3; x++) {
+ for (int y = 0; y < 3; y++) {
+ if (mask->d1[x][y]) {
+ // 1-distance diagonal neighbours ("%d.0" keeps the weight a float literal)
+ GLSLHF("d_edge += %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->d1[x][y], x+1, y, x, y+1);
+ GLSLHF("d_edge -= %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->d1[x][y], 3-y, x+1, 3-(y+1), x); // rotated
+ }
+ if (x < 2 && y < 2 && mask->d2[x][y]) {
+ // 2-distance diagonal neighbours
+ GLSLHF("d_edge += %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->d2[x][y], x+2, y, x, y+2);
+ GLSLHF("d_edge -= %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->d2[x][y], 3-y, x+2, 3-(y+2), x); // rotated
+ }
+ }
+ }
- GLSLH(#define wp1 2.0)
- GLSLH(#define wp2 1.0)
- GLSLH(#define wp3 -1.0)
- GLSLH(#define wp4 4.0)
- GLSLH(#define wp5 -1.0)
- GLSLH(#define wp6 1.0)
+ // Compute orthogonal edge strength using orthogonal mask
+ GLSLH(float o_edge = 0.0;)
+ for (int x = 1; x < 3; x++) {
+ for (int y = 0; y < 3; y++) {
+ if (mask->o1[y]) {
+ // 1-distance neighbours
+ GLSLHF("o_edge += %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->o1[y], x, y, x, y+1); // vertical
+ GLSLHF("o_edge -= %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->o1[y], y, x, y+1, x); // horizontal
+ }
+ if (y < 2 && mask->o2[y]) {
+ // 2-distance neighbours: both orientations use the same weight o2[y]
+ GLSLHF("o_edge += %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->o2[y], x, y, x, y+2); // vertical
+ GLSLHF("o_edge -= %d.0 * abs(i(%d,%d) - i(%d,%d));\n",
+ mask->o2[y], y, x, y+2, x); // horizontal
+ }
+ }
+ }
- GLSLHF("#define weight1 (%f*1.29633/10.0)\n", conf->sharpness);
- GLSLHF("#define weight2 (%f*1.75068/10.0/2.0)\n", conf->sharpness);
+ // Pick the two best directions and mix them together
+ GLSLHF("float str = smoothstep(0.0, %f + 1e-6, abs(tex_mul*d_edge));\n",
+ conf->edge_strength);
+ GLSLH(res = mix(mix(d2c, d1c, step(0.0, d_edge)), \
+ mix(hc, vc, step(0.0, o_edge)), 1.0 - str);)
- GLSLH(#define Get(x, y) (texture(tex, pos + (vec2(x, y) - vec2(0.25, 0.25)) * pixel_size)[plane] * tex_mul))
- } else {
- *transform = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
+ // Anti-ringing using center square
+ GLSLH(float lo = min(min( i(1,1), i(2,1) ), min( i(1,2), i(2,2) ));)
+ GLSLH(float hi = max(max( i(1,1), i(2,1) ), max( i(1,2), i(2,2) ));)
+ GLSLHF("res = mix(res, clamp(res, lo, hi), 1.0-2.0*abs(%f-0.5));\n",
+ conf->edge_strength);
- GLSLH(#define wp1 2.0)
- GLSLH(#define wp2 0.0)
- GLSLH(#define wp3 0.0)
- GLSLH(#define wp4 0.0)
- GLSLH(#define wp5 0.0)
- GLSLH(#define wp6 0.0)
+ GLSLHF("} // step\n");
+}
+
+void pass_superxbr(struct gl_shader_cache *sc, int id, int step, float tex_mul,
+ const struct superxbr_opts *conf,
+ struct gl_transform *transform)
+{
+ if (!conf)
+ conf = &superxbr_opts_def;
- GLSLHF("#define weight1 (%f*1.75068/10.0)\n", conf->sharpness);
- GLSLHF("#define weight2 (%f*1.29633/10.0/2.0)\n", conf->sharpness);
+ assert(0 <= step && step < 2);
+ GLSLF("// superxbr (step %d)\n", step);
+ GLSLHF("#define tex texture%d\n", id);
+ GLSLHF("#define tex_size texture_size%d\n", id);
+ GLSLHF("#define tex_mul %f\n", tex_mul);
+ GLSLHF("#define pt pixel_size%d\n", id);
- GLSLH(#define Get(x, y) (texture(tex, pos + (vec2((x) + (y) - 1, (y) - (x))) * pixel_size)[plane] * tex_mul))
- }
- GLSLH(float df(float A, float B)
- {
- return abs(A-B);
- })
-
- GLSLH(float d_wd(float b0, float b1, float c0, float c1, float c2,
- float d0, float d1, float d2, float d3, float e1,
- float e2, float e3, float f2, float f3)
- {
- return (wp1*(df(c1,c2) + df(c1,c0) + df(e2,e1) + df(e2,e3)) +
- wp2*(df(d2,d3) + df(d0,d1)) +
- wp3*(df(d1,d3) + df(d0,d2)) +
- wp4*df(d1,d2) +
- wp5*(df(c0,c2) + df(e1,e3)) +
- wp6*(df(b0,b1) + df(f2,f3)));
- })
-
- GLSLH(float hv_wd(float i1, float i2, float i3, float i4,
- float e1, float e2, float e3, float e4)
- {
- return (wp4*(df(i1,i2)+df(i3,i4)) +
- wp1*(df(i1,e1)+df(i2,e2)+df(i3,e3)+df(i4,e4)) +
- wp3*(df(i1,e2)+df(i3,e4)+df(e1,i2)+df(e3,i4)));
- })
-
- GLSLHF("float superxbr(sampler2D tex, vec2 pos, vec2 tex_size, vec2 pixel_size, int plane, float tex_mul) {\n");
+ // We use a sub-function in the header so we can return early
+ GLSLHF("float superxbr(vec2 pos) {\n");
+ GLSLH(float i[4*4];)
+ GLSLH(float res;)
+ GLSLH(#define i(x,y) i[(x)*4+(y)])
if (step == 0) {
+ *transform = (struct gl_transform){{{2.0,0.0}, {0.0,2.0}}, {-0.5,-0.5}};
GLSLH(vec2 dir = fract(pos * tex_size) - 0.5;)
// Optimization: Discard (skip drawing) unused pixels, except those
@@ -147,83 +219,27 @@ void pass_superxbr(struct gl_shader_cache *sc, int planes, int tex_num,
return 0.0;)
GLSLH(if (dir.x < 0.0 || dir.y < 0.0 || dist.x < 1.0 || dist.y < 1.0)
- return texture(tex, pos - dir * pixel_size)[plane] * tex_mul;)
+ return texture(tex, pos - pt * dir).x;)
+
+ // Load the input samples
+ GLSLH(for (int x = 0; x < 4; x++))
+ GLSLH(for (int y = 0; y < 4; y++))
+ GLSLH(i(x,y) = texture(tex, pos + pt * vec2(x-1.25, y-1.25)).x;)
} else {
+ *transform = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
+
GLSLH(vec2 dir = fract(pos * tex_size / 2.0) - 0.5;)
GLSLH(if (dir.x * dir.y > 0.0)
- return texture(tex, pos)[plane] * tex_mul;)
- }
+ return texture(tex, pos).x;)
- GLSLH(float P0 = Get(-1,-1);
- float P1 = Get( 2,-1);
- float P2 = Get(-1, 2);
- float P3 = Get( 2, 2);
-
- float B = Get( 0,-1);
- float C = Get( 1,-1);
- float D = Get(-1, 0);
- float E = Get( 0, 0);
- float F = Get( 1, 0);
- float G = Get(-1, 1);
- float H = Get( 0, 1);
- float I = Get( 1, 1);
-
- float F4 = Get(2, 0);
- float I4 = Get(2, 1);
- float H5 = Get(0, 2);
- float I5 = Get(1, 2);)
-
-/*
- P1
- |P0|B |C |P1| C F4 |a0|b1|c2|d3|
- |D |E |F |F4| B F I4 |b0|c1|d2|e3| |e1|i1|i2|e2|
- |G |H |I |I4| P0 E A I P3 |c0|d1|e2|f3| |e3|i3|i4|e4|
- |P2|H5|I5|P3| D H I5 |d0|e1|f2|g3|
- G H5
- P2
-*/
-
- /* Calc edgeness in diagonal directions. */
- GLSLH(float d_edge = (d_wd( D, B, G, E, C, P2, H, F, P1, H5, I, F4, I5, I4 ) -
- d_wd( C, F4, B, F, I4, P0, E, I, P3, D, H, I5, G, H5 ));)
-
- /* Calc edgeness in horizontal/vertical directions. */
- GLSLH(float hv_edge = (hv_wd(F, I, E, H, C, I5, B, H5) -
- hv_wd(E, F, H, I, D, F4, G, I4));)
-
- /* Filter weights. Two taps only. */
- GLSLH(vec4 w1 = vec4(-weight1, weight1+0.5, weight1+0.5, -weight1);
- vec4 w2 = vec4(-weight2, weight2+0.25, weight2+0.25, -weight2);)
-
- /* Filtering and normalization in four direction generating four colors. */
- GLSLH(float c1 = dot(vec4(P2, H, F, P1), w1);
- float c2 = dot(vec4(P0, E, I, P3), w1);
- float c3 = dot(vec4( D+G, E+H, F+I, F4+I4), w2);
- float c4 = dot(vec4( C+B, F+E, I+H, I5+H5), w2);)
-
- GLSLHF("float limits = %f + 0.000001;\n", conf->edge_strength);
- GLSLH(float edge_strength = smoothstep(0.0, limits, abs(d_edge));)
-
- /* Smoothly blends the two strongest directions(one in diagonal and the
- * other in vert/horiz direction). */
- GLSLHF("float color = mix(mix(c1, c2, step(0.0, d_edge)),"
- "mix(c3, c4, step(0.0, hv_edge)), 1.0 - %f);\n",
- conf->edge_strength);
- /* Anti-ringing code. */
- GLSLH(float min_sample = min(min(E, F), min(H, I));
- float max_sample = max(max(E, F), max(H, I));
- float aux = color;
- color = clamp(color, min_sample, max_sample);)
- GLSLHF("color = mix(aux, color, 1.0-2.0*abs(%f-0.5));\n", conf->edge_strength);
-
- GLSLH(return color;)
-
- GLSLHF("}"); // superxbr()
+ GLSLH(for (int x = 0; x < 4; x++))
+ GLSLH(for (int y = 0; y < 4; y++))
+ GLSLH(i(x,y) = texture(tex, pos + pt * vec2(x+y-3, y-x)).x;)
+ }
- GLSL(color = vec4(1.0);)
+ superxbr_step_h(sc, conf, &params[step]);
+ GLSLH(return res;)
+ GLSLHF("}\n");
- for (int i = 0; i < planes; i++) {
- GLSLF("color[%d] = superxbr(texture%d, texcoord%d, texture_size%d, pixel_size%d, %d, %f);\n",
- i, tex_num, tex_num, tex_num, tex_num, i, tex_mul);
- }
+ GLSLF("color.x = tex_mul * superxbr(texcoord%d);\n", id);
}
diff --git a/video/out/opengl/superxbr.h b/video/out/opengl/superxbr.h
index 38a10ffedd..7aa46eff7c 100644
--- a/video/out/opengl/superxbr.h
+++ b/video/out/opengl/superxbr.h
@@ -24,8 +24,8 @@
extern const struct superxbr_opts superxbr_opts_def;
extern const struct m_sub_options superxbr_conf;
-void pass_superxbr(struct gl_shader_cache *sc, int planes, int tex_num,
- int step, float tex_mul, const struct superxbr_opts *conf,
+void pass_superxbr(struct gl_shader_cache *sc, int id, int step, float tex_mul,
+ const struct superxbr_opts *conf,
struct gl_transform *transform);
#endif
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 358249d2f8..3f0123eb65 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1395,7 +1395,8 @@ static void pass_prescale_luma(struct gl_video *p, struct img_tex *tex,
switch(p->opts.prescale_luma) {
case 1:
- pass_superxbr(p->sc, planes, id, step, tex->multiplier,
+ assert(planes == 1);
+ pass_superxbr(p->sc, id, step, tex->multiplier,
p->opts.superxbr_opts, &step_transform);
break;
case 2: