vo_opengl: add Super-xBR filter for upscaling

Add the Super-xBR filter for image doubling, and the prescaling framework to support it. The shader code was ported from the MPDN extensions project, with modifications to process luma only. This commit is largely inspired by code from #2266, with `gl_transform_trans()`, authored by @haasn, taken directly.
author: Bin Jin <bjin1990@gmail.com> 2015-10-26 22:43:48 +0000
committer: wm4 <wm4@nowhere> 2015-11-05 17:38:20 +0100
commit: 4c43c30421b1d713b7a17b437e381fe1efd01902 (patch)
tree: 073551ba56ccc34be9c093eabab62cb09b1ca496 /video/out/opengl/video.c
parent: 7438f208c37deb1a30df54278a6d81227038f33e (diff)
download: mpv-4c43c30421b1d713b7a17b437e381fe1efd01902.tar.bz2
mpv-4c43c30421b1d713b7a17b437e381fe1efd01902.tar.xz
1 files changed, 210 insertions, 6 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 55fafad96e..b69330d1a9 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -38,6 +38,7 @@
 #include "hwdec.h"
 #include "osd.h"
 #include "stream/stream.h"
+#include "superxbr.h"
 #include "video_shaders.h"
 #include "video/out/filter_kernels.h"
 #include "video/out/aspect.h"
@@ -48,6 +49,12 @@
 // Pixel width of 1D lookup textures.
 #define LOOKUP_TEXTURE_SIZE 256
 
+// Maximum number of prescaler passes that can be applied.
+#define MAX_PRESCALE_PASSES 5
+
+// Maximum number of steps in each prescaling pass.
+#define MAX_PRESCALE_STEPS 2
+
 // scale/cscale arguments that map directly to shader filter routines.
 // Note that the convolution filters are not included in this list.
 static const char *const fixed_scale_filters[] = {
@@ -166,6 +173,7 @@ struct gl_video {
     struct fbotex blend_subs_fbo;
     struct fbotex unsharp_fbo;
     struct fbotex output_fbo;
+    struct fbotex deband_fbo;
     struct fbosurface surfaces[FBOSURFACES_MAX];
 
     // these are duplicated so we can keep rendering back and forth between
@@ -173,6 +181,8 @@ struct gl_video {
     struct fbotex pre_fbo[2];
     struct fbotex post_fbo[2];
 
+    struct fbotex prescale_fbo[MAX_PRESCALE_PASSES][MAX_PRESCALE_STEPS];
+
     int surface_idx;
     int surface_now;
     int frames_drawn;
@@ -192,6 +202,7 @@ struct gl_video {
     // temporary during rendering
     struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
     int texture_w, texture_h;
+    struct gl_transform texture_offset; // texture transform without rotation
     bool use_linear;
     bool use_normalized_range;
     float user_gamma;
@@ -330,6 +341,8 @@ const struct gl_video_opts gl_video_opts_def = {
     .alpha_mode = 2,
     .background = {0, 0, 0, 255},
     .gamma = 1.0f,
+    .prescale_passes = 1,
+    .prescale_downscaling_threshold = 2.0f,
 };
 
 const struct gl_video_opts gl_video_opts_hq_def = {
@@ -353,6 +366,8 @@ const struct gl_video_opts gl_video_opts_hq_def = {
     .blend_subs = 0,
     .pbo = 1,
     .deband = 1,
+    .prescale_passes = 1,
+    .prescale_downscaling_threshold = 2.0f,
 };
 
 static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
@@ -429,6 +444,12 @@ const struct m_sub_options gl_video_conf = {
         OPT_FLAG("deband", deband, 0),
         OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
         OPT_FLOAT("sharpen", unsharp, 0),
+        OPT_CHOICE("prescale", prescale, 0, ({"none", 0}, {"superxbr", 1})),
+        OPT_INTRANGE("prescale-passes",
+                     prescale_passes, 0, 1, MAX_PRESCALE_PASSES),
+        OPT_FLOATRANGE("prescale-downscaling-threshold",
+                       prescale_downscaling_threshold, 0, 0.0, 32.0),
+        OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0),
 
         OPT_REMOVED("approx-gamma", "this is always enabled now"),
         OPT_REMOVED("cscale-down", "chroma is never downscaled"),
@@ -463,6 +484,7 @@ static void check_gl_features(struct gl_video *p);
 static bool init_format(int fmt, struct gl_video *init);
 static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
 static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src);
+static void get_scale_factors(struct gl_video *p, double xy[2]);
 
 #define GLSL(x) gl_sc_add(p->sc, #x "\n");
 #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
@@ -580,12 +602,18 @@ static void uninit_rendering(struct gl_video *p)
     fbotex_uninit(&p->indirect_fbo);
     fbotex_uninit(&p->blend_subs_fbo);
     fbotex_uninit(&p->unsharp_fbo);
+    fbotex_uninit(&p->deband_fbo);
 
     for (int n = 0; n < 2; n++) {
         fbotex_uninit(&p->pre_fbo[n]);
         fbotex_uninit(&p->post_fbo[n]);
     }
 
+    for (int pass = 0; pass < MAX_PRESCALE_PASSES; pass++) {
+        for (int step = 0; step < MAX_PRESCALE_STEPS; step++)
+            fbotex_uninit(&p->prescale_fbo[pass][step]);
+    }
+
     for (int n = 0; n < FBOSURFACES_MAX; n++)
         fbotex_uninit(&p->surfaces[n].fbotex);
 
@@ -1126,6 +1154,125 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
         GLSL(color.a = 1.0;)
 }
 
+// Number of prescaler passes to run for the current scale factors (0 = none).
+static int get_prescale_passes(struct gl_video *p)
+{
+    if (!p->opts.prescale)
+        return 0;
+    // A threshold below 1.0 turns the downscaling check off: use all passes.
+    if (p->opts.prescale_downscaling_threshold < 1.0f)
+        return p->opts.prescale_passes;
+
+    double scale_factors[2];
+    get_scale_factors(p, scale_factors);
+
+    int passes = 0;
+    for (; passes < p->opts.prescale_passes; passes ++) {
+        // Each pass scales by 2, which is the same for superxbr and nnedi3.
+        scale_factors[0] /= 2;
+        scale_factors[1] /= 2;
+
+        if (1.0f / scale_factors[0] > p->opts.prescale_downscaling_threshold)
+            break; // axis 0 would be downscaled past the threshold: skip pass
+        if (1.0f / scale_factors[1] > p->opts.prescale_downscaling_threshold)
+            break; // axis 1 would be downscaled past the threshold: skip pass
+    }
+
+    return passes;
+}
+
+// Apply the prescaler `passes` times; each step's transform goes into *offset.
+static void pass_prescale(struct gl_video *p, int src_tex_num, int dst_tex_num,
+                          int planes, int w, int h, int passes,
+                          float tex_mul, struct gl_transform *offset)
+{
+    *offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}}; // identity
+
+    int tex_num = src_tex_num;
+
+    // Same for superxbr and nnedi3; must not exceed MAX_PRESCALE_STEPS.
+    const int steps_per_pass = 2;
+
+    for (int pass = 0; pass < passes; pass++) {
+        for (int step = 0; step < steps_per_pass; step++) {
+            struct gl_transform transform = {{{0}}};
+
+            switch(p->opts.prescale) {
+            case 1:
+                pass_superxbr(p->sc, planes, tex_num, step,
+                              p->opts.superxbr_opts, &transform);
+                break;
+            default:
+                abort(); // no other prescaler value is handled here
+            }
+
+            if (tex_mul != 1.0) {
+                GLSLF("color *= %f;\n", tex_mul);
+                tex_mul = 1.0; // emit the multiplier only once
+            }
+
+            gl_transform_trans(transform, offset);
+
+            w *= (int)transform.m[0][0]; // scale the FBO size by this step
+            h *= (int)transform.m[1][1];
+
+            finish_pass_fbo(p, &p->prescale_fbo[pass][step],
+                            w, h, dst_tex_num, 0);
+            tex_num = dst_tex_num; // later steps take dst_tex_num as input
+        }
+    }
+}
+
+// Prescale texture0's planes into texture4; returns false if no pass is run.
+static bool pass_prescale_luma(struct gl_video *p, float tex_mul,
+                               struct gl_transform *chromafix,
+                               struct gl_transform *transform,
+                               struct src_tex *prescaled_tex,
+                               int *prescaled_planes)
+{
+    // number of passes to apply prescaler, can be zero.
+    int prescale_passes = get_prescale_passes(p);
+
+    if (prescale_passes == 0)
+        return false;
+
+    p->use_normalized_range = true;
+
+    // estimate a safe upper bound of planes being prescaled on texture0.
+    *prescaled_planes = p->is_yuv ? 1 :
+        (!p->color_swizzle[0] || p->color_swizzle[3] == 'a') ? 3 : 4;
+
+    struct src_tex tex_backup[4];
+    for (int i = 0; i < 4; i++)
+        tex_backup[i] = p->pass_tex[i];
+
+    if (p->opts.deband) {
+        // apply debanding (which is also given tex_mul) before upscaling.
+        pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->gl_target,
+                           tex_mul, p->texture_w, p->texture_h, &p->lfg);
+        finish_pass_fbo(p, &p->deband_fbo, p->texture_w,
+                        p->texture_h, 0, 0);
+        tex_backup[0] = p->pass_tex[0]; // restore below keeps this pass_tex[0]
+    }
+
+    // process texture0 and store the result in texture4.
+    pass_prescale(p, 0, 4, *prescaled_planes, p->texture_w, p->texture_h,
+                  prescale_passes, p->opts.deband ? 1.0 : tex_mul, transform);
+
+    // adjust the chromafix offset for the new transform.
+    chromafix->t[0] -= transform->t[0] / transform->m[0][0];
+    chromafix->t[1] -= transform->t[1] / transform->m[1][1];
+
+    // restore the first four textures.
+    for (int i = 0; i < 4; i++)
+        p->pass_tex[i] = tex_backup[i];
+
+    // backup texture4 for later use.
+    *prescaled_tex = p->pass_tex[4];
+
+    return true;
+}
+
 // sample from video textures, set "color" variable to yuv value
 static void pass_read_video(struct gl_video *p)
 {
@@ -1136,6 +1283,16 @@ static void pass_read_video(struct gl_video *p)
         tx_bits = (in_bits + 7) & ~7;
     float tex_mul = ((1 << tx_bits) - 1.0) / ((1 << in_bits) - 1.0);
 
+    struct src_tex prescaled_tex;
+    struct gl_transform offset = {{{0}}};
+    int prescaled_planes;
+
+    bool prescaled = pass_prescale_luma(p, tex_mul, &chromafix, &offset,
+                                        &prescaled_tex, &prescaled_planes);
+
+    const int scale_factor_x = prescaled ? (int)offset.m[0][0] : 1;
+    const int scale_factor_y = prescaled ? (int)offset.m[1][1] : 1;
+
     bool color_defined = false;
     if (p->plane_count > 1) {
         // Chroma processing (merging -> debanding -> scaling)
@@ -1174,10 +1331,11 @@ static void pass_read_video(struct gl_video *p)
         }
 
         // Sample either directly or by upscaling
-        if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) {
+        if ((p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) || prescaled) {
             GLSLF("// chroma scaling\n");
             pass_sample(p, 1, &p->scaler[2], cscale, 1.0,
-                        p->texture_w, p->texture_h, chromafix);
+                        p->texture_w * scale_factor_x,
+                        p->texture_h * scale_factor_y, chromafix);
             GLSL(vec2 chroma = color.xy;)
             color_defined = true; // pass_sample defines vec4 color
         } else {
@@ -1200,12 +1358,20 @@ static void pass_read_video(struct gl_video *p)
     // stuff
     GLSL(vec4 main;)
     GLSLF("{\n");
-    if (p->opts.deband) {
+    if (!prescaled && p->opts.deband) {
         pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->gl_target, tex_mul,
                            p->texture_w, p->texture_h, &p->lfg);
         p->use_normalized_range = true;
     } else {
-        GLSL(vec4 color = texture(texture0, texcoord0);)
+        if (!prescaled) {
+            GLSL(vec4 color = texture(texture0, texcoord0);)
+        } else {
+            // just use bilinear for non-essential planes.
+            GLSLF("vec4 color = texture(texture0, "
+                       "texcoord0 + vec2(%f,%f) / texture_size0);\n",
+                  -offset.t[0] / scale_factor_x,
+                  -offset.t[1] / scale_factor_y);
+        }
         if (p->use_normalized_range)
             GLSLF("color *= %f;\n", tex_mul);
     }
@@ -1214,13 +1380,33 @@ static void pass_read_video(struct gl_video *p)
 
     // Set up the right combination of planes
     GLSL(color = main;)
+    if (prescaled) {
+        // Restore texture4 and merge it into the main texture.
+        p->pass_tex[4] = prescaled_tex;
+
+        const char* planes_to_copy = "abgr" + 4 - prescaled_planes;
+        GLSLF("color.%s = texture(texture4, texcoord4).%s;\n",
+              planes_to_copy, planes_to_copy);
+
+        p->texture_w *= scale_factor_x;
+        p->texture_h *= scale_factor_y;
+        gl_transform_trans(offset, &p->texture_offset);
+    }
     if (p->plane_count > 1)
         GLSL(color.yz = chroma;)
     if (p->has_alpha && p->plane_count >= 4) {
-        GLSL(color.a = texture(texture3, texcoord3).r;)
+        if (!prescaled) {
+            GLSL(color.a = texture(texture3, texcoord3).r;)
+        } else {
+            GLSLF("color.a = texture(texture3, "
+                      "texcoord3 + vec2(%f,%f) / texture_size3).r;",
+                  -offset.t[0] / scale_factor_x,
+                  -offset.t[1] / scale_factor_y);
+        }
         if (p->use_normalized_range)
             GLSLF("color.a *= %f;\n", tex_mul);
     }
+
 }
 
 // yuv conversion, and any other conversions before main up/down-scaling
@@ -1327,6 +1513,8 @@ static void compute_src_transform(struct gl_video *p, struct gl_transform *tr,
           oy = p->src_rect.y0;
     struct gl_transform transform = {{{sx,0.0}, {0.0,sy}}, {ox,oy}};
 
+    gl_transform_trans(p->texture_offset, &transform);
+
     int xc = 0, yc = 1;
     *vp_w = p->dst_rect.x1 - p->dst_rect.x0,
     *vp_h = p->dst_rect.y1 - p->dst_rect.y0;
@@ -1348,14 +1536,23 @@ static void pass_scale_main(struct gl_video *p)
     // Figure out the main scaler.
     double xy[2];
     get_scale_factors(p, xy);
+
+    // actual scale factor should be divided by the scale factor of prescaling.
+    xy[0] /= p->texture_offset.m[0][0];
+    xy[1] /= p->texture_offset.m[1][1];
+
     bool downscaling = xy[0] < 1.0 || xy[1] < 1.0;
     bool upscaling = !downscaling && (xy[0] > 1.0 || xy[1] > 1.0);
     double scale_factor = 1.0;
 
     struct scaler *scaler = &p->scaler[0];
     struct scaler_config scaler_conf = p->opts.scaler[0];
-    if (p->opts.scaler_resizes_only && !downscaling && !upscaling)
+    if (p->opts.scaler_resizes_only && !downscaling && !upscaling) {
         scaler_conf.kernel.name = "bilinear";
+        // bilinear is going to be used, just remove all sub-pixel offsets.
+        p->texture_offset.t[0] = (int)p->texture_offset.t[0];
+        p->texture_offset.t[1] = (int)p->texture_offset.t[1];
+    }
     if (downscaling && p->opts.scaler[1].kernel.name) {
         scaler_conf = p->opts.scaler[1];
         scaler = &p->scaler[1];
@@ -1644,6 +1841,7 @@ static void pass_render_frame(struct gl_video *p)
     // initialize the texture parameters
     p->texture_w = p->image_params.w;
     p->texture_h = p->image_params.h;
+    p->texture_offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
 
     if (p->opts.dumb_mode)
         return;
@@ -2509,12 +2707,18 @@ static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src)
     talloc_free(dst->pre_shaders);
     talloc_free(dst->post_shaders);
     talloc_free(dst->deband_opts);
+    talloc_free(dst->superxbr_opts);
 
     *dst = *src;
 
     if (src->deband_opts)
         dst->deband_opts = m_sub_options_copy(NULL, &deband_conf, src->deband_opts);
 
+    if (src->superxbr_opts) {
+        dst->superxbr_opts = m_sub_options_copy(NULL, &superxbr_conf,
+                                                src->superxbr_opts);
+    }
+
     for (int n = 0; n < 4; n++) {
         dst->scaler[n].kernel.name =
             (char *)handle_scaler_opt(dst->scaler[n].kernel.name, n == 3);
author	Bin Jin <bjin1990@gmail.com>	2015-10-26 22:43:48 +0000
committer	wm4 <wm4@nowhere>	2015-11-05 17:38:20 +0100
commit	4c43c30421b1d713b7a17b437e381fe1efd01902 (patch)
tree	073551ba56ccc34be9c093eabab62cb09b1ca496 /video/out/opengl/video.c
parent	7438f208c37deb1a30df54278a6d81227038f33e (diff)
download	mpv-4c43c30421b1d713b7a17b437e381fe1efd01902.tar.bz2 mpv-4c43c30421b1d713b7a17b437e381fe1efd01902.tar.xz