8 files changed, 538 insertions, 7 deletions
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index b2a6cd7045..36ca35d51f 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -549,6 +549,44 @@ Available video output drivers are:
         feature doesn't work correctly with different scale factors in
         different directions.
 
+    ``prescale=<filter>``
+        This option provides non-convolution-based filters for upscaling. These
+        filters resize the video to a multiple of the original size (all current
+        prescalers can only perform image doubling in a single pass). Generally
+        another convolution-based filter (the main scaler) will be applied after
+        the prescaler to match the target display size.
+
+        ``none``
+            Disable all prescalers. This is the default.
+
+        ``superxbr``
+            A relatively fast prescaler originally developed for pixel art.
+
+            Some parameters can be tuned with ``superxbr-sharpness`` and
+            ``superxbr-edge-strength`` options.
+
+        Note that all the filters above are designed (or implemented) to process
+        luma plane only and probably won't work as intended for video in RGB
+        format.
+
+    ``prescale-passes=<1..5>``
+        The number of passes to apply the prescaler (defaults to 1). Setting
+        it to 2 will perform a 4x upscaling.
+
+    ``prescale-downscaling-threshold=<0..32>``
+        This option prevents "overkill" use of prescalers, which can be caused
+        by misconfiguration, or by the user trying to play a video with a much
+        larger size. With this option, the user can specify the maximal allowed
+        downscaling ratio in both dimensions. To satisfy it, the number of
+        passes for the prescaler will be reduced, and if necessary the
+        prescaler can also be disabled.
+
+        The default value is 2.0, and should be able to prevent most seemingly
+        unreasonable use of prescalers. Most users would probably want to set
+        it to a smaller value between 1.0 and 1.5 for better performance.
+
+        A value less than 1.0 will disable the check.
+
     ``pre-shaders=<files>``, ``post-shaders=<files>``, ``scale-shader=<file>``
         Custom GLSL fragment shaders.
 
diff --git a/video/out/opengl/superxbr.c b/video/out/opengl/superxbr.c
new file mode 100644
index 0000000000..a88c058a38
--- /dev/null
+++ b/video/out/opengl/superxbr.c
@@ -0,0 +1,234 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can alternatively redistribute this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include "superxbr.h"
+
+#include <assert.h>
+
+#define GLSL(x) gl_sc_add(sc, #x "\n");
+#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
+#define GLSLH(x) gl_sc_hadd(sc, #x "\n");
+#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__)
+
+struct superxbr_opts {
+    float sharpness;      // "sharpness" option; scales the shader filter weights
+    float edge_strength;  // "edge-strength" option; blend/anti-ringing factor
+};
+
+const struct superxbr_opts superxbr_opts_def = {
+    .sharpness = 1.0f,      // accepted range per superxbr_conf: 0.0 .. 2.0
+    .edge_strength = 1.0f,  // accepted range per superxbr_conf: 0.0 .. 1.0
+};
+
+#define OPT_BASE_STRUCT struct superxbr_opts
+const struct m_sub_options superxbr_conf = {
+    .opts = (const m_option_t[]) {  // float options with their accepted ranges
+        OPT_FLOATRANGE("sharpness", sharpness, 0, 0.0, 2.0),
+        OPT_FLOATRANGE("edge-strength", edge_strength, 0, 0.0, 1.0),
+        {0}
+    },
+    .size = sizeof(struct superxbr_opts),
+    .defaults = &superxbr_opts_def,  // sharpness = 1.0, edge_strength = 1.0
+};
+
+/*
+
+    *******  Super XBR Shader  *******
+
+    Copyright (c) 2015 Hyllian - sergiogdb@gmail.com
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    THE SOFTWARE.
+
+*/
+
+void pass_superxbr(struct gl_shader_cache *sc, int planes, int tex_num,
+                   int step, const struct superxbr_opts *conf,
+                   struct gl_transform *transform)  // out: transform of this step
+{
+    assert(0 <= step && step < 2);  // only steps 0 and 1 are implemented
+    GLSLF("// superxbr (tex %d, step %d)\n", tex_num, step + 1);
+
+    if (!conf)  // no options given: use the built-in defaults
+        conf = &superxbr_opts_def;
+
+    if (step == 0) {  // step 0: 2x scale with a (-0.5,-0.5) offset
+        *transform = (struct gl_transform){{{2.0,0.0}, {0.0,2.0}}, {-0.5,-0.5}};
+
+        GLSLH(#define wp1  2.0)
+        GLSLH(#define wp2  1.0)
+        GLSLH(#define wp3 -1.0)
+        GLSLH(#define wp4  4.0)
+        GLSLH(#define wp5 -1.0)
+        GLSLH(#define wp6  1.0)
+
+        GLSLHF("#define weight1 (%f*1.29633/10.0)\n", conf->sharpness);
+        GLSLHF("#define weight2 (%f*1.75068/10.0/2.0)\n", conf->sharpness);
+
+        GLSLH(#define Get(x, y) (texture(tex, pos + (vec2(x, y) - vec2(0.25, 0.25)) / tex_size)[plane]))
+    } else {  // step 1: identity transform, different weights and sampling grid
+        *transform = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
+
+        GLSLH(#define wp1  2.0)
+        GLSLH(#define wp2  0.0)
+        GLSLH(#define wp3  0.0)
+        GLSLH(#define wp4  0.0)
+        GLSLH(#define wp5  0.0)
+        GLSLH(#define wp6  0.0)
+
+        GLSLHF("#define weight1 (%f*1.75068/10.0)\n", conf->sharpness);
+        GLSLHF("#define weight2 (%f*1.29633/10.0/2.0)\n", conf->sharpness);
+
+        GLSLH(#define Get(x, y) (texture(tex, pos + (vec2((x) + (y) - 1, (y) - (x))) / tex_size)[plane]))
+    }
+    GLSLH(float df(float A, float B)
+          {
+              return abs(A-B);
+          })
+
+    GLSLH(float d_wd(float b0, float b1, float c0, float c1, float c2,
+                     float d0, float d1, float d2, float d3, float e1,
+                     float e2, float e3, float f2, float f3)
+          {
+              return (wp1*(df(c1,c2) + df(c1,c0) + df(e2,e1) + df(e2,e3)) +
+                      wp2*(df(d2,d3) + df(d0,d1)) +
+                      wp3*(df(d1,d3) + df(d0,d2)) +
+                      wp4*df(d1,d2) +
+                      wp5*(df(c0,c2) + df(e1,e3)) +
+                      wp6*(df(b0,b1) + df(f2,f3)));
+          })
+
+    GLSLH(float hv_wd(float i1, float i2, float i3, float i4,
+                      float e1, float e2, float e3, float e4)
+          {
+              return (wp4*(df(i1,i2)+df(i3,i4)) +
+                      wp1*(df(i1,e1)+df(i2,e2)+df(i3,e3)+df(i4,e4)) +
+                      wp3*(df(i1,e2)+df(i3,e4)+df(e1,i2)+df(e3,i4)));
+          })
+
+    GLSLHF("float superxbr(sampler2D tex, vec2 pos, vec2 tex_size, int plane) {\n");
+
+    if (step == 0) {
+        GLSLH(vec2 dir = fract(pos * tex_size) - 0.5;)
+
+        // Optimization: Discard (skip drawing) unused pixels, except those
+        // at the edge.
+        GLSLH(vec2 dist = tex_size * min(pos, vec2(1.0) - pos);)
+        GLSLH(if (dir.x * dir.y < 0 && dist.x > 1 && dist.y > 1)
+                  return 0.0;)
+
+        GLSLH(if (dir.x < 0 || dir.y < 0 || dist.x < 1 || dist.y < 1)
+                  return texture(tex, pos - dir / tex_size)[plane];)
+    } else {
+        GLSLH(vec2 dir = fract(pos * tex_size / 2) - 0.5;)
+        GLSLH(if (dir.x * dir.y > 0)
+                  return texture(tex, pos)[plane];)
+    }
+
+    GLSLH(float P0 = Get(-1,-1);
+          float P1 = Get( 2,-1);
+          float P2 = Get(-1, 2);
+          float P3 = Get( 2, 2);
+
+          float  B = Get( 0,-1);
+          float  C = Get( 1,-1);
+          float  D = Get(-1, 0);
+          float  E = Get( 0, 0);
+          float  F = Get( 1, 0);
+          float  G = Get(-1, 1);
+          float  H = Get( 0, 1);
+          float  I = Get( 1, 1);
+
+          float F4 = Get(2, 0);
+          float I4 = Get(2, 1);
+          float H5 = Get(0, 2);
+          float I5 = Get(1, 2);)
+
+/*
+                                  P1
+         |P0|B |C |P1|         C     F4          |a0|b1|c2|d3|
+         |D |E |F |F4|      B     F     I4       |b0|c1|d2|e3|   |e1|i1|i2|e2|
+         |G |H |I |I4|   P0    E  A  I     P3    |c0|d1|e2|f3|   |e3|i3|i4|e4|
+         |P2|H5|I5|P3|      D     H     I5       |d0|e1|f2|g3|
+                               G     H5
+                                  P2
+*/
+
+    /* Calc edgeness in diagonal directions. */
+    GLSLH(float d_edge = (d_wd( D, B, G, E, C, P2, H, F, P1, H5, I, F4, I5, I4 ) -
+                          d_wd( C, F4, B, F, I4, P0, E, I, P3, D, H, I5, G, H5 ));)
+
+    /* Calc edgeness in horizontal/vertical directions. */
+    GLSLH(float hv_edge = (hv_wd(F, I, E, H, C, I5, B, H5) -
+                           hv_wd(E, F, H, I, D, F4, G, I4));)
+
+    /* Filter weights. Two taps only. */
+    GLSLH(vec4 w1 = vec4(-weight1, weight1+0.5, weight1+0.5, -weight1);
+          vec4 w2 = vec4(-weight2, weight2+0.25, weight2+0.25, -weight2);)
+
+    /* Filtering and normalization in four direction generating four colors. */
+    GLSLH(float c1 = dot(vec4(P2, H, F, P1), w1);
+          float c2 = dot(vec4(P0, E, I, P3), w1);
+          float c3 = dot(vec4( D+G, E+H, F+I, F4+I4), w2);
+          float c4 = dot(vec4( C+B, F+E, I+H, I5+H5), w2);)
+
+    GLSLHF("float limits = %f + 0.000001;\n", conf->edge_strength);
+    GLSLH(float edge_strength = smoothstep(0.0, limits, abs(d_edge));)
+
+    /* Smoothly blends the two strongest directions (diagonal vs. vert/horiz).
+     * NOTE(review): the GLSL edge_strength above looks unused - confirm. */
+    GLSLHF("float color =  mix(mix(c1, c2, step(0.0, d_edge)),"
+                              "mix(c3, c4, step(0.0, hv_edge)), 1 - %f);\n",
+           conf->edge_strength);
+    /* Anti-ringing code. */
+    GLSLH(float min_sample = min(min(E, F), min(H, I));
+          float max_sample = max(max(E, F), max(H, I));
+          float aux = color;
+          color = clamp(color, min_sample, max_sample);)
+    GLSLHF("color = mix(aux, color, 1-2.0*abs(%f-0.5));\n", conf->edge_strength);
+
+    GLSLH(return color;)
+
+    GLSLHF("}");  // superxbr()
+
+    GLSL(vec4 color = vec4(1.0);)  // components >= planes keep the value 1.0
+
+    for (int i = 0; i < planes; i++) {  // one superxbr() call per plane
+        GLSLF("color[%d] = superxbr(texture%d, texcoord%d, texture_size%d, %d);\n",
+              i, tex_num, tex_num, tex_num, i);
+    }
+}
diff --git a/video/out/opengl/superxbr.h b/video/out/opengl/superxbr.h
new file mode 100644
index 0000000000..46f15fd269
--- /dev/null
+++ b/video/out/opengl/superxbr.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can alternatively redistribute this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#ifndef MP_GL_SUPERXBR_H
+#define MP_GL_SUPERXBR_H
+
+#include "common.h"
+#include "utils.h"
+
+extern const struct superxbr_opts superxbr_opts_def;
+extern const struct m_sub_options superxbr_conf;
+
+void pass_superxbr(struct gl_shader_cache *sc, int planes, int tex_num,
+                   int step, const struct superxbr_opts *conf,
+                   struct gl_transform *transform);
+
+#endif
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index ac36bdffc4..0026090df9 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -435,6 +435,18 @@ void gl_transform_ortho(struct gl_transform *t, float x0, float x1,
     t->t[1] = -(y1 + y0) / (y1 - y0);
 }
 
+// Post-composes t onto x: *x is overwritten in place with the combination of
+// both transforms; x's offset is mapped through t via gl_transform_vec().
+void gl_transform_trans(struct gl_transform t, struct gl_transform *x)
+{
+    float x00 = x->m[0][0], x01 = x->m[0][1], x10 = x->m[1][0], x11 = x->m[1][1];
+    x->m[0][0] = t.m[0][0] * x00 + t.m[0][1] * x10;
+    x->m[1][0] = t.m[0][0] * x01 + t.m[0][1] * x11;
+    x->m[0][1] = t.m[1][0] * x00 + t.m[1][1] * x10;
+    x->m[1][1] = t.m[1][0] * x01 + t.m[1][1] * x11;
+    gl_transform_vec(t, &x->t[0], &x->t[1]);
+}
+
 static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id,
                                    GLenum severity, GLsizei length,
                                    const GLchar *message, const void *userParam)
@@ -462,7 +474,7 @@ void gl_set_debug_logger(GL *gl, struct mp_log *log)
     }
 }
 
-#define SC_ENTRIES 16
+#define SC_ENTRIES 32
 #define SC_UNIFORM_ENTRIES 20
 
 enum uniform_type {
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index ffcaa4da0f..8682366a24 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -117,6 +117,8 @@ static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r)
     gl_transform_vec(t, &r->x1, &r->y1);
 }
 
+void gl_transform_trans(struct gl_transform t, struct gl_transform *x);
+
 void gl_set_debug_logger(GL *gl, struct mp_log *log);
 
 struct gl_shader_cache;
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 55fafad96e..b69330d1a9 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -38,6 +38,7 @@
 #include "hwdec.h"
 #include "osd.h"
 #include "stream/stream.h"
+#include "superxbr.h"
 #include "video_shaders.h"
 #include "video/out/filter_kernels.h"
 #include "video/out/aspect.h"
@@ -48,6 +49,12 @@
 // Pixel width of 1D lookup textures.
 #define LOOKUP_TEXTURE_SIZE 256
 
+// Maximal number of passes that prescaler can be applied.
+#define MAX_PRESCALE_PASSES 5
+
+// Maximal number of steps each pass of prescaling contains
+#define MAX_PRESCALE_STEPS 2
+
 // scale/cscale arguments that map directly to shader filter routines.
 // Note that the convolution filters are not included in this list.
 static const char *const fixed_scale_filters[] = {
@@ -166,6 +173,7 @@ struct gl_video {
     struct fbotex blend_subs_fbo;
     struct fbotex unsharp_fbo;
     struct fbotex output_fbo;
+    struct fbotex deband_fbo;
     struct fbosurface surfaces[FBOSURFACES_MAX];
 
     // these are duplicated so we can keep rendering back and forth between
@@ -173,6 +181,8 @@ struct gl_video {
     struct fbotex pre_fbo[2];
     struct fbotex post_fbo[2];
 
+    struct fbotex prescale_fbo[MAX_PRESCALE_PASSES][MAX_PRESCALE_STEPS];
+
     int surface_idx;
     int surface_now;
     int frames_drawn;
@@ -192,6 +202,7 @@ struct gl_video {
     // temporary during rendering
     struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
     int texture_w, texture_h;
+    struct gl_transform texture_offset; // texture transform without rotation
     bool use_linear;
     bool use_normalized_range;
     float user_gamma;
@@ -330,6 +341,8 @@ const struct gl_video_opts gl_video_opts_def = {
     .alpha_mode = 2,
     .background = {0, 0, 0, 255},
     .gamma = 1.0f,
+    .prescale_passes = 1,
+    .prescale_downscaling_threshold = 2.0f,
 };
 
 const struct gl_video_opts gl_video_opts_hq_def = {
@@ -353,6 +366,8 @@ const struct gl_video_opts gl_video_opts_hq_def = {
     .blend_subs = 0,
     .pbo = 1,
     .deband = 1,
+    .prescale_passes = 1,
+    .prescale_downscaling_threshold = 2.0f,
 };
 
 static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
@@ -429,6 +444,12 @@ const struct m_sub_options gl_video_conf = {
         OPT_FLAG("deband", deband, 0),
         OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
         OPT_FLOAT("sharpen", unsharp, 0),
+        OPT_CHOICE("prescale", prescale, 0, ({"none", 0}, {"superxbr", 1})),
+        OPT_INTRANGE("prescale-passes",
+                     prescale_passes, 0, 1, MAX_PRESCALE_PASSES),
+        OPT_FLOATRANGE("prescale-downscaling-threshold",
+                       prescale_downscaling_threshold, 0, 0.0, 32.0),
+        OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0),
 
         OPT_REMOVED("approx-gamma", "this is always enabled now"),
         OPT_REMOVED("cscale-down", "chroma is never downscaled"),
@@ -463,6 +484,7 @@ static void check_gl_features(struct gl_video *p);
 static bool init_format(int fmt, struct gl_video *init);
 static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
 static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src);
+static void get_scale_factors(struct gl_video *p, double xy[2]);
 
 #define GLSL(x) gl_sc_add(p->sc, #x "\n");
 #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
@@ -580,12 +602,18 @@ static void uninit_rendering(struct gl_video *p)
     fbotex_uninit(&p->indirect_fbo);
     fbotex_uninit(&p->blend_subs_fbo);
     fbotex_uninit(&p->unsharp_fbo);
+    fbotex_uninit(&p->deband_fbo);
 
     for (int n = 0; n < 2; n++) {
         fbotex_uninit(&p->pre_fbo[n]);
         fbotex_uninit(&p->post_fbo[n]);
     }
 
+    for (int pass = 0; pass < MAX_PRESCALE_PASSES; pass++) {
+        for (int step = 0; step < MAX_PRESCALE_STEPS; step++)
+            fbotex_uninit(&p->prescale_fbo[pass][step]);
+    }
+
     for (int n = 0; n < FBOSURFACES_MAX; n++)
         fbotex_uninit(&p->surfaces[n].fbotex);
 
@@ -1126,6 +1154,125 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
         GLSL(color.a = 1.0;)
 }
 
+// Number of prescaler passes to run for the current scale factors (0 = none).
+static int get_prescale_passes(struct gl_video *p)
+{
+    if (!p->opts.prescale)
+        return 0;
+    // A threshold below 1.0 turns the downscaling check off.
+    if (p->opts.prescale_downscaling_threshold < 1.0f)
+        return p->opts.prescale_passes;
+
+    double scale_factors[2];
+    get_scale_factors(p, scale_factors);
+
+    int passes = 0;
+    for (; passes < p->opts.prescale_passes; passes ++) {
+        // The scale factor happens to be the same for superxbr and nnedi3.
+        scale_factors[0] /= 2;
+        scale_factors[1] /= 2;
+
+        if (1.0f / scale_factors[0] > p->opts.prescale_downscaling_threshold)
+            break;  // this pass would exceed the allowed downscaling ratio
+        if (1.0f / scale_factors[1] > p->opts.prescale_downscaling_threshold)
+            break;  // same check for the second dimension
+    }
+
+    return passes;
+}
+
+// Apply the prescaler `passes` times; *offset gets the accumulated transform.
+static void pass_prescale(struct gl_video *p, int src_tex_num, int dst_tex_num,
+                          int planes, int w, int h, int passes,
+                          float tex_mul, struct gl_transform *offset)
+{
+    *offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
+
+    int tex_num = src_tex_num;  // the very first step reads the source texture
+
+    // Happens to be the same for superxbr and nnedi3.
+    const int steps_per_pass = 2;
+
+    for (int pass = 0; pass < passes; pass++) {
+        for (int step = 0; step < steps_per_pass; step++) {
+            struct gl_transform transform = {{{0}}};
+
+            switch(p->opts.prescale) {
+            case 1:  // "superxbr" in the prescale option choices
+                pass_superxbr(p->sc, planes, tex_num, step,
+                              p->opts.superxbr_opts, &transform);
+                break;
+            default:
+                abort();  // no other prescaler value is handled here
+            }
+
+            if (tex_mul != 1.0) {  // multiply by tex_mul once, in the first step
+                GLSLF("color *= %f;\n", tex_mul);
+                tex_mul = 1.0;
+            }
+
+            gl_transform_trans(transform, offset);
+
+            w *= (int)transform.m[0][0];  // grow the target size by this step's
+            h *= (int)transform.m[1][1];  // integer scale factors
+
+            finish_pass_fbo(p, &p->prescale_fbo[pass][step],
+                            w, h, dst_tex_num, 0);
+            tex_num = dst_tex_num;  // later steps read the previous result
+        }
+    }
+}
+
+// Prescale texture0 into texture4; returns false if no prescaling is done.
+static bool pass_prescale_luma(struct gl_video *p, float tex_mul,
+                               struct gl_transform *chromafix,
+                               struct gl_transform *transform,
+                               struct src_tex *prescaled_tex,
+                               int *prescaled_planes)
+{
+    // number of passes to apply prescaler, can be zero.
+    int prescale_passes = get_prescale_passes(p);
+
+    if (prescale_passes == 0)
+        return false;  // outputs are left untouched in this case
+
+    p->use_normalized_range = true;
+
+    // estimate a safe upperbound of planes being prescaled on texture0.
+    *prescaled_planes = p->is_yuv ? 1 :
+        (!p->color_swizzle[0] || p->color_swizzle[3] == 'a') ? 3 : 4;
+
+    struct src_tex tex_backup[4];
+    for (int i = 0; i < 4; i++)
+        tex_backup[i] = p->pass_tex[i];
+
+    if (p->opts.deband) {
+        // apply debanding before upscaling.
+        pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->gl_target,
+                           tex_mul, p->texture_w, p->texture_h, &p->lfg);
+        finish_pass_fbo(p, &p->deband_fbo, p->texture_w,
+                        p->texture_h, 0, 0);
+        tex_backup[0] = p->pass_tex[0];  // the restore below keeps this texture0
+    }
+
+    // process texture0 and store the result in texture4.
+    pass_prescale(p, 0, 4, *prescaled_planes, p->texture_w, p->texture_h,
+                  prescale_passes, p->opts.deband ? 1.0 : tex_mul, transform);
+
+    // correct the chromafix under new transform.
+    chromafix->t[0] -= transform->t[0] / transform->m[0][0];
+    chromafix->t[1] -= transform->t[1] / transform->m[1][1];
+
+    // restore the first four textures.
+    for (int i = 0; i < 4; i++)
+        p->pass_tex[i] = tex_backup[i];
+
+    // backup texture4 for later use.
+    *prescaled_tex = p->pass_tex[4];
+
+    return true;
+}
+
 // sample from video textures, set "color" variable to yuv value
 static void pass_read_video(struct gl_video *p)
 {
@@ -1136,6 +1283,16 @@ static void pass_read_video(struct gl_video *p)
         tx_bits = (in_bits + 7) & ~7;
     float tex_mul = ((1 << tx_bits) - 1.0) / ((1 << in_bits) - 1.0);
 
+    struct src_tex prescaled_tex;
+    struct gl_transform offset = {{{0}}};
+    int prescaled_planes;
+
+    bool prescaled = pass_prescale_luma(p, tex_mul, &chromafix, &offset,
+                                        &prescaled_tex, &prescaled_planes);
+
+    const int scale_factor_x = prescaled ? (int)offset.m[0][0] : 1;
+    const int scale_factor_y = prescaled ? (int)offset.m[1][1] : 1;
+
     bool color_defined = false;
     if (p->plane_count > 1) {
         // Chroma processing (merging -> debanding -> scaling)
@@ -1174,10 +1331,11 @@ static void pass_read_video(struct gl_video *p)
         }
 
         // Sample either directly or by upscaling
-        if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) {
+        if ((p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) || prescaled) {
             GLSLF("// chroma scaling\n");
             pass_sample(p, 1, &p->scaler[2], cscale, 1.0,
-                        p->texture_w, p->texture_h, chromafix);
+                        p->texture_w * scale_factor_x,
+                        p->texture_h * scale_factor_y, chromafix);
             GLSL(vec2 chroma = color.xy;)
             color_defined = true; // pass_sample defines vec4 color
         } else {
@@ -1200,12 +1358,20 @@ static void pass_read_video(struct gl_video *p)
     // stuff
     GLSL(vec4 main;)
     GLSLF("{\n");
-    if (p->opts.deband) {
+    if (!prescaled && p->opts.deband) {
         pass_sample_deband(p->sc, p->opts.deband_opts, 0, p->gl_target, tex_mul,
                            p->texture_w, p->texture_h, &p->lfg);
         p->use_normalized_range = true;
     } else {
-        GLSL(vec4 color = texture(texture0, texcoord0);)
+        if (!prescaled) {
+            GLSL(vec4 color = texture(texture0, texcoord0);)
+        } else {
+            // just use bilinear for non-essential planes.
+            GLSLF("vec4 color = texture(texture0, "
+                       "texcoord0 + vec2(%f,%f) / texture_size0);\n",
+                  -offset.t[0] / scale_factor_x,
+                  -offset.t[1] / scale_factor_y);
+        }
         if (p->use_normalized_range)
             GLSLF("color *= %f;\n", tex_mul);
     }
@@ -1214,13 +1380,33 @@ static void pass_read_video(struct gl_video *p)
 
     // Set up the right combination of planes
     GLSL(color = main;)
+    if (prescaled) {
+        // Restore texture4 and merge it into the main texture.
+        p->pass_tex[4] = prescaled_tex;
+
+        const char* planes_to_copy = "abgr" + 4 - prescaled_planes;
+        GLSLF("color.%s = texture(texture4, texcoord4).%s;\n",
+              planes_to_copy, planes_to_copy);
+
+        p->texture_w *= scale_factor_x;
+        p->texture_h *= scale_factor_y;
+        gl_transform_trans(offset, &p->texture_offset);
+    }
     if (p->plane_count > 1)
         GLSL(color.yz = chroma;)
     if (p->has_alpha && p->plane_count >= 4) {
-        GLSL(color.a = texture(texture3, texcoord3).r;)
+        if (!prescaled) {
+            GLSL(color.a = texture(texture3, texcoord3).r;)
+        } else {
+            GLSLF("color.a = texture(texture3, "
+                      "texcoord3 + vec2(%f,%f) / texture_size3).r;",
+                  -offset.t[0] / scale_factor_x,
+                  -offset.t[1] / scale_factor_y);
+        }
         if (p->use_normalized_range)
             GLSLF("color.a *= %f;\n", tex_mul);
     }
+
 }
 
 // yuv conversion, and any other conversions before main up/down-scaling
@@ -1327,6 +1513,8 @@ static void compute_src_transform(struct gl_video *p, struct gl_transform *tr,
           oy = p->src_rect.y0;
     struct gl_transform transform = {{{sx,0.0}, {0.0,sy}}, {ox,oy}};
 
+    gl_transform_trans(p->texture_offset, &transform);
+
     int xc = 0, yc = 1;
     *vp_w = p->dst_rect.x1 - p->dst_rect.x0,
     *vp_h = p->dst_rect.y1 - p->dst_rect.y0;
@@ -1348,14 +1536,23 @@ static void pass_scale_main(struct gl_video *p)
     // Figure out the main scaler.
     double xy[2];
     get_scale_factors(p, xy);
+
+    // actual scale factor should be divided by the scale factor of prescaling.
+    xy[0] /= p->texture_offset.m[0][0];
+    xy[1] /= p->texture_offset.m[1][1];
+
     bool downscaling = xy[0] < 1.0 || xy[1] < 1.0;
     bool upscaling = !downscaling && (xy[0] > 1.0 || xy[1] > 1.0);
     double scale_factor = 1.0;
 
     struct scaler *scaler = &p->scaler[0];
     struct scaler_config scaler_conf = p->opts.scaler[0];
-    if (p->opts.scaler_resizes_only && !downscaling && !upscaling)
+    if (p->opts.scaler_resizes_only && !downscaling && !upscaling) {
         scaler_conf.kernel.name = "bilinear";
+        // bilinear is going to be used, just remove all sub-pixel offsets.
+        p->texture_offset.t[0] = (int)p->texture_offset.t[0];
+        p->texture_offset.t[1] = (int)p->texture_offset.t[1];
+    }
     if (downscaling && p->opts.scaler[1].kernel.name) {
         scaler_conf = p->opts.scaler[1];
         scaler = &p->scaler[1];
@@ -1644,6 +1841,7 @@ static void pass_render_frame(struct gl_video *p)
     // initialize the texture parameters
     p->texture_w = p->image_params.w;
     p->texture_h = p->image_params.h;
+    p->texture_offset = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}};
 
     if (p->opts.dumb_mode)
         return;
@@ -2509,12 +2707,18 @@ static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src)
     talloc_free(dst->pre_shaders);
     talloc_free(dst->post_shaders);
     talloc_free(dst->deband_opts);
+    talloc_free(dst->superxbr_opts);
 
     *dst = *src;
 
     if (src->deband_opts)
         dst->deband_opts = m_sub_options_copy(NULL, &deband_conf, src->deband_opts);
 
+    if (src->superxbr_opts) {
+        dst->superxbr_opts = m_sub_options_copy(NULL, &superxbr_conf,
+                                                src->superxbr_opts);
+    }
+
     for (int n = 0; n < 4; n++) {
         dst->scaler[n].kernel.name =
             (char *)handle_scaler_opt(dst->scaler[n].kernel.name, n == 3);
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index 2d9e9abfca..0ed7b7cb41 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -98,6 +98,10 @@ struct gl_video_opts {
     int deband;
     struct deband_opts *deband_opts;
     float unsharp;
+    int prescale;
+    int prescale_passes;
+    float prescale_downscaling_threshold;
+    struct superxbr_opts *superxbr_opts;
 };
 
 extern const struct m_sub_options gl_video_conf;
diff --git a/wscript_build.py b/wscript_build.py
index c410c3724d..b6836a7beb 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -325,6 +325,7 @@ def build(ctx):
         ( "video/out/opengl/hwdec_vdpau.c",      "vdpau-gl-x11" ),
         ( "video/out/opengl/lcms.c",             "gl" ),
         ( "video/out/opengl/osd.c",              "gl" ),
+        ( "video/out/opengl/superxbr.c",         "gl" ),
         ( "video/out/opengl/utils.c",            "gl" ),
         ( "video/out/opengl/video.c",            "gl" ),
         ( "video/out/opengl/video_shaders.c",    "gl" ),