From f3b00ec142c99836c918da8cb23d02148440e5f1 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@nand.wakku.to>
Date: Sat, 5 Sep 2015 11:39:20 +0200
Subject: vo_opengl: require FBOs and get rid of the single-pass optimization

This change makes vo_opengl slightly less compatible (ancient devices
without FBOs will no longer work) and decreases performance in the
simplest case (vo=opengl), in exchange for significantly reducing code
complexity and making everything easier to reason about.
---
 DOCS/man/vo.rst      |  17 ++++----
 video/out/gl_video.c | 116 +++++++++++++++------------------------------------
 2 files changed, 42 insertions(+), 91 deletions(-)
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index ef4a677c62..2334535998 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -290,6 +290,13 @@ Available video output drivers are:
     color space conversion and chroma upsampling is generally in the hand of
     the hardware decoder APIs.
 
+    ``opengl`` makes use of FBOs by default. Sometimes you can achieve better
+    quality or performance by changing the ``fbo-format`` suboption to
+    ``rgb16f``, ``rgb32f`` or ``rgb``. Known problems include Mesa/Intel not
+    accepting ``rgb16``, Mesa sometimes not being compiled with float texture
+    support, and some OS X setups being very slow with ``rgb16`` but fast
+    with ``rgb32f``.
+
     ``scale=<filter>``
 
         ``bilinear``
@@ -657,8 +664,7 @@ Available video output drivers are:
 
     ``fbo-format=<fmt>``
         Selects the internal format of textures used for FBOs. The format can
-        influence performance and quality of the video output. (FBOs are not
-        always used, and typically only when using extended scalers.)
+        influence performance and quality of the video output.
         ``fmt`` can be one of: rgb, rgba, rgb8, rgb10, rgb10_a2, rgb16, rgb16f,
         rgb32f, rgba12, rgba16, rgba16f, rgba32f.
         Default: rgba16.
@@ -826,13 +832,6 @@ Available video output drivers are:
     Note that some cheaper LCDs do dithering that gravely interferes with
     ``opengl``'s dithering. Disabling dithering with ``dither-depth=no`` helps.
 
-    Unlike ``opengl``, ``opengl-hq`` makes use of FBOs by default. Sometimes you
-    can achieve better quality or performance by changing the ``fbo-format``
-    suboption to ``rgb16f``, ``rgb32f`` or ``rgb``. Known problems include
-    Mesa/Intel not accepting ``rgb16``, Mesa sometimes not being compiled with
-    float texture support, and some OS X setups being very slow with ``rgb16``
-    but fast with ``rgb32f``.
-
 ``sdl``
     SDL 2.0+ Render video output driver, depending on system with or without
     hardware acceleration. Should work on all platforms supported by SDL 2.0.
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index 7d27c73eb6..cdaa8d8c26 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -206,7 +206,6 @@ struct gl_video {
 
     // temporary during rendering
     struct src_tex pass_tex[TEXUNIT_VIDEO_NUM];
-    bool use_indirect;
     bool use_linear;
     bool use_normalized_range;
     float user_gamma;
@@ -483,7 +482,7 @@ const struct m_sub_options gl_video_conf = {
 
 static void uninit_rendering(struct gl_video *p);
 static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
-static void check_gl_features(struct gl_video *p);
+static bool check_gl_features(struct gl_video *p);
 static bool init_format(int fmt, struct gl_video *init);
 static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi);
 
@@ -1400,7 +1399,6 @@ static void pass_read_video(struct gl_video *p)
             GLSLF("// custom source-shader (RGB)\n");
             gl_sc_uniform_f(p->sc, "cmul", cmul);
             GLSL(vec4 color = sample(texture0, texcoord0, texture_size0);)
-            p->use_indirect = true;
         } else {
             GLSL(vec4 color = texture(texture0, texcoord0);)
         }
@@ -1413,12 +1411,9 @@ static void pass_read_video(struct gl_video *p)
     int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0;
     int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0;
     const struct scaler_config *cscale = &p->opts.scaler[2];
-    // Non-trivial sampling is needed on the chroma plane
-    bool nontrivial = p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED &&
-                      strcmp(cscale->kernel.name, "bilinear") != 0;
 
     bool merged = false;
-    if (p->plane_count > 2 && (nontrivial || shader)) {
+    if (p->plane_count > 2) {
         // For simplicity and performance, we merge the chroma planes
         // into a single texture before scaling or shading, so the shader
         // doesn't need to run multiple times.
@@ -1444,39 +1439,21 @@ static void pass_read_video(struct gl_video *p)
         GLSL(vec4 color = sample(texture1, texcoord1, texture_size1);)
         GLSL(color.ba = vec2(0.0, 1.0);) // skip unused
         finish_pass_fbo(p, &p->source_fbo, c_w, c_h, 1, 0);
-        p->use_indirect = true;
     }
 
-    if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED && nontrivial) {
+    if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) {
         GLSLF("// chroma scaling\n");
         pass_sample(p, 1, &p->scaler[2], cscale, 1.0, p->image_w, p->image_h,
                     chromafix);
         GLSL(vec2 chroma = color.rg;)
-        p->use_indirect = true;
-    } else {
-        // No explicit scaling needed, either because it's trivial (ie.
-        // bilinear), or because there's no subsampling. We have to manually
-        // apply the fix to the chroma coordinates because it's not implied by
-        // pass_sample.
-        GLSL(vec4 color;)
-        gl_transform_rect(chromafix, &p->pass_tex[1].src);
-        if (p->plane_count > 2 && !merged) {
-            gl_transform_rect(chromafix, &p->pass_tex[2].src);
-            GLSL(vec2 chroma = vec2(texture(texture1, texcoord1).r,
-                                    texture(texture2, texcoord2).r);)
-        } else {
-            GLSL(vec2 chroma = texture(texture1, texcoord1).rg;)
-        }
     }
 
     p->pass_tex[0] = luma; // Restore the luma plane
-    p->pass_tex[3] = alpha; // Restore the alpha plane (if set)
     if (shader) {
         load_shader(p, shader);
         gl_sc_uniform_f(p->sc, "cmul", cmul);
         GLSLF("// custom source-shader (luma)\n");
         GLSL(float luma = sample(texture0, texcoord0, texture_size0).r;)
-        p->use_indirect = true;
     } else {
         GLSL(float luma = texture(texture0, texcoord0).r;)
         if (p->use_normalized_range)
@@ -1484,6 +1461,8 @@ static void pass_read_video(struct gl_video *p)
     }
 
     GLSL(color = vec4(luma, chroma, 1.0);)
+
+    p->pass_tex[3] = alpha; // Restore the alpha plane (if set)
     if (p->has_alpha && p->plane_count >= 4) {
         GLSL(color.a = texture(texture3, texcoord3).r;)
         if (p->use_normalized_range)
@@ -1538,7 +1517,6 @@ static void pass_convert_yuv(struct gl_video *p)
     }
 
     if (p->image_params.colorspace == MP_CSP_BT_2020_C) {
-        p->use_indirect = true;
         // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
         // C'bc = (B'-Y'c) / 1.9404  | C'bc <= 0
         //      = (B'-Y'c) / 1.5816  | C'bc >  0
@@ -1681,15 +1659,12 @@ static void pass_scale_main(struct gl_video *p)
 
     // Pre-conversion, like linear light/sigmoidization
     GLSLF("// scaler pre-conversion\n");
-    if (p->use_linear) {
-        p->use_indirect = true;
+    if (p->use_linear)
         pass_linearize(p, p->image_params.gamma);
-    }
 
     bool use_sigmoid = p->use_linear && p->opts.sigmoid_upscaling && upscaling;
     float sig_center, sig_slope, sig_offset, sig_scale;
     if (use_sigmoid) {
-        p->use_indirect = true;
         // Coefficients for the sigmoidal transform are taken from the
         // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal
         sig_center = p->opts.sigmoid_center;
@@ -1722,23 +1697,9 @@ static void pass_scale_main(struct gl_video *p)
     }
 
     GLSLF("// main scaling\n");
-    if (!p->use_indirect && strcmp(scaler_conf.kernel.name, "bilinear") == 0) {
-        // implicitly scale in pass_video_to_screen, but set up the textures
-        // manually (for cropping etc.). Special care has to be taken for the
-        // chroma planes (everything except luma=tex0), to make sure the offset
-        // is scaled to the correct reference frame (in the case of subsampled
-        // input)
-        struct gl_transform tchroma = transform;
-        tchroma.t[xc] /= 1 << p->image_desc.chroma_xs;
-        tchroma.t[yc] /= 1 << p->image_desc.chroma_ys;
-
-        for (int n = 0; n < p->plane_count; n++)
-            gl_transform_rect(n > 0 ? tchroma : transform, &p->pass_tex[n].src);
-    } else {
-        finish_pass_fbo(p, &p->indirect_fbo, p->image_w, p->image_h, 0, 0);
-        pass_sample(p, 0, scaler, &scaler_conf, scale_factor, vp_w, vp_h,
-                    transform);
-    }
+    finish_pass_fbo(p, &p->indirect_fbo, p->image_w, p->image_h, 0, 0);
+    pass_sample(p, 0, scaler, &scaler_conf, scale_factor, vp_w, vp_h,
+                transform);
 
     GLSLF("// scaler post-conversion\n");
     if (use_sigmoid) {
@@ -1942,7 +1903,6 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
 static void pass_render_frame(struct gl_video *p)
 {
     p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
-    p->use_indirect = false; // set to true as needed by pass_*
     pass_read_video(p);
     pass_convert_yuv(p);
 
@@ -1964,11 +1924,8 @@ static void pass_render_frame(struct gl_video *p)
         GLSL(vec4 color = texture(texture0, texcoord0);)
     }
 
-    if (apply_shaders(p, p->opts.pre_shaders, &p->pre_fbo[0], 0,
-                      p->image_w, p->image_h))
-    {
-        p->use_indirect = true;
-    }
+    apply_shaders(p, p->opts.pre_shaders, &p->pre_fbo[0], 0,
+                  p->image_w, p->image_h);
 
     pass_scale_main(p);
 
@@ -2348,28 +2305,25 @@ static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi)
         gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 }
 
-static bool test_fbo(struct gl_video *p, bool *success)
+static bool test_fbo(struct gl_video *p)
 {
-    if (!*success)
-        return false;
-
     GL *gl = p->gl;
-    *success = false;
+    bool success = false;
     MP_VERBOSE(p, "Testing user-set FBO format (0x%x)\n",
                    (unsigned)p->opts.fbo_format);
     struct fbotex fbo = {0};
     if (fbotex_init(&fbo, p->gl, p->log, 16, 16, p->opts.fbo_format)) {
         gl->BindFramebuffer(GL_FRAMEBUFFER, fbo.fbo);
         gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
-        *success = true;
+        success = true;
     }
     fbotex_uninit(&fbo);
     glCheckError(gl, p->log, "FBO test");
-    return *success;
+    return success;
 }
 
 // Disable features that are not supported with the current OpenGL version.
-static void check_gl_features(struct gl_video *p)
+static bool check_gl_features(struct gl_video *p)
 {
     GL *gl = p->gl;
     bool have_float_tex = gl->mpgl_caps & MPGL_CAP_FLOAT_TEX;
@@ -2378,6 +2332,13 @@ static void check_gl_features(struct gl_video *p)
     bool have_3d_tex = gl->mpgl_caps & MPGL_CAP_3D_TEX;
     bool have_mix = gl->glsl_version >= 130;
 
+    // Immediately error out if FBOs are missing, since they are required
+    // for basic operation.
+    if (!have_fbo || !test_fbo(p)) {
+        MP_ERR(p, "FBOs unsupported, required for vo_opengl.\n");
+        return false;
+    }
+
     // Normally, we want to disable them by default if FBOs are unavailable,
     // because they will be slow (not critically slow, but still slower).
     // Without FP textures, we must always disable them.
@@ -2387,8 +2348,6 @@ static void check_gl_features(struct gl_video *p)
             mp_find_filter_kernel(p->opts.scaler[n].kernel.name);
         if (kernel) {
             char *reason = NULL;
-            if (!test_fbo(p, &have_fbo))
-                reason = "(FBOs missing)";
             if (!have_float_tex)
                 reason = "(float tex. missing)";
             if (!have_1d_tex && kernel->polar)
@@ -2428,33 +2387,22 @@ static void check_gl_features(struct gl_video *p)
         p->use_lut_3d = false;
         MP_WARN(p, "Disabling color management (GLSL version too old).\n");
     }
-    if (use_cms && !test_fbo(p, &have_fbo)) {
-        p->opts.target_prim = MP_CSP_PRIM_AUTO;
-        p->opts.target_trc = MP_CSP_TRC_AUTO;
-        p->use_lut_3d = false;
-        MP_WARN(p, "Disabling color management (FBOs missing).\n");
-    }
-    if (p->opts.interpolation && !test_fbo(p, &have_fbo)) {
-        p->opts.interpolation = false;
-        MP_WARN(p, "Disabling interpolation (FBOs missing).\n");
-    }
-    if (p->opts.blend_subs && !test_fbo(p, &have_fbo)) {
-        p->opts.blend_subs = 0;
-        MP_WARN(p, "Disabling subtitle blending (FBOs missing).\n");
-    }
     if (gl->es && p->opts.pbo) {
         p->opts.pbo = 0;
         MP_WARN(p, "Disabling PBOs (GLES unsupported).\n");
     }
+
+    return true;
 }
 
-static int init_gl(struct gl_video *p)
+static bool init_gl(struct gl_video *p)
 {
     GL *gl = p->gl;
 
     debug_check_gl(p, "before init_gl");
 
-    check_gl_features(p);
+    if (!check_gl_features(p))
+        return false;
 
     gl->Disable(GL_DITHER);
 
@@ -2488,7 +2436,7 @@ static int init_gl(struct gl_video *p)
 
     debug_check_gl(p, "after init_gl");
 
-    return 1;
+    return true;
 }
 
 void gl_video_uninit(struct gl_video *p)
@@ -2735,7 +2683,11 @@ struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g)
         .sc = gl_sc_create(gl, log, g),
     };
     gl_video_set_debug(p, true);
-    init_gl(p);
+    if (!init_gl(p)) {
+        mp_err(log, "Failed to initialize OpenGL.\n");
+        gl_video_uninit(p);
+        return NULL;
+    }
     recreate_osd(p);
     return p;
 }
-- 
cgit v1.2.3