vo_opengl: refactor into vo_gpu

This is done in several steps:

1. refactor MPGLContext -> struct ra_ctx
2. move GL-specific stuff in vo_opengl into opengl/context.c
3. generalize context creation to support other APIs, and add --gpu-api
4. rename all of the --opengl- options that are no longer opengl-specific
5. move all of the stuff from opengl/* that isn't GL-specific into gpu/ (note: opengl/gl_utils.h became opengl/utils.h)
6. rename vo_opengl to vo_gpu
7. to handle window screenshots, the short-term approach was to just add it to ra_swapchain_fns. Long term (and for vulkan) this has to be moved to ra itself (and vo_gpu altered to compensate), but this was a stop-gap measure to prevent this commit from getting too big
8. move ra->fns->flush to ra_gl_ctx instead
9. some other minor changes that I've probably already forgotten

Note: This is one half of a major refactor, the other half of which is provided by rossy's following commit. This commit enables support for all linux platforms, while his version enables support for all non-linux platforms.

Note 2: vo_opengl_cb.c also re-uses ra_gl_ctx so it benefits from the --opengl- options like --opengl-early-flush, --opengl-finish etc. Should be a strict superset of the old functionality.

Disclaimer: Since I have no way of compiling mpv on all platforms, some of these ports were done blindly. Specifically, the blind ports included context_mali_fbdev.c and context_rpi.c. Since they're both based on egl_helpers, the port should have gone smoothly without any major changes required. But if somebody complains about a compile error on those platforms (assuming anybody actually uses them), you know where to complain.
author: Niklas Haas <git@haasn.xyz> 2017-09-14 08:04:55 +0200
committer: Niklas Haas <git@haasn.xyz> 2017-09-21 15:00:55 +0200
commit: 65979986a923a8f08019b257c3fe72cd5e8ecf68 (patch)
tree: b8f4b8c17d583594aef0ca509064f8b2ff7128d4 /video
parent: 20f958c9775652c3213588c2a0824f5353276adc (diff)
download: mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.bz2
mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.xz
51 files changed, 2071 insertions, 1795 deletions
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
new file mode 100644
index 0000000000..dbabba8b3b
--- /dev/null
+++ b/video/out/gpu/context.c
@@ -0,0 +1,186 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <math.h>
+#include <assert.h>
+
+#include "config.h"
+#include "common/common.h"
+#include "common/msg.h"
+#include "options/options.h"
+#include "options/m_option.h"
+#include "video/out/vo.h"
+
+#include "context.h"
+
+// Context backends. Each one is defined outside this file and is only
+// referenced from the table below when its HAVE_* feature macro is enabled.
+extern const struct ra_ctx_fns ra_ctx_glx;
+extern const struct ra_ctx_fns ra_ctx_glx_probe;
+extern const struct ra_ctx_fns ra_ctx_x11_egl;
+extern const struct ra_ctx_fns ra_ctx_drm_egl;
+extern const struct ra_ctx_fns ra_ctx_cocoa;
+extern const struct ra_ctx_fns ra_ctx_wayland_egl;
+extern const struct ra_ctx_fns ra_ctx_wgl;
+extern const struct ra_ctx_fns ra_ctx_angle;
+extern const struct ra_ctx_fns ra_ctx_dxinterop;
+extern const struct ra_ctx_fns ra_ctx_rpi;
+extern const struct ra_ctx_fns ra_ctx_mali;
+extern const struct ra_ctx_fns ra_ctx_vdpauglx;
+
+// All compiled-in contexts. ra_ctx_create() walks this table front to back
+// and returns the first context whose init() succeeds, so the order of the
+// entries is the auto-probing priority.
+static const struct ra_ctx_fns *contexts[] = {
+// OpenGL contexts:
+#if HAVE_RPI
+    &ra_ctx_rpi,
+#endif
+// NOTE(review): the cocoa/angle/wgl/dxinterop entries below are commented
+// out; presumably they are re-enabled once those backends are ported to
+// struct ra_ctx -- confirm before relying on them.
+/*
+#if HAVE_GL_COCOA
+    &ra_ctx_cocoa,
+#endif
+#if HAVE_EGL_ANGLE_WIN32
+    &ra_ctx_angle,
+#endif
+#if HAVE_GL_WIN32
+    &ra_ctx_wgl,
+#endif
+#if HAVE_GL_DXINTEROP
+    &ra_ctx_dxinterop,
+#endif
+*/
+#if HAVE_GL_X11
+    &ra_ctx_glx_probe,
+#endif
+#if HAVE_EGL_X11
+    &ra_ctx_x11_egl,
+#endif
+#if HAVE_GL_X11
+    &ra_ctx_glx,
+#endif
+#if HAVE_GL_WAYLAND
+    &ra_ctx_wayland_egl,
+#endif
+#if HAVE_EGL_DRM
+    &ra_ctx_drm_egl,
+#endif
+#if HAVE_MALI_FBDEV
+    &ra_ctx_mali,
+#endif
+#if HAVE_VDPAU_GL_X11
+    &ra_ctx_vdpauglx,
+#endif
+};
+
+// If the option value is "help", print every compiled-in context as
+// "name (type)" preceded by the "auto" pseudo-entry. Returns true if the
+// list was printed, false for any other value.
+static bool get_help(struct mp_log *log, struct bstr param)
+{
+    if (!bstr_equals0(param, "help"))
+        return false;
+
+    mp_info(log, "GPU contexts / APIs:\n");
+    mp_info(log, "    auto (autodetect)\n");
+    for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) {
+        const struct ra_ctx_fns *fns = contexts[i];
+        mp_info(log, "    %s (%s)\n", fns->name, fns->type);
+    }
+
+    return true;
+}
+
+// Option validator for the API type. "help" prints the context list and
+// yields M_OPT_EXIT; "auto" or the `type` of any compiled-in context yields
+// 1; everything else yields M_OPT_INVALID.
+int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt,
+                        struct bstr name, struct bstr param)
+{
+    if (get_help(log, param))
+        return M_OPT_EXIT;
+
+    bool known = bstr_equals0(param, "auto");
+    for (int n = 0; !known && n < MP_ARRAY_SIZE(contexts); n++)
+        known = bstr_equals0(param, contexts[n]->type);
+
+    return known ? 1 : M_OPT_INVALID;
+}
+
+// Option validator for the context name. "help" prints the context list and
+// yields M_OPT_EXIT; "auto" or the `name` of any compiled-in context yields
+// 1; everything else yields M_OPT_INVALID.
+int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt,
+                            struct bstr name, struct bstr param)
+{
+    if (get_help(log, param))
+        return M_OPT_EXIT;
+
+    bool known = bstr_equals0(param, "auto");
+    for (int n = 0; !known && n < MP_ARRAY_SIZE(contexts); n++)
+        known = bstr_equals0(param, contexts[n]->name);
+
+    return known ? 1 : M_OPT_INVALID;
+}
+
+// Create a VO window and create a RA context on it.
+//  context_type: restricts the search to contexts whose ra_ctx_fns.type
+//                matches; NULL or "auto" accepts any API
+//  context_name: selects the context whose ra_ctx_fns.name matches; NULL or
+//                "auto" tries every compiled-in context in table order
+//  opts:         copied into the new context; opts.probing is forced to true
+//                when the context name is auto-detected
+// Returns the first matching context whose init() succeeds, or NULL (after
+// logging an error) if there is none. Free the result with ra_ctx_destroy().
+struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type,
+                             const char *context_name, struct ra_ctx_opts opts)
+{
+    bool api_auto = !context_type || strcmp(context_type, "auto") == 0;
+    bool ctx_auto = !context_name || strcmp(context_name, "auto") == 0;
+
+    if (ctx_auto) {
+        MP_VERBOSE(vo, "Probing for best GPU context.\n");
+        opts.probing = true;
+    }
+
+    // Hack to silence backend (X11/Wayland/etc.) errors. Kill it once backends
+    // are separate from `struct vo`
+    bool old_probing = vo->probing;
+    vo->probing = opts.probing;
+
+    struct ra_ctx *found = NULL;
+
+    for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) {
+        // Filter on ctx_auto rather than opts.probing: a caller may pass
+        // opts.probing = true together with an explicit context name, and
+        // that name must still be honored. !ctx_auto also guarantees that
+        // context_name is non-NULL here.
+        if (!ctx_auto && strcmp(contexts[i]->name, context_name) != 0)
+            continue;
+        if (!api_auto && strcmp(contexts[i]->type, context_type) != 0)
+            continue;
+
+        struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx);
+        *ctx = (struct ra_ctx) {
+            .vo = vo,
+            .global = vo->global,
+            .log = mp_log_new(ctx, vo->log, contexts[i]->type),
+            .opts = opts,
+            .fns = contexts[i],
+        };
+
+        MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name);
+        if (contexts[i]->init(ctx)) {
+            found = ctx;
+            break;
+        }
+
+        // init() failed: discard this attempt and try the next candidate
+        talloc_free(ctx);
+    }
+
+    // If nothing was found, then none of the contexts matched the name
+    // requested, or the backend creation failed for all of them.
+    if (!found)
+        MP_ERR(vo, "Failed initializing any suitable GPU context!\n");
+
+    vo->probing = old_probing;
+    return found;
+}
+
+// Uninitialize and free the context referenced by *ctx, if there is one, and
+// reset the caller's pointer to NULL.
+void ra_ctx_destroy(struct ra_ctx **ctx)
+{
+    struct ra_ctx *c = *ctx;
+    if (c)
+        c->fns->uninit(c);
+    talloc_free(*ctx);
+    *ctx = NULL;
+}
diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h
new file mode 100644
index 0000000000..42de59b75f
--- /dev/null
+++ b/video/out/gpu/context.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include "video/out/vo.h"
+
+#include "config.h"
+#include "ra.h"
+
+// Options passed by value to ra_ctx_create() and stored in ra_ctx.opts.
+struct ra_ctx_opts {
+    int allow_sw;        // allow software renderers
+    int want_alpha;      // create an alpha framebuffer if possible
+    int debug;           // enable debugging layers/callbacks etc.
+    bool probing;        // the backend was auto-probed (ra_ctx_create() sets
+                         // this itself when the context name is "auto"/NULL)
+    int swapchain_depth; // max number of images to render ahead
+};
+
+// A GPU context instance. ra_ctx_create() allocates it and fills in vo,
+// global, log, opts and fns before calling fns->init(); the remaining fields
+// are left zeroed at that point.
+struct ra_ctx {
+    struct vo *vo;              // VO this context was created for
+    struct ra *ra;              // NOTE(review): presumably set by fns->init
+    struct mpv_global *global;  // copied from vo->global
+    struct mp_log *log;         // derived from vo->log, named after fns->type
+
+    struct ra_ctx_opts opts;    // copy of the options given to ra_ctx_create()
+    const struct ra_ctx_fns *fns; // backend implementing this context
+    struct ra_swapchain *swapchain; // NOTE(review): presumably set by init
+
+    void *priv;                 // NOTE(review): presumably backend-private
+};
+
+// The functions that make up a ra_ctx. One instance exists per backend, and
+// ra_ctx_create() selects among them by `type` and `name`.
+struct ra_ctx_fns {
+    const char *type; // API type (for --gpu-api)
+    const char *name; // name (for --gpu-context)
+
+    // Resize the window, or create a new window if there isn't one yet.
+    // Currently, there is an unfortunate interaction with ctx->vo, and
+    // display size etc. are determined by it.
+    bool (*reconfig)(struct ra_ctx *ctx);
+
+    // This behaves exactly like vo_driver.control().
+    int (*control)(struct ra_ctx *ctx, int *events, int request, void *arg);
+
+    // These behave exactly like vo_driver.wakeup/wait_events. They are
+    // optional.
+    void (*wakeup)(struct ra_ctx *ctx);
+    void (*wait_events)(struct ra_ctx *ctx, int64_t until_time_us);
+
+    // Initialize/destroy the 'struct ra' and possibly the underlying VO backend.
+    // Not normally called by the user of the ra_ctx.
+    // init returns true on success. If it returns false, ra_ctx_create()
+    // frees the ctx without calling uninit. ra_ctx_destroy() calls uninit
+    // before freeing the ctx.
+    bool (*init)(struct ra_ctx *ctx);
+    void (*uninit)(struct ra_ctx *ctx);
+};
+
+// Extra struct for the swapchain-related functions so they can be easily
+// inherited from helpers.
+struct ra_swapchain {
+    struct ra_ctx *ctx; // NOTE(review): presumably the owning context
+    struct priv *priv;  // `struct priv` is not declared in this header
+    const struct ra_swapchain_fns *fns; // swapchain operations
+
+    bool flip_v; // flip the rendered image vertically (set by the swapchain)
+};
+
+// Operations provided by a swapchain implementation. Entries not marked
+// "Optional" carry no such marking in this header, so treat them as required.
+struct ra_swapchain_fns {
+    // Gets the current framebuffer depth in bits (0 if unknown). Optional.
+    int (*color_depth)(struct ra_swapchain *sw);
+
+    // Retrieves a screenshot of the framebuffer. These are always the right
+    // side up, regardless of ra_swapchain->flip_v. Optional.
+    struct mp_image *(*screenshot)(struct ra_swapchain *sw);
+
+    // Called when rendering starts. Returns NULL on failure. This must be
+    // followed by submit_frame, to submit the rendered frame.
+    struct ra_tex *(*start_frame)(struct ra_swapchain *sw);
+
+    // Present the frame. Issued in lockstep with start_frame, with rendering
+    // commands in between. The `frame` is just there for timing data, for
+    // swapchains smart enough to do something with it.
+    bool (*submit_frame)(struct ra_swapchain *sw, const struct vo_frame *frame);
+
+    // Performs a buffer swap. This blocks for as long as necessary to meet
+    // the configured swapchain_depth (see struct ra_ctx_opts), or until the
+    // next vblank (for vsynced contexts)
+    void (*swap_buffers)(struct ra_swapchain *sw);
+};
+
+// Create and destroy a ra_ctx. This also takes care of creating and destroying
+// the underlying `struct ra`, and perhaps the underlying VO backend.
+// For ra_ctx_create, a NULL or "auto" context_type/context_name means "any".
+// It returns NULL if no matching context could be initialized.
+// ra_ctx_destroy accepts *ctx == NULL and always leaves *ctx set to NULL.
+struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type,
+                             const char *context_name, struct ra_ctx_opts opts);
+void ra_ctx_destroy(struct ra_ctx **ctx);
+
+// Option validators for the API type and the context name. Both return
+// M_OPT_EXIT after printing the list for "help", 1 for "auto" or a known
+// value, and M_OPT_INVALID otherwise.
+struct m_option;
+int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt,
+                        struct bstr name, struct bstr param);
+int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt,
+                            struct bstr name, struct bstr param);
diff --git a/video/out/opengl/hwdec.c b/video/out/gpu/hwdec.c
index 5fbc1aa4a9..5fbc1aa4a9 100644
--- a/video/out/opengl/hwdec.c
+++ b/video/out/gpu/hwdec.c
diff --git a/video/out/opengl/hwdec.h b/video/out/gpu/hwdec.h
index 20bbaae9eb..20bbaae9eb 100644
--- a/video/out/opengl/hwdec.h
+++ b/video/out/gpu/hwdec.h
diff --git a/video/out/opengl/lcms.c b/video/out/gpu/lcms.c
index 8747ae6aa6..8747ae6aa6 100644
--- a/video/out/opengl/lcms.c
+++ b/video/out/gpu/lcms.c
diff --git a/video/out/opengl/lcms.h b/video/out/gpu/lcms.h
index 35bbd61fe0..35bbd61fe0 100644
--- a/video/out/opengl/lcms.h
+++ b/video/out/gpu/lcms.h
diff --git a/video/out/opengl/osd.c b/video/out/gpu/osd.c
index f7c325d1db..f7c325d1db 100644
--- a/video/out/opengl/osd.c
+++ b/video/out/gpu/osd.c
diff --git a/video/out/opengl/osd.h b/video/out/gpu/osd.h
index 6c2b886de3..6c2b886de3 100644
--- a/video/out/opengl/osd.h
+++ b/video/out/gpu/osd.h
diff --git a/video/out/opengl/ra.c b/video/out/gpu/ra.c
index ef1de54d1a..ef1de54d1a 100644
--- a/video/out/opengl/ra.c
+++ b/video/out/gpu/ra.c
diff --git a/video/out/opengl/ra.h b/video/out/gpu/ra.h
index ae7fb9aea7..76f98397f8 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/gpu/ra.h
@@ -436,9 +436,6 @@ struct ra_fns {
     // delayed by a few frames. When no value is available, this returns 0.
     uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer);
 
-    // Hint that possibly queued up commands should be sent to the GPU. Optional.
-    void (*flush)(struct ra *ra);
-
     // Associates a marker with any past error messages, for debugging
     // purposes. Optional.
     void (*debug_marker)(struct ra *ra, const char *msg);
diff --git a/video/out/opengl/shader_cache.c b/video/out/gpu/shader_cache.c
index 90a757617b..afda9cc036 100644
--- a/video/out/opengl/shader_cache.c
+++ b/video/out/gpu/shader_cache.c
@@ -14,7 +14,6 @@
 #include "options/path.h"
 #include "stream/stream.h"
 #include "shader_cache.h"
-#include "formats.h"
 #include "utils.h"
 
 // Force cache flush if more than this number of shaders is created.
@@ -361,7 +360,7 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
     u->v.f[1] = f[1];
 }
 
-void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3])
+void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
 {
     struct sc_uniform *u = find_uniform(sc, name);
     u->input.type = RA_VARTYPE_FLOAT;
@@ -379,7 +378,7 @@ static void transpose2x2(float r[2 * 2])
 }
 
 void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
-                        bool transpose, GLfloat *v)
+                        bool transpose, float *v)
 {
     struct sc_uniform *u = find_uniform(sc, name);
     u->input.type = RA_VARTYPE_FLOAT;
@@ -401,7 +400,7 @@ static void transpose3x3(float r[3 * 3])
 }
 
 void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
-                        bool transpose, GLfloat *v)
+                        bool transpose, float *v)
 {
     struct sc_uniform *u = find_uniform(sc, name);
     u->input.type = RA_VARTYPE_FLOAT;
diff --git a/video/out/opengl/shader_cache.h b/video/out/gpu/shader_cache.h
index 82a078079b..82a078079b 100644
--- a/video/out/opengl/shader_cache.h
+++ b/video/out/gpu/shader_cache.h
diff --git a/video/out/opengl/user_shaders.c b/video/out/gpu/user_shaders.c
index 58a1ac9e64..446941b03f 100644
--- a/video/out/opengl/user_shaders.c
+++ b/video/out/gpu/user_shaders.c
@@ -17,9 +17,9 @@
 
 #include <assert.h>
 
+#include "common/msg.h"
 #include "misc/ctype.h"
 #include "user_shaders.h"
-#include "formats.h"
 
 static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE])
 {
diff --git a/video/out/opengl/user_shaders.h b/video/out/gpu/user_shaders.h
index 94a070c8e2..94a070c8e2 100644
--- a/video/out/opengl/user_shaders.h
+++ b/video/out/gpu/user_shaders.h
diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c
new file mode 100644
index 0000000000..f8dcbaac60
--- /dev/null
+++ b/video/out/gpu/utils.c
@@ -0,0 +1,372 @@
+#include "common/msg.h"
+#include "video/out/vo.h"
+#include "utils.h"
+
+// Standard parallel 2D projection onto *t. Passing y1 < y0 means that the
+// coordinate system is flipped, not the projection: the vertical range is
+// then remapped to [y0 - y1, y0] before the matrix is built.
+void gl_transform_ortho(struct gl_transform *t, float x0, float x1,
+                        float y0, float y1)
+{
+    const bool flipped = y1 < y0;
+    const float lo = flipped ? y0 - y1 : y0;
+    const float hi = flipped ? y0 : y1;
+
+    // Horizontal axis
+    t->m[0][0] = 2.0f / (x1 - x0);
+    t->m[1][0] = 0.0f;
+    t->t[0] = -(x1 + x0) / (x1 - x0);
+
+    // Vertical axis
+    t->m[0][1] = 0.0f;
+    t->m[1][1] = 2.0f / (hi - lo);
+    t->t[1] = -(hi + lo) / (hi - lo);
+}
+
+// Post-compose t onto x: the matrix part of *x becomes t.m * x->m, and the
+// offset of *x is run through gl_transform_vec() with t.
+void gl_transform_trans(struct gl_transform t, struct gl_transform *x)
+{
+    const struct gl_transform in = *x;
+
+    // 2x2 matrix product, one output column per iteration
+    for (int c = 0; c < 2; c++) {
+        x->m[0][c] = t.m[0][0] * in.m[0][c] + t.m[0][1] * in.m[1][c];
+        x->m[1][c] = t.m[1][0] * in.m[0][c] + t.m[1][1] * in.m[1][c];
+    }
+
+    gl_transform_vec(t, &x->t[0], &x->t[1]);
+}
+
+// Build an orthographic projection covering the FBO's texture. For a flipped
+// FBO the height is passed negated, which gl_transform_ortho() treats as a
+// flipped coordinate system.
+void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo)
+{
+    int sign = 1;
+    if (fbo.flip)
+        sign = -1;
+
+    gl_transform_ortho(t, 0, fbo.tex->params.w, 0, sign * fbo.tex->params.h);
+}
+
+// Free every buffer held by the pool as well as the buffer array itself, and
+// reset *pool to an all-zero state.
+void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool)
+{
+    int n = 0;
+    while (n < pool->num_buffers) {
+        ra_buf_free(ra, &pool->buffers[n]);
+        n++;
+    }
+
+    talloc_free(pool->buffers);
+    *pool = (struct ra_buf_pool){0};
+}
+
+// Check whether buffers created with `old` can serve a request for `new`:
+// the type and host_mapped/host_mutable flags must be equal, and the
+// requested size must not exceed the existing one.
+static bool ra_buf_params_compatible(const struct ra_buf_params *new,
+                                     const struct ra_buf_params *old)
+{
+    if (new->type != old->type)
+        return false;
+    if (new->size > old->size)
+        return false;
+    if (new->host_mapped != old->host_mapped)
+        return false;
+    return new->host_mutable == old->host_mutable;
+}
+
+// Create one more buffer with the pool's current parameters and insert it at
+// the pool's current index. Returns false if the buffer creation failed.
+static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool)
+{
+    struct ra_buf *fresh = ra_buf_create(ra, &pool->current_params);
+    if (fresh) {
+        MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers,
+                            pool->index, fresh);
+        MP_VERBOSE(ra, "Resized buffer pool of type %u to size %d\n",
+                   pool->current_params.type, pool->num_buffers);
+    }
+
+    return fresh != NULL;
+}
+
+// Return a buffer from the pool that is compatible with `params`, growing
+// the pool when it is empty or when the buffer at the current index fails
+// buf_poll(). The pool is emptied and rebuilt if `params` is incompatible
+// with the parameters its buffers were created with. `params->initial_data`
+// must not be set. Returns NULL if a required buffer could not be created.
+struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool,
+                               const struct ra_buf_params *params)
+{
+    assert(!params->initial_data);
+
+    // Incompatible request: drop all pooled buffers and adopt the new params
+    if (!ra_buf_params_compatible(params, &pool->current_params)) {
+        ra_buf_pool_uninit(ra, pool);
+        pool->current_params = *params;
+    }
+
+    // An empty pool needs its first buffer
+    if (!pool->buffers) {
+        if (!ra_buf_pool_grow(ra, pool))
+            return NULL;
+    }
+
+    // If the buffer at the current index fails the poll, insert a fresh one
+    // in its place
+    if (!ra->fns->buf_poll(ra, pool->buffers[pool->index])) {
+        if (!ra_buf_pool_grow(ra, pool))
+            return NULL;
+    }
+
+    // Hand out the current buffer and advance the index with wrap-around
+    struct ra_buf *picked = pool->buffers[pool->index];
+    pool->index++;
+    pool->index %= pool->num_buffers;
+
+    return picked;
+}
+
+// Upload texture data through a buffer taken from the `pbo` pool. If the
+// caller already supplies a buffer (params->buf), the upload is forwarded
+// unchanged. Otherwise params->src is copied into a pooled buffer and the
+// upload is issued from that buffer instead. Returns false if no buffer
+// could be obtained, else the result of the backend's tex_upload.
+bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo,
+                       const struct ra_tex_upload_params *params)
+{
+    if (params->buf)
+        return ra->fns->tex_upload(ra, params);
+
+    struct ra_tex *tex = params->tex;
+
+    // 2D textures use the caller's stride; others are taken as tightly packed
+    size_t row_size;
+    if (tex->params.dimensions == 2) {
+        row_size = params->stride;
+    } else {
+        row_size = tex->params.w * tex->params.format->pixel_size;
+    }
+
+    struct ra_buf_params bufparams = {
+        .type = RA_BUF_TYPE_TEX_UPLOAD,
+        .size = row_size * tex->params.h * tex->params.d,
+        .host_mutable = true,
+    };
+
+    struct ra_buf *staging = ra_buf_pool_get(ra, pbo, &bufparams);
+    if (!staging)
+        return false;
+
+    ra->fns->buf_update(ra, staging, 0, params->src, bufparams.size);
+
+    // Same upload as requested, but sourced from the staging buffer
+    struct ra_tex_upload_params from_buf = *params;
+    from_buf.src = NULL;
+    from_buf.buf = staging;
+
+    return ra->fns->tex_upload(ra, &from_buf);
+}
+
+struct ra_layout std140_layout(struct ra_renderpass_input *inp)
+{
+    size_t el_size = ra_vartype_size(inp->type);
+
+    // std140 packing rules:
+    // 1. The alignment of generic values is their size in bytes
+    // 2. The alignment of vectors is the vector length * the base count, with
+    // the exception of vec3 which is always aligned like vec4
+    // 3. The alignment of arrays is that of the element size rounded up to
+    // the nearest multiple of vec4
+    // 4. Matrices are
author	Niklas Haas <git@haasn.xyz>	2017-09-14 08:04:55 +0200
committer	Niklas Haas <git@haasn.xyz>	2017-09-21 15:00:55 +0200
commit	65979986a923a8f08019b257c3fe72cd5e8ecf68 (patch)
tree	b8f4b8c17d583594aef0ca509064f8b2ff7128d4 /video
parent	20f958c9775652c3213588c2a0824f5353276adc (diff)
download	mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.bz2 mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.xz