From 81851febc4766e053cc17134c779959f5942025a Mon Sep 17 00:00:00 2001 From: wm4 Date: Wed, 26 Jul 2017 11:19:51 +0200 Subject: vo_opengl: start work on rendering API abstraction This starts work on moving OpenGL-specific code out of the general renderer code, so that we can support other GPU APIs. This is in a very early stage and it's only a proof of concept. It's unknown whether this will succeed or result in other backends. For now, the GL rendering API ("ra") and its only provider (ra_gl) do texture creation/upload/destruction only. And it's used for the main video texture only. All other code is still hardcoded to GL. There is some duplication with ra_format and gl_format handling. In the end, only the ra variants will be needed (plus the gl_format table of course). For now, this is simpler, because for some reason lots of hwdec code still requires the GL variants, and would have to be updated to use the ra ones. Currently, the video.c code accesses private ra_gl fields. In the end, it should not do that of course, and it would not include ra_gl.h. Probably adds bugs, but you can keep them. 
--- video/out/opengl/formats.c | 102 ++++++++-------- video/out/opengl/formats.h | 10 +- video/out/opengl/ra.c | 199 +++++++++++++++++++++++++++++++ video/out/opengl/ra.h | 175 +++++++++++++++++++++++++++ video/out/opengl/ra_gl.c | 290 +++++++++++++++++++++++++++++++++++++++++++++ video/out/opengl/ra_gl.h | 29 +++++ video/out/opengl/video.c | 206 +++++++++++++------------------- wscript_build.py | 2 + 8 files changed, 838 insertions(+), 175 deletions(-) create mode 100644 video/out/opengl/ra.c create mode 100644 video/out/opengl/ra.h create mode 100644 video/out/opengl/ra_gl.c create mode 100644 video/out/opengl/ra_gl.h diff --git a/video/out/opengl/formats.c b/video/out/opengl/formats.c index 2cbc130170..41b7f459e3 100644 --- a/video/out/opengl/formats.c +++ b/video/out/opengl/formats.c @@ -10,56 +10,56 @@ enum { // List of allowed formats, and their usability for bilinear filtering and FBOs. // This is limited to combinations that are useful for our renderer. -static const struct gl_format gl_formats[] = { +const struct gl_format gl_formats[] = { // These are used for desktop GL 3+, and GLES 3+ with GL_EXT_texture_norm16. 
- {GL_R8, GL_RED, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, - {GL_RG8, GL_RG, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, - {GL_RGB8, GL_RGB, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, - {GL_RGBA8, GL_RGBA, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, - {GL_R16, GL_RED, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, - {GL_RG16, GL_RG, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, - {GL_RGB16, GL_RGB, T_U16, F_CF | F_GL3 | F_GL2F}, - {GL_RGBA16, GL_RGBA, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {"r8", GL_R8, GL_RED, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"rg8", GL_RG8, GL_RG, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"rgb8", GL_RGB8, GL_RGB, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"rgba8", GL_RGBA8, GL_RGBA, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"r16", GL_R16, GL_RED, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {"rg16", GL_RG16, GL_RG, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {"rgb16", GL_RGB16, GL_RGB, T_U16, F_CF | F_GL3 | F_GL2F}, + {"rgba16", GL_RGBA16, GL_RGBA, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, // Specifically not color-renderable. - {GL_RGB16, GL_RGB, T_U16, F_TF | F_EXT16}, + {"rgb16", GL_RGB16, GL_RGB, T_U16, F_TF | F_EXT16}, // GL2 legacy. Ignores possibly present FBO extensions (no CF flag set). 
- {GL_LUMINANCE8, GL_LUMINANCE, T_U8, F_TF | F_GL2}, - {GL_LUMINANCE8_ALPHA8, GL_LUMINANCE_ALPHA, T_U8, F_TF | F_GL2}, - {GL_RGB8, GL_RGB, T_U8, F_TF | F_GL2}, - {GL_RGBA8, GL_RGBA, T_U8, F_TF | F_GL2}, - {GL_LUMINANCE16, GL_LUMINANCE, T_U16, F_TF | F_GL2}, - {GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, T_U16, F_TF | F_GL2}, - {GL_RGB16, GL_RGB, T_U16, F_TF | F_GL2}, - {GL_RGBA16, GL_RGBA, T_U16, F_TF | F_GL2}, + {"l8", GL_LUMINANCE8, GL_LUMINANCE, T_U8, F_TF | F_GL2}, + {"la8", GL_LUMINANCE8_ALPHA8, GL_LUMINANCE_ALPHA, T_U8, F_TF | F_GL2}, + {"rgb8", GL_RGB8, GL_RGB, T_U8, F_TF | F_GL2}, + {"rgba8", GL_RGBA8, GL_RGBA, T_U8, F_TF | F_GL2}, + {"l16", GL_LUMINANCE16, GL_LUMINANCE, T_U16, F_TF | F_GL2}, + {"la16", GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, T_U16, F_TF | F_GL2}, + {"rgb16", GL_RGB16, GL_RGB, T_U16, F_TF | F_GL2}, + {"rgba16", GL_RGBA16, GL_RGBA, T_U16, F_TF | F_GL2}, // ES2 legacy - {GL_LUMINANCE, GL_LUMINANCE, T_U8, F_TF | F_ES2}, - {GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, T_U8, F_TF | F_ES2}, - {GL_RGB, GL_RGB, T_U8, F_TF | F_ES2}, - {GL_RGBA, GL_RGBA, T_U8, F_TF | F_ES2}, + {"l" , GL_LUMINANCE,GL_LUMINANCE, T_U8, F_TF | F_ES2}, + {"la",GL_LUMINANCE_ALPHA,GL_LUMINANCE_ALPHA, T_U8, F_TF | F_ES2}, + {"rgb", GL_RGB, GL_RGB, T_U8, F_TF | F_ES2}, + {"rgba", GL_RGBA, GL_RGBA, T_U8, F_TF | F_ES2}, // Non-normalized integer formats. // Follows ES 3.0 as to which are color-renderable. 
- {GL_R8UI, GL_RED_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, - {GL_RG8UI, GL_RG_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, - {GL_RGB8UI, GL_RGB_INTEGER, T_U8, F_GL3 | F_ES3}, - {GL_RGBA8UI, GL_RGBA_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, - {GL_R16UI, GL_RED_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, - {GL_RG16UI, GL_RG_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, - {GL_RGB16UI, GL_RGB_INTEGER, T_U16, F_GL3 | F_ES3}, - {GL_RGBA16UI, GL_RGBA_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {"r8ui", GL_R8UI, GL_RED_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {"rg8ui", GL_RG8UI, GL_RG_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {"rgb8ui", GL_RGB8UI, GL_RGB_INTEGER, T_U8, F_GL3 | F_ES3}, + {"rgba8ui", GL_RGBA8UI, GL_RGBA_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {"r16ui", GL_R16UI, GL_RED_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {"rg16ui", GL_RG16UI, GL_RG_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {"rgb16ui", GL_RGB16UI, GL_RGB_INTEGER, T_U16, F_GL3 | F_ES3}, + {"rgba16ui",GL_RGBA16UI, GL_RGBA_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, // On GL3+ or GL2.1 with GL_ARB_texture_float, floats work fully. 
- {GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, - {GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, - {GL_RGB16F, GL_RGB, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, - {GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, - {GL_R32F, GL_RED, T_FL, F_CF | F_GL3 | F_GL2F}, - {GL_RG32F, GL_RG, T_FL, F_CF | F_GL3 | F_GL2F}, - {GL_RGB32F, GL_RGB, T_FL, F_CF | F_GL3 | F_GL2F}, - {GL_RGBA32F, GL_RGBA, T_FL, F_CF | F_GL3 | F_GL2F}, + {"r16f", GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"rg16f", GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"rgb16f", GL_RGB16F, GL_RGB, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"rgba16f", GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"r32f", GL_R32F, GL_RED, T_FL, F_CF | F_GL3 | F_GL2F}, + {"rg32f", GL_RG32F, GL_RG, T_FL, F_CF | F_GL3 | F_GL2F}, + {"rgb32f", GL_RGB32F, GL_RGB, T_FL, F_CF | F_GL3 | F_GL2F}, + {"rgba32f", GL_RGBA32F, GL_RGBA, T_FL, F_CF | F_GL3 | F_GL2F}, // Note: we simply don't support float anything on ES2, despite extensions. // We also don't bother with non-filterable float formats, and we ignore @@ -67,28 +67,28 @@ static const struct gl_format gl_formats[] = { // On ES3.2+, both 16 bit floats work fully (except 3-component formats). // F_EXTF16 implies extensions that also enable 16 bit floats fully. - {GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, - {GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, - {GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES32 | F_EXTF16}, - {GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {"r16f", GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {"rg16f", GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {"rgb16f", GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES32 | F_EXTF16}, + {"rgba16f", GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, // On ES3.0+, 16 bit floats are texture-filterable. // Don't bother with 32 bit floats; they exist but are neither CR nor TF. 
- {GL_R16F, GL_RED, T_FL, F_F16 | F_TF | F_ES3}, - {GL_RG16F, GL_RG, T_FL, F_F16 | F_TF | F_ES3}, - {GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES3}, - {GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_TF | F_ES3}, + {"r16f", GL_R16F, GL_RED, T_FL, F_F16 | F_TF | F_ES3}, + {"rg16f", GL_RG16F, GL_RG, T_FL, F_F16 | F_TF | F_ES3}, + {"rgb16f", GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES3}, + {"rgba16f", GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_TF | F_ES3}, // These might be useful as FBO formats. - {GL_RGB10_A2, GL_RGBA, + {"rgb10_a2",GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, F_CF | F_GL3 | F_ES3}, - {GL_RGBA12, GL_RGBA, T_U16, F_CF | F_GL2 | F_GL3}, - {GL_RGB10, GL_RGB, T_U16, F_CF | F_GL2 | F_GL3}, + {"rgba12", GL_RGBA12, GL_RGBA, T_U16, F_CF | F_GL2 | F_GL3}, + {"rgb10", GL_RGB10, GL_RGB, T_U16, F_CF | F_GL2 | F_GL3}, // Special formats. - {GL_RGB8, GL_RGB, + {"rgb565", GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, F_TF | F_GL2 | F_GL3}, - {GL_RGB, GL_RGB_422_APPLE, + {"ashit", GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE, F_TF | F_APPL}, {0} diff --git a/video/out/opengl/formats.h b/video/out/opengl/formats.h index 5bb3bcb4a3..d4d38c1150 100644 --- a/video/out/opengl/formats.h +++ b/video/out/opengl/formats.h @@ -2,8 +2,10 @@ #define MPGL_FORMATS_H_ #include "common.h" +#include "ra.h" struct gl_format { + const char *name; // symbolic name for user interaction/debugging GLint internal_format; // glTexImage argument GLenum format; // glTexImage argument GLenum type; // e.g. GL_UNSIGNED_SHORT @@ -33,11 +35,13 @@ enum { // the format is still GL_FLOAT (32 bit) // --- Other constants. 
- MPGL_TYPE_UNORM = 1, // normalized integer (fixed point) formats - MPGL_TYPE_UINT = 2, // full integer formats - MPGL_TYPE_FLOAT = 3, // float formats (both full and half) + MPGL_TYPE_UNORM = RA_CTYPE_UNORM, // normalized integer (fixed point) formats + MPGL_TYPE_UINT = RA_CTYPE_UINT, // full integer formats + MPGL_TYPE_FLOAT = RA_CTYPE_FLOAT, // float formats (both full and half) }; +extern const struct gl_format gl_formats[]; + int gl_format_feature_flags(GL *gl); const struct gl_format *gl_find_internal_format(GL *gl, GLint internal_format); const struct gl_format *gl_find_format(GL *gl, int type, int flags, diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c new file mode 100644 index 0000000000..12d944f29c --- /dev/null +++ b/video/out/opengl/ra.c @@ -0,0 +1,199 @@ +#include "common/common.h" +#include "common/msg.h" +#include "video/img_format.h" + +#include "ra.h" + +// Return whether this is a tightly packed format with no external padding and +// with the same bit size/depth in all components. +static bool ra_format_is_regular(const struct ra_format *fmt) +{ + if (!fmt->pixel_size || !fmt->num_components) + return false; + for (int n = 1; n < fmt->num_components; n++) { + if (fmt->component_size[n] != fmt->component_size[0] || + fmt->component_depth[n] != fmt->component_depth[0]) + return false; + } + if (fmt->component_size[0] * fmt->num_components != fmt->pixel_size * 8) + return false; + return true; +} + +// Return a regular format using RA_CTYPE_UNORM. 
+const struct ra_format *ra_find_unorm_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_UNORM && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] == bytes_per_component * 8 && + ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Return a regular format using RA_CTYPE_UINT. +const struct ra_format *ra_find_uint_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_UINT && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] == bytes_per_component * 8 && + ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Return a regular format that uses float16 internally, but does 32 bit +// transfer. (This is just so we don't need 32->16 bit conversion on CPU, +// which would be ok but messy.) +const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_FLOAT && fmt->num_components == n_components && + fmt->pixel_size == sizeof(float) * n_components && + fmt->component_depth[0] == 16 && + ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + + +// Like ra_find_unorm_format() (also takes bytes per component), but if no +// fixed point format is available, return an unsigned integer format. 
+static const struct ra_format *find_plane_format(struct ra *ra, int bytes, + int n_channels) +{ + const struct ra_format *f = ra_find_unorm_format(ra, bytes, n_channels); + if (f) + return f; + return ra_find_uint_format(ra, bytes, n_channels); +} + +// Put a mapping of imgfmt to texture formats into *out. Basically it selects +// the correct texture formats needed to represent an imgfmt in a shader, with +// textures using the same memory organization as on the CPU. +// Each plane is represented by a texture, and each texture has a RGBA +// component order. out->components describes the meaning of them. +// May return integer formats for >8 bit formats, if the driver has no +// normalized 16 bit formats. +// Returns false (and *out is not touched) if no format found. +bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out) +{ + struct ra_imgfmt_desc res = {0}; + + struct mp_regular_imgfmt regfmt; + if (mp_get_regular_imgfmt(&regfmt, imgfmt)) { + res.num_planes = regfmt.num_planes; + res.component_bits = regfmt.component_size * 8; + res.component_pad = regfmt.component_pad; + for (int n = 0; n < regfmt.num_planes; n++) { + struct mp_regular_imgfmt_plane *plane = &regfmt.planes[n]; + res.planes[n] = find_plane_format(ra, regfmt.component_size, + plane->num_components); + if (!res.planes[n]) + return false; + for (int i = 0; i < plane->num_components; i++) + res.components[n][i] = plane->components[i]; + } + res.chroma_w = regfmt.chroma_w; + res.chroma_h = regfmt.chroma_h; + goto supported; + } + + for (int n = 0; n < ra->num_formats; n++) { + if (ra->formats[n]->special_imgfmt == imgfmt) { + res = *ra->formats[n]->special_imgfmt_desc; + goto supported; + } + } + + // Unsupported format + return false; + +supported: + + *out = res; + return true; +} + +void ra_dump_tex_formats(struct ra *ra, int msgl) +{ + if (!mp_msg_test(ra->log, msgl)) + return; + MP_MSG(ra, msgl, "Texture formats:\n"); + for (int n = 0; n < ra->num_formats; n++) { + const struct 
ra_format *fmt = ra->formats[n]; + const char *ctype = "unknown"; + switch (fmt->ctype) { + case RA_CTYPE_UNORM: ctype = "unorm"; break; + case RA_CTYPE_UINT: ctype = "uint "; break; + case RA_CTYPE_FLOAT: ctype = "float"; break; + } + char cl[40] = ""; + for (int i = 0; i < fmt->num_components; i++) { + mp_snprintf_cat(cl, sizeof(cl), "%s%d", i ? " " : "", + fmt->component_size[i]); + if (fmt->component_size[i] != fmt->component_depth[i]) + mp_snprintf_cat(cl, sizeof(cl), "/%d", fmt->component_depth[i]); + } + MP_MSG(ra, msgl, " %-10s %d*%s %3dB %s %s %s {%s}\n", fmt->name, + fmt->num_components, ctype, fmt->pixel_size, + fmt->luminance_alpha ? "LA" : " ", + fmt->linear_filter ? "LF" : " ", + fmt->renderable ? "CR" : " ", cl); + } +} + +void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, + int msgl) +{ + char pl[80] = ""; + char pf[80] = ""; + for (int n = 0; n < desc->num_planes; n++) { + if (n > 0) { + mp_snprintf_cat(pl, sizeof(pl), "/"); + mp_snprintf_cat(pf, sizeof(pf), "/"); + } + char t[5] = {0}; + for (int i = 0; i < 4; i++) + t[i] = "_rgba"[desc->components[n][i]]; + for (int i = 3; i > 0 && t[i] == '_'; i--) + t[i] = '\0'; + mp_snprintf_cat(pl, sizeof(pl), "%s", t); + mp_snprintf_cat(pf, sizeof(pf), "%s", desc->planes[n]->name); + } + MP_MSG(ra, msgl, "%d planes %dx%d %d/%d [%s] (%s)\n", + desc->num_planes, desc->chroma_w, desc->chroma_h, + desc->component_bits, desc->component_pad, pf, pl); +} + +void ra_dump_img_formats(struct ra *ra, int msgl) +{ + if (!mp_msg_test(ra->log, msgl)) + return; + MP_MSG(ra, msgl, "Image formats:\n"); + for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) { + const char *name = mp_imgfmt_to_name(imgfmt); + if (strcmp(name, "unknown") == 0) + continue; + MP_MSG(ra, msgl, " %s", name); + struct ra_imgfmt_desc desc; + if (ra_get_imgfmt_desc(ra, imgfmt, &desc)) { + MP_MSG(ra, msgl, " => "); + ra_dump_imgfmt_desc(ra, &desc, msgl); + } else { + MP_MSG(ra, msgl, "\n"); + } + } +} diff --git 
a/video/out/opengl/ra.h b/video/out/opengl/ra.h new file mode 100644 index 0000000000..211f87077b --- /dev/null +++ b/video/out/opengl/ra.h @@ -0,0 +1,175 @@ +#pragma once + +#include "common/common.h" + +// Handle for a rendering API backend. +struct ra { + struct ra_fns *fns; + void *priv; + + struct mp_log *log; + + // RA_CAP_* bit field. The RA backend must set supported features at init + // time. + uint64_t caps; + + // Set of supported texture formats. Must be added by RA backend at init time. + struct ra_format **formats; + int num_formats; +}; + +enum { + RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader source textures) + RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader source textures) +}; + +enum ra_ctype { + RA_CTYPE_UNKNOWN = 0, // also used for inconsistent multi-component formats + RA_CTYPE_UNORM, // unsigned normalized integer (fixed point) formats + RA_CTYPE_UINT, // full integer formats + RA_CTYPE_FLOAT, // float formats (signed, any bit size) +}; + +// All formats must be useable as texture formats. All formats must be byte +// aligned (all pixels start and end on a byte boundary), at least as far CPU +// transfers are concerned. +struct ra_format { + // All fields are read-only after creation. + const char *name; // symbolic name for user interaction/debugging + void *priv; + enum ra_ctype ctype; // data type of each component + int num_components; // component count, 0 if not applicable, max. 
4 + int component_size[4]; // in bits, all entries 0 if not applicable + int component_depth[4]; // bits in use for each component, 0 if not applicable + // (_must_ be set if component_size[] includes padding, + // and the real precision as seen by shader is lower) + int pixel_size; // in bytes, total pixel size (0 if opaque) + bool luminance_alpha; // pre-GL_ARB_texture_rg hack for 2 component textures + // if this is set, shader must use .ra instead of .rg + // only applies to 2-component textures + bool linear_filter; // linear filtering available from shader + bool renderable; // can be used for render targets + + // If not 0, the format represents some sort of packed fringe format, whose + // shader representation is given by the special_imgfmt_desc pointer. + int special_imgfmt; + const struct ra_imgfmt_desc *special_imgfmt_desc; +}; + +struct ra_tex_params { + int dimensions; // 1-3 for 1D-3D textures + // Size of the texture. 1D textures require h=d=1, 2D textures require d=1. + int w, h, d; + const struct ra_format *format; + bool render_src; // must be useable as source texture in a shader + bool render_dst; // must be useable as target texture in a shader + // this requires creation of a FBO + // When used as render source texture. + bool src_linear; // if false, use nearest sampling (whether this can + // be true depends on ra_format.linear_filter) + bool src_repeat; // if false, clamp texture coordinates to edge + // if true, repeat texture coordinates + bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy + // always set to false, except in OSX code +}; + +struct ra_tex { + // All fields are read-only after creation. + struct ra_tex_params params; + void *priv; + // Set by user, GL only: attempt to accelerate upload with PBOs. + bool use_pbo; +}; + +// A persistent mapping, which can be used for texture upload. +struct ra_mapped_buffer { + // All fields are read-only after creation. 
The data is read/write, but + // requires explicit fence usage. + void *priv; + void *data; // pointer to first usable byte + size_t size; // total size of the mapping, starting at data + size_t preferred_align; // preferred stride/start alignment for optimal copy +}; + +// Rendering API entrypoints. (Note: there are some additional hidden features +// you need to take care of. For example, hwdec mapping will be provided +// separately from ra, but might need to call into ra private code.) +struct ra_fns { + void (*destroy)(struct ra *ra); + + // Create a texture (with undefined contents). Return NULL on failure. + // This is a rare operation, and normally textures and even FBOs for + // temporary rendering intermediate data are cached. + struct ra_tex *(*tex_create)(struct ra *ra, + const struct ra_tex_params *params); + + void (*tex_destroy)(struct ra *ra, struct ra_tex *tex); + + // Copy from CPU RAM to the texture. The image dimensions are as specified + // in tex->params. + // This is an extremely common operation. + // Unlike with OpenGL, the src data has to have exactly the same format as + // the texture, and no conversion is supported. + // tex->params.require_upload must be true. + // For 1D textures, stride is ignored. + // For 3D textures, stride is not supported. All data is fully packed with + // no padding, and stride is ignored. + // If buf is not NULL, then src must be within the provided buffer. The + // operation is implied to have dramatically better performance, but + // requires correct flushing and fencing operations by the caller to deal + // with asynchronous host/GPU behavior. If any of these conditions are not + // met, undefined behavior will result. + void (*tex_upload)(struct ra *ra, struct ra_tex *tex, + const void *src, ptrdiff_t stride, + struct ra_mapped_buffer *buf); + + // Create a persistently mapped buffer for tex_upload. + // Optional, can be NULL or return NULL if unavailable. 
+ struct ra_mapped_buffer *(*create_mapped_buffer)(struct ra *ra, size_t size); + + void (*destroy_mapped_buffer)(struct ra *ra, struct ra_mapped_buffer *buf); + + // Essentially a fence: once the GPU uses the mapping for read-access (e.g. + // by starting a texture upload), the host must not write to the mapped + // data until an internal object has been signalled. This call returns + // whether it was signalled yet. If true, write accesses are allowed again. + // Optional, only available if flush_mapping is. + bool (*poll_mapped_buffer)(struct ra *ra, struct ra_mapped_buffer *buf); +}; + +const struct ra_format *ra_find_unorm_format(struct ra *ra, + int bytes_per_component, + int n_components); +const struct ra_format *ra_find_uint_format(struct ra *ra, + int bytes_per_component, + int n_components); +const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components); + +struct ra_imgfmt_desc { + int num_planes; + const struct ra_format *planes[4]; + // Chroma pixel size (1x1 is 4:4:4) + uint8_t chroma_w, chroma_h; + // Component storage size in bits (possibly padded). For formats with + // different sizes per component, this is arbitrary. For padded formats + // like P010 or YUV420P10, padding is included. + int component_bits; + // Like mp_regular_imgfmt.component_pad. + int component_pad; + // For each texture and each texture output (rgba order) describe what + // component it returns. + // The values are like the values in mp_regular_imgfmt_plane.components[]. + // Access as components[plane_nr][component_index]. Set unused items to 0. + // For ra_format.luminance_alpha, this returns 1/2 ("rg") instead of 1/4 + // ("ra"). the logic is that the texture format has 2 channels, thus the + // data must be returned in the first two components. The renderer fixes + // this later. 
+ uint8_t components[4][4]; +}; + +bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out); + +void ra_dump_tex_formats(struct ra *ra, int msgl); +void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, + int msgl); +void ra_dump_img_formats(struct ra *ra, int msgl); diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c new file mode 100644 index 0000000000..bf926a197e --- /dev/null +++ b/video/out/opengl/ra_gl.c @@ -0,0 +1,290 @@ +#include "formats.h" + +#include "ra_gl.h" + +static struct ra_fns ra_fns_gl; + +int ra_init_gl(struct ra *ra, GL *gl) +{ + if (gl->version < 210 && gl->es < 200) { + MP_ERR(ra, "At least OpenGL 2.1 or OpenGL ES 2.0 required.\n"); + return -1; + } + + struct ra_gl *p = ra->priv = talloc_zero(NULL, struct ra_gl); + p->gl = gl; + + ra->fns = &ra_fns_gl; + ra->caps = 0; + if (gl->mpgl_caps & MPGL_CAP_1D_TEX) + ra->caps |= RA_CAP_TEX_1D; + if (gl->mpgl_caps & MPGL_CAP_3D_TEX) + ra->caps |= RA_CAP_TEX_3D; + + int gl_fmt_features = gl_format_feature_flags(gl); + int depth16 = gl_determine_16bit_tex_depth(gl); + + for (int n = 0; gl_formats[n].internal_format; n++) { + const struct gl_format *gl_fmt = &gl_formats[n]; + + if (!(gl_fmt->flags & gl_fmt_features)) + continue; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct ra_format){ + .name = gl_fmt->name, + .priv = (void *)gl_fmt, + .ctype = gl_format_type(gl_fmt), + .num_components = gl_format_components(gl_fmt->format), + .pixel_size = gl_bytes_per_pixel(gl_fmt->format, gl_fmt->type), + .luminance_alpha = gl_fmt->format == GL_LUMINANCE_ALPHA, + .linear_filter = gl_fmt->flags & F_TF, + .renderable = gl_fmt->flags & F_CR, + }; + + int csize = gl_component_size(gl_fmt->type) * 8; + int depth = csize; + if (fmt->ctype == RA_CTYPE_UNORM) + depth = MPMIN(csize, depth16); // naive/approximate + if (gl_fmt->flags & F_F16) { + depth = 16; + csize = 32; // always upload as GL_FLOAT (simpler for us) + } + + for (int i = 
0; i < fmt->num_components; i++) { + fmt->component_size[i] = csize; + fmt->component_depth[i] = depth; + } + + // Special formats for which OpenGL happens to have direct support. + if (strcmp(fmt->name, "rgb565") == 0) { + fmt->special_imgfmt = IMGFMT_RGB565; + struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc); + fmt->special_imgfmt_desc = desc; + desc->num_planes = 1; + desc->planes[0] = fmt; + for (int i = 0; i < 3; i++) + desc->components[0][i] = i + 1; + desc->chroma_w = desc->chroma_h = 1; + } + if (strcmp(fmt->name, "ashit") == 0) { + fmt->special_imgfmt = IMGFMT_UYVY; + struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc); + fmt->special_imgfmt_desc = desc; + desc->num_planes = 1; + desc->planes[0] = fmt; + desc->components[0][0] = 3; + desc->components[0][1] = 1; + desc->components[0][2] = 2; + desc->chroma_w = desc->chroma_h = 1; + } + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); + } + + gl->Disable(GL_DITHER); + + return 0; +} + +static void gl_destroy(struct ra *ra) +{ + talloc_free(ra->priv); +} + +static void gl_tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + struct ra_gl *p = ra->priv; + struct ra_tex_gl *tex_gl = tex->priv; + + p->gl->DeleteTextures(1, &tex_gl->texture); + gl_pbo_upload_uninit(&tex_gl->pbo); + talloc_free(tex_gl); + talloc_free(tex); +} + +static struct ra_tex *gl_tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + tex->params = *params; + struct ra_tex_gl *tex_gl = tex->priv = talloc_zero(NULL, struct ra_tex_gl); + + const struct gl_format *fmt = params->format->priv; + tex_gl->internal_format = fmt->internal_format; + tex_gl->format = fmt->format; + tex_gl->type = fmt->type; + switch (params->dimensions) { + case 1: tex_gl->target = GL_TEXTURE_1D; break; + case 2: tex_gl->target = GL_TEXTURE_2D; break; + case 3: tex_gl->target = GL_TEXTURE_3D; break; + 
default: abort(); + } + if (params->non_normalized) { + assert(params->dimensions == 2); + tex_gl->target = GL_TEXTURE_RECTANGLE; + } + + gl->GenTextures(1, &tex_gl->texture); + gl->BindTexture(tex_gl->target, tex_gl->texture); + + GLint filter = params->src_linear ? GL_LINEAR : GL_NEAREST; + GLint wrap = params->src_repeat ? GL_REPEAT : GL_CLAMP_TO_EDGE; + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter); + gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap); + if (params->dimensions > 1) + gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap); + if (params->dimensions > 2) + gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap); + + switch (params->dimensions) { + case 1: + gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format, params->w, + 0, tex_gl->format, tex_gl->type, NULL); + break; + case 2: + gl->TexImage2D(tex_gl->target, 0, tex_gl->internal_format, params->w, + params->h, 0, tex_gl->format, tex_gl->type, NULL); + break; + case 3: + gl->TexImage3D(tex_gl->target, 0, tex_gl->internal_format, params->w, + params->h, params->d, 0, tex_gl->format, tex_gl->type, + NULL); + break; + } + + gl->BindTexture(tex_gl->target, 0); + + return tex; +} + +static void gl_tex_upload(struct ra *ra, struct ra_tex *tex, + const void *src, ptrdiff_t stride, + struct ra_mapped_buffer *buf) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_tex_gl *tex_gl = tex->priv; + struct ra_mapped_buffer_gl *buf_gl = NULL; + + if (buf) { + buf_gl = buf->priv; + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->pbo); + src = (void *)((uintptr_t)src - (uintptr_t)buf->data); + } + + gl->BindTexture(tex_gl->target, tex_gl->texture); + + switch (tex->params.dimensions) { + case 1: + gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format, + tex->params.w, 0, tex_gl->format, tex_gl->type, src); + break; + case 2: + gl_pbo_upload_tex(&tex_gl->pbo, gl, tex->use_pbo && !buf, + 
tex_gl->target, tex_gl->format, tex_gl->type, + tex->params.w, tex->params.h, src, stride, + 0, 0, tex->params.w, tex->params.h); + break; + case 3: + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); + gl->TexImage3D(GL_TEXTURE_3D, 0, tex_gl->internal_format, tex->params.w, + tex->params.h, tex->params.d, 0, tex_gl->format, + tex_gl->type, src); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + break; + } + + gl->BindTexture(tex_gl->target, 0); + + if (buf) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + // Make sure the PBO is not reused until GL is done with it. If a + // previous operation is pending, "update" it by creating a new + // fence that will cover the previous operation as well. + gl->DeleteSync(buf_gl->fence); + buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } +} + +static void gl_destroy_mapped_buffer(struct ra *ra, struct ra_mapped_buffer *buf) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_mapped_buffer_gl *buf_gl = buf->priv; + + gl->DeleteSync(buf_gl->fence); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->pbo); + if (buf->data) + gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + gl->DeleteBuffers(1, &buf_gl->pbo); + + talloc_free(buf_gl); + talloc_free(buf); +} + +static struct ra_mapped_buffer *gl_create_mapped_buffer(struct ra *ra, + size_t size) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + + if (gl->version < 440) + return NULL; + + struct ra_mapped_buffer *buf = talloc_zero(NULL, struct ra_mapped_buffer); + buf->size = size; + buf->preferred_align = 1; + + struct ra_mapped_buffer_gl *buf_gl = buf->priv = + talloc_zero(NULL, struct ra_mapped_buffer_gl); + + unsigned flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT; + + gl->GenBuffers(1, &buf_gl->pbo); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->pbo); + gl->BufferStorage(GL_PIXEL_UNPACK_BUFFER, size, NULL, flags); + buf->data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, 
buf->size, flags); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + if (!buf->data) { + gl_check_error(gl, ra->log, "mapping buffer"); + gl_destroy_mapped_buffer(ra, buf); + return NULL; + } + + return buf; +} + +static bool gl_poll_mapped_buffer(struct ra *ra, struct ra_mapped_buffer *buf) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct ra_mapped_buffer_gl *buf_gl = buf->priv; + + if (buf_gl->fence) { + GLenum res = gl->ClientWaitSync(buf_gl->fence, 0, 0); // non-blocking + if (res == GL_ALREADY_SIGNALED) { + gl->DeleteSync(buf_gl->fence); + buf_gl->fence = NULL; + } + } + + return !buf_gl->fence; +} + +static struct ra_fns ra_fns_gl = { + .destroy = gl_destroy, + .tex_create = gl_tex_create, + .tex_destroy = gl_tex_destroy, + .tex_upload = gl_tex_upload, + .create_mapped_buffer = gl_create_mapped_buffer, + .destroy_mapped_buffer = gl_destroy_mapped_buffer, + .poll_mapped_buffer = gl_poll_mapped_buffer, +}; + diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h new file mode 100644 index 0000000000..9d5cb23fb7 --- /dev/null +++ b/video/out/opengl/ra_gl.h @@ -0,0 +1,29 @@ +#pragma once + +#include "common.h" +#include "ra.h" +#include "utils.h" + +// For ra.priv +struct ra_gl { + GL *gl; +}; + +// For ra_tex.priv +struct ra_tex_gl { + GLenum target; + GLuint texture; + // These 3 fields can be 0 if unknown. 
+ GLint internal_format; + GLenum format; + GLenum type; + struct gl_pbo_upload pbo; +}; + +// For ra_mapped_buffer.priv +struct ra_mapped_buffer_gl { + GLuint pbo; + GLsync fence; +}; + +int ra_init_gl(struct ra *ra, GL *gl); diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 8724d5ff9e..130d049136 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -36,6 +36,8 @@ #include "utils.h" #include "hwdec.h" #include "osd.h" +#include "ra.h" +#include "ra_gl.h" #include "stream/stream.h" #include "video_shaders.h" #include "user_shaders.h" @@ -84,6 +86,7 @@ static const struct gl_vao_entry vertex_vao[] = { }; struct texplane { + struct ra_tex *texture; int w, h; int tex_w, tex_h; GLint gl_internal_format; @@ -92,7 +95,6 @@ struct texplane { GLenum gl_type; GLuint gl_texture; bool flipped; - struct gl_pbo_upload pbo; }; struct video_image { @@ -175,18 +177,15 @@ struct pass_info { #define PASS_INFO_MAX (SHADER_MAX_HOOKS + 32) struct dr_buffer { - void *ptr; - size_t size; - GLuint pbo; - // While a PBO is read-accessed by GL, we must not write to the mapped data. - // The fence tells us when GL is done, and the mpi reference will keep the - // data from being recycled (or from other references gaining write access). - GLsync fence; + struct ra_mapped_buffer *buffer; + // The mpi reference will keep the data from being recycled (or from other + // references gaining write access) while the GPU is accessing the buffer. 
struct mp_image *mpi; }; struct gl_video { GL *gl; + struct ra *ra; struct mpv_global *global; struct mp_log *log; @@ -213,7 +212,7 @@ struct gl_video { struct mp_image_params real_image_params; // configured format struct mp_image_params image_params; // texture format (mind hwdec case) - struct gl_imgfmt_desc gl_format; // texture format + struct ra_imgfmt_desc ra_format; // texture format int plane_count; bool is_gray; @@ -715,8 +714,8 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, int h = p->image_params.h; // Determine the chroma offset - float ls_w = 1.0 / p->gl_format.chroma_w; - float ls_h = 1.0 / p->gl_format.chroma_h; + float ls_w = 1.0 / p->ra_format.chroma_w; + float ls_h = 1.0 / p->ra_format.chroma_h; struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; @@ -733,12 +732,12 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, } int msb_valid_bits = - p->gl_format.component_bits + MPMIN(p->gl_format.component_pad, 0); + p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0); // The existing code assumes we just have a single tex multiplier for // all of the planes. 
This may change in the future float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space, msb_valid_bits, - p->gl_format.component_bits); + p->ra_format.component_bits); memset(tex, 0, 4 * sizeof(tex[0])); for (int n = 0; n < p->plane_count; n++) { @@ -746,7 +745,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, enum plane_type type = PLANE_NONE; for (int i = 0; i < 4; i++) { - int c = p->gl_format.components[n][i]; + int c = p->ra_format.components[n][i]; enum plane_type ctype; if (c == 0) { ctype = PLANE_NONE; @@ -775,7 +774,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, }; for (int i = 0; i < 4; i++) - tex[n].components += !!p->gl_format.components[n][i]; + tex[n].components += !!p->ra_format.components[n][i]; get_transform(t->w, t->h, p->image_params.rotate, t->flipped, &tex[n].transform); @@ -791,8 +790,8 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, struct gl_transform tr = chroma; gl_transform_vec(rot, &tr.t[0], &tr.t[1]); - float dx = (chroma_upsize(w, p->gl_format.chroma_w) - w) * ls_w; - float dy = (chroma_upsize(h, p->gl_format.chroma_h) - h) * ls_h; + float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w; + float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h; // Adjust the chroma offset if the real chroma size is fractional // due image sizes not aligned to chroma subsampling. @@ -814,7 +813,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, // Return the index of the given component (assuming all non-padding components // of all planes are concatenated into a linear list). 
-static int find_comp(struct gl_imgfmt_desc *desc, int component) +static int find_comp(struct ra_imgfmt_desc *desc, int component) { int cur = 0; for (int n = 0; n < desc->num_planes; n++) { @@ -831,8 +830,6 @@ static int find_comp(struct gl_imgfmt_desc *desc, int component) static void init_video(struct gl_video *p) { - GL *gl = p->gl; - p->hwdec_active = false; p->use_integer_conversion = false; @@ -849,26 +846,26 @@ static void init_video(struct gl_video *p) } } - p->gl_format = (struct gl_imgfmt_desc){0}; - gl_get_imgfmt_desc(p->gl, p->image_params.imgfmt, &p->gl_format); + p->ra_format = (struct ra_imgfmt_desc){0}; + ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format); - p->plane_count = p->gl_format.num_planes; + p->plane_count = p->ra_format.num_planes; p->has_alpha = false; p->is_gray = true; - for (int n = 0; n < p->gl_format.num_planes; n++) { + for (int n = 0; n < p->ra_format.num_planes; n++) { for (int i = 0; i < 4; i++) { - if (p->gl_format.components[n][i]) { - p->has_alpha |= p->gl_format.components[n][i] == 4; - p->is_gray &= p->gl_format.components[n][i] == 1 || - p->gl_format.components[n][i] == 4; + if (p->ra_format.components[n][i]) { + p->has_alpha |= p->ra_format.components[n][i] == 4; + p->is_gray &= p->ra_format.components[n][i] == 1 || + p->ra_format.components[n][i] == 4; } } } for (int c = 0; c < 4; c++) { - int loc = find_comp(&p->gl_format, c + 1); + int loc = find_comp(&p->ra_format, c + 1); p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0]; } p->color_swizzle[4] = '\0'; @@ -892,43 +889,41 @@ static void init_video(struct gl_video *p) if (!p->hwdec_active) { struct video_image *vimg = &p->image; - GLenum gl_target = - p->opts.use_rectangle ? 
GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D; - struct mp_image layout = {0}; mp_image_set_params(&layout, &p->image_params); for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; - const struct gl_format *format = p->gl_format.planes[n]; - - plane->gl_target = gl_target; - plane->gl_format = format->format; - plane->gl_internal_format = format->internal_format; - plane->gl_type = format->type; - - p->use_integer_conversion |= gl_is_integer_format(plane->gl_format); + const struct ra_format *format = p->ra_format.planes[n]; plane->w = mp_image_plane_w(&layout, n); plane->h = mp_image_plane_h(&layout, n); plane->tex_w = plane->w + p->opts.tex_pad_x; plane->tex_h = plane->h + p->opts.tex_pad_y; - gl->GenTextures(1, &plane->gl_texture); - gl->BindTexture(gl_target, plane->gl_texture); + struct ra_tex_params params = { + .dimensions = 2, + .w = plane->tex_w, + .h = plane->tex_h, + .d = 1, + .format = format, + .src_linear = format->linear_filter, + .non_normalized = p->opts.use_rectangle, + }; - gl->TexImage2D(gl_target, 0, plane->gl_internal_format, - plane->tex_w, plane->tex_h, 0, - plane->gl_format, plane->gl_type, NULL); + plane->texture = p->ra->fns->tex_create(p->ra, ¶ms); + if (!plane->texture) + abort(); // shit happens - int filter = gl_is_integer_format(plane->gl_format) - ? 
GL_NEAREST : GL_LINEAR; - gl->TexParameteri(gl_target, GL_TEXTURE_MIN_FILTER, filter); - gl->TexParameteri(gl_target, GL_TEXTURE_MAG_FILTER, filter); - gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + struct ra_tex_gl *tex_gl = plane->texture->priv; - gl->BindTexture(gl_target, 0); + plane->gl_texture = tex_gl->texture; + plane->gl_target = tex_gl->target; + plane->gl_format = tex_gl->format; + plane->gl_internal_format = tex_gl->internal_format; + plane->gl_type = tex_gl->type; + + p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, plane->tex_w, plane->tex_h); @@ -958,9 +953,11 @@ static void unmap_current_image(struct gl_video *p) static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) { for (int i = 0; i < p->num_dr_buffers; i++) { - struct dr_buffer *buf = &p->dr_buffers[i]; - if (ptr >= (uint8_t *)buf->ptr && ptr < (uint8_t *)buf->ptr + buf->size) - return buf; + struct dr_buffer *buffer = &p->dr_buffers[i]; + uint8_t *buf = buffer->buffer->data; + size_t size = buffer->buffer->size; + if (ptr >= buf && ptr < buf + size) + return buffer; } return NULL; @@ -968,18 +965,14 @@ static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) static void gc_pending_dr_fences(struct gl_video *p, bool force) { - GL *gl = p->gl; - again:; for (int n = 0; n < p->num_dr_buffers; n++) { struct dr_buffer *buffer = &p->dr_buffers[n]; - if (!buffer->fence) + if (!buffer->mpi) continue; - GLenum res = gl->ClientWaitSync(buffer->fence, 0, 0); // non-blocking - if (res == GL_ALREADY_SIGNALED || force) { - gl->DeleteSync(buffer->fence); - buffer->fence = NULL; + bool res = p->ra->fns->poll_mapped_buffer(p->ra, buffer->buffer); + if (res || force) { // Unreferencing the image could cause gl_video_dr_free_buffer() // to be called by the talloc destructor (if it was the last // reference). 
This will implicitly invalidate the buffer pointer @@ -1018,8 +1011,6 @@ static void unmap_overlay(struct gl_video *p) static void uninit_video(struct gl_video *p) { - GL *gl = p->gl; - uninit_rendering(p); struct video_image *vimg = &p->image; @@ -1030,8 +1021,8 @@ static void uninit_video(struct gl_video *p) for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; - gl->DeleteTextures(1, &plane->gl_texture); - gl_pbo_upload_uninit(&plane->pbo); + if (plane->texture) + p->ra->fns->tex_destroy(p->ra, plane->texture); } *vimg = (struct video_image){0}; @@ -1302,7 +1293,7 @@ static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) } if (gl_is_integer_format(img.gl_format)) { - uint64_t tex_max = 1ull << p->gl_format.component_bits; + uint64_t tex_max = 1ull << p->ra_format.component_bits; img.multiplier *= 1.0 / (tex_max - 1); } @@ -2680,8 +2671,8 @@ static void pass_render_frame_dumb(struct gl_video *p, int fbo) int index = 0; for (int i = 0; i < p->plane_count; i++) { - int cw = tex[i].type == PLANE_CHROMA ? p->gl_format.chroma_w : 1; - int ch = tex[i].type == PLANE_CHROMA ? p->gl_format.chroma_h : 1; + int cw = tex[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1; + int ch = tex[i].type == PLANE_CHROMA ? p->ra_format.chroma_h : 1; if (p->image_params.rotate % 180 == 90) MPSWAP(int, cw, ch); @@ -3340,36 +3331,21 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t plane->flipped = mpi->stride[0] < 0; - gl->BindTexture(plane->gl_target, plane->gl_texture); + // (It's unclear whether this should be changeable on the fly.) 
+ plane->texture->use_pbo = p->opts.pbo; struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); - if (mapped) { - assert(mapped->pbo > 0); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, mapped->pbo); - uintptr_t offset = mpi->planes[n] - (uint8_t *)mapped->ptr; - gl_upload_tex(gl, plane->gl_target, - plane->gl_format, plane->gl_type, - (void *)offset, mpi->stride[n], - 0, 0, plane->w, plane->h); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - // Make sure the PBO is not reused until GL is done with it. If a - // previous operation is pending, "update" it by creating a new - // fence that will cover the previous operation as well. - gl->DeleteSync(mapped->fence); - mapped->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - if (!mapped->mpi) - mapped->mpi = mp_image_new_ref(mpi); - } else { - gl_pbo_upload_tex(&plane->pbo, gl, p->opts.pbo, plane->gl_target, - plane->gl_format, plane->gl_type, plane->w, plane->h, - mpi->planes[n], mpi->stride[n], - 0, 0, plane->w, plane->h); - } + + p->ra->fns->tex_upload(p->ra, plane->texture, mpi->planes[n], + mpi->stride[n], mapped ? mapped->buffer : NULL); + + if (mapped && !mapped->mpi) + mapped->mpi = mp_image_new_ref(mpi); + if (p->using_dr_path != !!mapped) { p->using_dr_path = !!mapped; MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no"); } - gl->BindTexture(plane->gl_target, 0); } gl_timer_stop(gl); const char *mode = p->using_dr_path ? "DR" : p->opts.pbo ? "PBO" : "naive"; @@ -3565,8 +3541,6 @@ static void init_gl(struct gl_video *p) debug_check_gl(p, "before init_gl"); - gl->Disable(GL_DITHER); - gl_video_set_gl_state(p); // Test whether we can use 10 bit. 
@@ -3578,6 +3552,9 @@ static void init_gl(struct gl_video *p) p->blit_timer = gl_timer_create(gl); debug_check_gl(p, "after init_gl"); + + ra_dump_tex_formats(p->ra, MSGL_DEBUG); + ra_dump_img_formats(p->ra, MSGL_DEBUG); } void gl_video_uninit(struct gl_video *p) @@ -3612,6 +3589,8 @@ void gl_video_uninit(struct gl_video *p) // Should all have been unreffed already. assert(!p->num_dr_buffers); + p->ra->fns->destroy(p->ra); + talloc_free(p->ra); talloc_free(p); } @@ -3703,14 +3682,17 @@ void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd) struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g) { - if (gl->version < 210 && gl->es < 200) { - mp_err(log, "At least OpenGL 2.1 or OpenGL ES 2.0 required.\n"); + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + if (ra_init_gl(ra, gl) < 0) { + talloc_free(ra); return NULL; } struct gl_video *p = talloc_ptrtype(NULL, p); *p = (struct gl_video) { .gl = gl, + .ra = ra, .global = g, .log = log, .sc = gl_sc_create(gl, log), @@ -3899,9 +3881,7 @@ void gl_video_set_hwdec(struct gl_video *p, struct gl_hwdec *hwdec) void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) { - GL *gl = p->gl; - - if (gl->version < 440) + if (!p->ra->fns->create_mapped_buffer) return NULL; MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers); @@ -3909,40 +3889,24 @@ void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) struct dr_buffer *buffer = &p->dr_buffers[index]; *buffer = (struct dr_buffer){ - .size = size, + .buffer = p->ra->fns->create_mapped_buffer(p->ra, size), }; - unsigned flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT; - - gl->GenBuffers(1, &buffer->pbo); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer->pbo); - gl->BufferStorage(GL_PIXEL_UNPACK_BUFFER, size, NULL, flags); - buffer->ptr = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, flags); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - if (!buffer->ptr) { - 
gl_check_error(p->gl, p->log, "mapping buffer"); - gl->DeleteBuffers(1, &buffer->pbo); + if (!buffer->buffer) { MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, index); return NULL; } - return buffer->ptr; + return buffer->buffer->data; }; void gl_video_dr_free_buffer(struct gl_video *p, void *ptr) { - GL *gl = p->gl; - for (int n = 0; n < p->num_dr_buffers; n++) { struct dr_buffer *buffer = &p->dr_buffers[n]; - if (buffer->ptr == ptr) { + if (buffer->buffer->data == ptr) { assert(!buffer->mpi); // can't be freed while it has a ref - gl->DeleteSync(buffer->fence); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer->pbo); - gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - gl->DeleteBuffers(1, &buffer->pbo); + p->ra->fns->destroy_mapped_buffer(p->ra, buffer->buffer); MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n); return; diff --git a/wscript_build.py b/wscript_build.py index 12ef2c5490..58e5743d4e 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -413,6 +413,8 @@ def build(ctx): ( "video/out/opengl/hwdec_vdpau.c", "vdpau-gl-x11" ), ( "video/out/opengl/lcms.c", "gl" ), ( "video/out/opengl/osd.c", "gl" ), + ( "video/out/opengl/ra.c", "gl" ), + ( "video/out/opengl/ra_gl.c", "gl" ), ( "video/out/opengl/user_shaders.c", "gl" ), ( "video/out/opengl/utils.c", "gl" ), ( "video/out/opengl/video.c", "gl" ), -- cgit v1.2.3