diff options
Diffstat (limited to 'video/out/opengl')
-rw-r--r-- | video/out/opengl/angle_common.c | 13 | ||||
-rw-r--r-- | video/out/opengl/angle_common.h | 13 | ||||
-rw-r--r-- | video/out/opengl/context_angle.c | 17 | ||||
-rw-r--r-- | video/out/opengl/hwdec_d3d11egl.c | 3 | ||||
-rw-r--r-- | video/out/opengl/hwdec_d3d11eglrgb.c | 5 | ||||
-rw-r--r-- | video/out/opengl/hwdec_dxva2egl.c | 12 | ||||
-rw-r--r-- | video/out/opengl/hwdec_vaglx.c | 2 | ||||
-rw-r--r-- | video/out/opengl/osd.c | 172 | ||||
-rw-r--r-- | video/out/opengl/user_shaders.c | 89 | ||||
-rw-r--r-- | video/out/opengl/user_shaders.h | 5 | ||||
-rw-r--r-- | video/out/opengl/utils.c | 75 | ||||
-rw-r--r-- | video/out/opengl/utils.h | 13 | ||||
-rw-r--r-- | video/out/opengl/video.c | 438 | ||||
-rw-r--r-- | video/out/opengl/video_shaders.c | 128 | ||||
-rw-r--r-- | video/out/opengl/video_shaders.h | 5 |
15 files changed, 513 insertions, 477 deletions
diff --git a/video/out/opengl/angle_common.c b/video/out/opengl/angle_common.c deleted file mode 100644 index 21cc924714..0000000000 --- a/video/out/opengl/angle_common.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "angle_common.h" - -// Test if Direct3D11 can be used by us. Basically, this prevents trying to use -// D3D11 on Win7, and then failing somewhere in the process. -bool d3d11_check_decoding(ID3D11Device *dev) -{ - HRESULT hr; - // We assume that NV12 is always supported, if hw decoding is supported at - // all. - UINT supported = 0; - hr = ID3D11Device_CheckFormatSupport(dev, DXGI_FORMAT_NV12, &supported); - return !FAILED(hr) && (supported & D3D11_BIND_DECODER); -} diff --git a/video/out/opengl/angle_common.h b/video/out/opengl/angle_common.h deleted file mode 100644 index 14ecd6ab3c..0000000000 --- a/video/out/opengl/angle_common.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef MP_ANGLE_COMMON_H -#define MP_ANGLE_COMMON_H - -#include <initguid.h> -#include <assert.h> -#include <windows.h> -#include <d3d11.h> - -#include <stdbool.h> - -bool d3d11_check_decoding(ID3D11Device *dev); - -#endif
\ No newline at end of file diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c index cc14fc32c6..28515f431f 100644 --- a/video/out/opengl/context_angle.c +++ b/video/out/opengl/context_angle.c @@ -38,6 +38,7 @@ struct priv { EGLContext egl_context; EGLSurface egl_surface; bool use_es2; + PFNEGLPOSTSUBBUFFERNVPROC eglPostSubBufferNV; }; static void angle_uninit(MPGLContext *ctx) @@ -288,6 +289,11 @@ static int angle_init(struct MPGLContext *ctx, int flags) // Configure the underlying Direct3D device d3d_init(ctx); + if (strstr(exts, "EGL_NV_post_sub_buffer")) { + p->eglPostSubBufferNV = + (PFNEGLPOSTSUBBUFFERNVPROC)eglGetProcAddress("eglPostSubBufferNV"); + } + mpgl_load_functions(ctx->gl, get_proc_address, NULL, vo->log); return 0; @@ -315,7 +321,16 @@ static int angle_reconfig(struct MPGLContext *ctx) static int angle_control(MPGLContext *ctx, int *events, int request, void *arg) { - return vo_w32_control(ctx->vo, events, request, arg); + struct priv *p = ctx->priv; + int r = vo_w32_control(ctx->vo, events, request, arg); + + // Calling eglPostSubBufferNV with a 0-sized region doesn't present a frame + // or block, but it does update the swapchain to match the window size + // See: https://groups.google.com/d/msg/angleproject/RvyVkjRCQGU/gfKfT64IAgAJ + if ((*events & VO_EVENT_RESIZE) && p->eglPostSubBufferNV) + p->eglPostSubBufferNV(p->egl_display, p->egl_surface, 0, 0, 0, 0); + + return r; } static void angle_swap_buffers(MPGLContext *ctx) diff --git a/video/out/opengl/hwdec_d3d11egl.c b/video/out/opengl/hwdec_d3d11egl.c index 549d3f5cac..07333c372e 100644 --- a/video/out/opengl/hwdec_d3d11egl.c +++ b/video/out/opengl/hwdec_d3d11egl.c @@ -23,7 +23,6 @@ #include <EGL/egl.h> #include <EGL/eglext.h> -#include "angle_common.h" #include "angle_dynamic.h" #include "common/common.h" @@ -31,6 +30,7 @@ #include "osdep/windows_utils.h" #include "hwdec.h" #include "video/hwdec.h" +#include "video/decode/d3d.h" #ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE #define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x3AAB @@ -195,6 +195,7 @@ static int create(struct gl_hwdec *hw) .type = HWDEC_D3D11VA, .driver_name = hw->driver->name, .ctx = p->d3d11_device, + .download_image = d3d11_download_image, }; hwdec_devices_add(hw->devs, &p->hwctx); diff --git a/video/out/opengl/hwdec_d3d11eglrgb.c b/video/out/opengl/hwdec_d3d11eglrgb.c index 2e61189154..be8057cde3 100644 --- a/video/out/opengl/hwdec_d3d11eglrgb.c +++ b/video/out/opengl/hwdec_d3d11eglrgb.c @@ -23,7 +23,6 @@ #include <EGL/egl.h> #include <EGL/eglext.h> -#include "angle_common.h" #include "angle_dynamic.h" #include "common/common.h" @@ -31,6 +30,7 @@ #include "osdep/windows_utils.h" #include "hwdec.h" #include "video/hwdec.h" +#include "video/decode/d3d.h" #ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE #define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x3AAB @@ -87,6 +87,8 @@ static int create(struct gl_hwdec *hw) if (!angle_load()) return -1; + d3d_load_dlls(); + EGLDisplay egl_display = eglGetCurrentDisplay(); if (!egl_display) return -1; @@ -104,7 +106,6 @@ static int create(struct gl_hwdec *hw) p->egl_display = egl_display; - HANDLE d3d11_dll = GetModuleHandleW(L"d3d11.dll"); if (!d3d11_dll) { if (!hw->probing) MP_ERR(hw, "Failed to load D3D11 library\n"); diff --git a/video/out/opengl/hwdec_dxva2egl.c b/video/out/opengl/hwdec_dxva2egl.c index d67a85bff5..f206b962d1 100644 --- a/video/out/opengl/hwdec_dxva2egl.c +++ b/video/out/opengl/hwdec_dxva2egl.c @@ -29,11 +29,11 @@ #include "osdep/windows_utils.h" #include "hwdec.h" #include "video/hwdec.h" +#include "video/decode/d3d.h" struct priv { struct mp_hwdec_ctx hwctx; - HMODULE d3d9_dll; IDirect3D9Ex *d3d9ex; IDirect3DDevice9Ex *device9ex; IDirect3DQuery9 *query9; @@ -89,9 +89,6 @@ static void destroy(struct gl_hwdec *hw) if (p->d3d9ex) IDirect3D9Ex_Release(p->d3d9ex); - - if (p->d3d9_dll) - FreeLibrary(p->d3d9_dll); } static int create(struct gl_hwdec *hw) @@ -99,6 +96,8 @@ static int create(struct gl_hwdec *hw) if (!angle_load()) return -1; + d3d_load_dlls(); + EGLDisplay egl_display = eglGetCurrentDisplay(); if (!egl_display) return -1; @@ -118,15 +117,14 @@ static int create(struct gl_hwdec *hw) p->egl_display = egl_display; - p->d3d9_dll = LoadLibraryW(L"d3d9.dll"); - if (!p->d3d9_dll) { + if (!d3d9_dll) { MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n", mp_LastError_to_str()); goto fail; } HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D); - Direct3DCreate9Ex = (void *)GetProcAddress(p->d3d9_dll, "Direct3DCreate9Ex"); + Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex"); if (!Direct3DCreate9Ex) { MP_FATAL(hw, "Direct3D 9Ex not supported\n"); goto fail; diff --git a/video/out/opengl/hwdec_vaglx.c b/video/out/opengl/hwdec_vaglx.c index 2e3017c193..0400604067 100644 --- a/video/out/opengl/hwdec_vaglx.c +++ b/video/out/opengl/hwdec_vaglx.c @@ -185,7 +185,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, 0, 0, hw_image->w, hw_image->h, 0, 0, hw_image->w, hw_image->h, NULL, 0, - va_get_colorspace_flag(hw_image->params.colorspace)); + va_get_colorspace_flag(hw_image->params.color.space)); CHECK_VA_STATUS(p, "vaPutSurface()"); va_unlock(p->ctx); diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c index 7b1ec162fd..5df5bb199a 100644 --- a/video/out/opengl/osd.c +++ b/video/out/opengl/osd.c @@ -21,8 +21,6 @@ #include <libavutil/common.h> -#include "video/out/bitmap_packer.h" - #include "formats.h" #include "utils.h" #include "osd.h" @@ -53,20 +51,17 @@ struct mpgl_osd_part { int change_id; GLuint texture; int w, h; - GLuint buffer; + struct gl_pbo_upload pbo; int num_subparts; int prev_num_subparts; struct sub_bitmap *subparts; struct vertex *vertices; - struct bitmap_packer *packer; - void *upload; }; struct mpgl_osd { struct mp_log *log; struct osd_state *osd; GL *gl; - GLint max_tex_wh; bool use_pbo; struct mpgl_osd_part *parts[MAX_OSD_PARTS]; const struct gl_format *fmt_table[SUBBITMAP_COUNT]; @@ -89,21 +84,11 @@ struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd .scratch = talloc_zero_size(ctx, 1), }; - gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &ctx->max_tex_wh); - ctx->fmt_table[SUBBITMAP_LIBASS] = gl_find_unorm_format(gl, 1, 1); ctx->fmt_table[SUBBITMAP_RGBA] = gl_find_unorm_format(gl, 1, 4); - for (int n = 0; n < MAX_OSD_PARTS; n++) { - struct mpgl_osd_part *p = talloc_ptrtype(ctx, p); - *p = (struct mpgl_osd_part) { - .packer = talloc_struct(p, struct bitmap_packer, { - .w_max = ctx->max_tex_wh, - .h_max = ctx->max_tex_wh, - }), - }; - ctx->parts[n] = p; - } + for (int n = 0; n < MAX_OSD_PARTS; n++) + ctx->parts[n] = talloc_zero(ctx, struct mpgl_osd_part); for (int n = 0; n < SUBBITMAP_COUNT; n++) ctx->formats[n] = !!ctx->fmt_table[n]; @@ -125,9 +110,7 @@ void mpgl_osd_destroy(struct mpgl_osd *ctx) for (int n = 0; n < MAX_OSD_PARTS; n++) { struct mpgl_osd_part *p = ctx->parts[n]; gl->DeleteTextures(1, &p->texture); - if (gl->DeleteBuffers) - gl->DeleteBuffers(1, &p->buffer); - talloc_free(p->upload); + gl_pbo_upload_uninit(&p->pbo); } talloc_free(ctx); } @@ -137,87 +120,6 @@ void mpgl_osd_set_options(struct mpgl_osd *ctx, bool pbo) ctx->use_pbo = pbo; } -static bool upload(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, - struct sub_bitmaps *imgs, bool pbo) -{ - GL *gl = ctx->gl; - bool success = true; - const struct gl_format *fmt = ctx->fmt_table[imgs->format]; - size_t pix_stride = gl_bytes_per_pixel(fmt->format, fmt->type); - size_t buffer_size = pix_stride * osd->h * osd->w; - - char *data = NULL; - void *texdata = NULL; - - if (pbo) { - if (!osd->buffer) { - gl->GenBuffers(1, &osd->buffer); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, osd->buffer); - gl->BufferData(GL_PIXEL_UNPACK_BUFFER, buffer_size, NULL, - GL_DYNAMIC_COPY); - } - - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, osd->buffer); - data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, buffer_size, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); - if (!data) { - success = false; - goto done; - } - } else { - if (!imgs->packed) { - if (!osd->upload) - osd->upload = talloc_size(NULL, buffer_size); - data = osd->upload; - texdata = data; - } - } - - int copy_w = 0; - int copy_h = 0; - size_t stride = 0; - if (imgs->packed) { - copy_w = imgs->packed_w; - copy_h = imgs->packed_h; - stride = imgs->packed->stride[0]; - texdata = imgs->packed->planes[0]; - if (pbo) { - memcpy_pic(data, texdata, pix_stride * copy_w, copy_h, - osd->w * pix_stride, stride); - stride = osd->w * pix_stride; - texdata = NULL; - } - } else { - struct pos bb[2]; - packer_get_bb(osd->packer, bb); - copy_w = bb[1].x; - copy_h = bb[1].y; - stride = osd->w * pix_stride; - packer_copy_subbitmaps(osd->packer, imgs, data, pix_stride, stride); - } - - if (pbo) { - if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) { - success = false; - goto done; - } - } - - gl_upload_tex(gl, GL_TEXTURE_2D, fmt->format, fmt->type, texdata, stride, - 0, 0, copy_w, copy_h); - - if (pbo) - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - -done: - if (!success) { - MP_FATAL(ctx, "Error: can't upload subtitles! " - "Remove the 'pbo' suboption.\n"); - } - - return success; -} - static int next_pow2(int v) { for (int x = 0; x < 30; x++) { @@ -231,31 +133,12 @@ static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, struct sub_bitmaps *imgs) { GL *gl = ctx->gl; + bool ok = false; - int req_w = 0; - int req_h = 0; - - if (imgs->packed) { - req_w = next_pow2(imgs->packed_w); - req_h = next_pow2(imgs->packed_h); - } else { - // assume 2x2 filter on scaling - osd->packer->padding = imgs->scaled; - int r = packer_pack_from_subbitmaps(osd->packer, imgs); - if (r < 0) { - MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " - "supported size %dx%d.\n", osd->packer->w_max, osd->packer->h_max); - return false; - } - req_w = osd->packer->w; - req_h = osd->packer->h; - } + assert(imgs->packed); - if (req_w > ctx->max_tex_wh || req_h > ctx->max_tex_wh) { - MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " - "supported size %dx%d.\n", ctx->max_tex_wh, ctx->max_tex_wh); - return false; - } + int req_w = next_pow2(imgs->packed_w); + int req_h = next_pow2(imgs->packed_h); const struct gl_format *fmt = ctx->fmt_table[imgs->format]; assert(fmt); @@ -270,6 +153,17 @@ static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, osd->w = FFMAX(32, req_w); osd->h = FFMAX(32, req_h); + MP_VERBOSE(ctx, "Reallocating OSD texture to %dx%d.\n", osd->w, osd->h); + + GLint max_wh; + gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &max_wh); + + if (osd->w > max_wh || osd->h > max_wh) { + MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " + "supported size %dx%d.\n", max_wh, max_wh); + goto done; + } + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, osd->w, osd->h, 0, fmt->format, fmt->type, NULL); @@ -277,24 +171,17 @@ static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - if (gl->DeleteBuffers) - gl->DeleteBuffers(1, &osd->buffer); - osd->buffer = 0; - - talloc_free(osd->upload); - osd->upload = NULL; } - bool uploaded = false; - if (ctx->use_pbo) - uploaded = upload(ctx, osd, imgs, true); - if (!uploaded) - upload(ctx, osd, imgs, false); + gl_pbo_upload_tex(&osd->pbo, gl, ctx->use_pbo, GL_TEXTURE_2D, fmt->format, + fmt->type, osd->w, osd->h, imgs->packed->planes[0], + imgs->packed->stride[0], 0, 0, + imgs->packed_w, imgs->packed_h); + ok = true; +done: gl->BindTexture(GL_TEXTURE_2D, 0); - - return true; + return ok; } static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) @@ -319,13 +206,6 @@ static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); memcpy(osd->subparts, imgs->parts, osd->num_subparts * sizeof(osd->subparts[0])); - - if (!imgs->packed) { - for (int n = 0; n < osd->num_subparts; n++) { - osd->subparts[n].src_x = osd->packer->result[n].x; - osd->subparts[n].src_y = osd->packer->result[n].y; - } - } } static void write_quad(struct vertex *va, struct gl_transform t, diff --git a/video/out/opengl/user_shaders.c b/video/out/opengl/user_shaders.c index 8f915a56e3..112012f04f 100644 --- a/video/out/opengl/user_shaders.c +++ b/video/out/opengl/user_shaders.c @@ -16,6 +16,7 @@ */ #include <ctype.h> +#include <assert.h> #include "user_shaders.h" @@ -69,6 +70,94 @@ static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE]) return true; } +// Returns whether successful. 'result' is left untouched on failure +bool eval_szexpr(struct mp_log *log, void *priv, + bool (*lookup)(void *priv, struct bstr var, float size[2]), + struct szexp expr[MAX_SZEXP_SIZE], float *result) +{ + float stack[MAX_SZEXP_SIZE] = {0}; + int idx = 0; // points to next element to push + + for (int i = 0; i < MAX_SZEXP_SIZE; i++) { + switch (expr[i].tag) { + case SZEXP_END: + goto done; + + case SZEXP_CONST: + // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be + // impossible to overflow the stack + assert(idx < MAX_SZEXP_SIZE); + stack[idx++] = expr[i].val.cval; + continue; + + case SZEXP_OP1: + if (idx < 1) { + mp_warn(log, "Stack underflow in RPN expression!\n"); + return false; + } + + switch (expr[i].val.op) { + case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; + default: abort(); + } + continue; + + case SZEXP_OP2: + if (idx < 2) { + mp_warn(log, "Stack underflow in RPN expression!\n"); + return false; + } + + // Pop the operands in reverse order + float op2 = stack[--idx]; + float op1 = stack[--idx]; + float res = 0.0; + switch (expr[i].val.op) { + case SZEXP_OP_ADD: res = op1 + op2; break; + case SZEXP_OP_SUB: res = op1 - op2; break; + case SZEXP_OP_MUL: res = op1 * op2; break; + case SZEXP_OP_DIV: res = op1 / op2; break; + case SZEXP_OP_GT: res = op1 > op2; break; + case SZEXP_OP_LT: res = op1 < op2; break; + default: abort(); + } + + if (!isfinite(res)) { + mp_warn(log, "Illegal operation in RPN expression!\n"); + return false; + } + + stack[idx++] = res; + continue; + + case SZEXP_VAR_W: + case SZEXP_VAR_H: { + struct bstr name = expr[i].val.varname; + float size[2]; + + if (!lookup(priv, name, size)) { + mp_warn(log, "Variable %.*s not found in RPN expression!\n", + BSTR_P(name)); + return false; + } + + stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? size[0] : size[1]; + continue; + } + } + } + +done: + // Return the single stack element + if (idx != 1) { + mp_warn(log, "Malformed stack after RPN expression!\n"); + return false; + } + + *result = stack[0]; + return true; +} + // Returns false if no more shaders could be parsed bool parse_user_shader_pass(struct mp_log *log, struct bstr *body, struct gl_user_shader *out) diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h index b8c287b6bd..7527eb3ba2 100644 --- a/video/out/opengl/user_shaders.h +++ b/video/out/opengl/user_shaders.h @@ -71,4 +71,9 @@ struct gl_user_shader { bool parse_user_shader_pass(struct mp_log *log, struct bstr *body, struct gl_user_shader *out); +// Evaluate a szexp, given a lookup function for named textures +bool eval_szexpr(struct mp_log *log, void *priv, + bool (*lookup)(void *priv, struct bstr var, float size[2]), + struct szexp expr[MAX_SZEXP_SIZE], float *result); + #endif diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 73b411e66c..72a748a82d 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -109,8 +109,10 @@ mp_image_t *gl_read_window_contents(GL *gl) mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, vp[2], vp[3]); if (!image) return NULL; + gl->BindFramebuffer(GL_FRAMEBUFFER, gl->main_fb); + GLenum obj = gl->main_fb ? GL_COLOR_ATTACHMENT0 : GL_FRONT; gl->PixelStorei(GL_PACK_ALIGNMENT, 1); - gl->ReadBuffer(GL_FRONT); + gl->ReadBuffer(obj); //flip image while reading (and also avoid stride-related trouble) for (int y = 0; y < vp[3]; y++) { gl->ReadPixels(vp[0], vp[1] + vp[3] - y - 1, vp[2], 1, @@ -118,6 +120,7 @@ mp_image_t *gl_read_window_contents(GL *gl) image->planes[0] + y * image->stride[0]); } gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); return image; } @@ -1121,3 +1124,73 @@ void gl_timer_stop(struct gl_timer *timer) if (gl->EndQuery) gl->EndQuery(GL_TIME_ELAPSED); } + +// Upload a texture, going through a PBO. PBO supposedly can facilitate +// asynchronous copy from CPU to GPU, so this is an optimization. Note that +// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can +// ruin performance. +// This call is like gl_upload_tex(), plus PBO management/use. +// target, format, type, dataptr, stride, x, y, w, h: texture upload params +// (see gl_upload_tex()) +// tex_w, tex_h: maximum size of the used texture +// use_pbo: for convenience, if false redirects the call to gl_upload_tex +void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, + GLenum target, GLenum format, GLenum type, + int tex_w, int tex_h, const void *dataptr, int stride, + int x, int y, int w, int h) +{ + assert(x >= 0 && y >= 0 && w >= 0 && h >= 0); + assert(x + w <= tex_w && y + h <= tex_h); + + if (!use_pbo || !gl->MapBufferRange) + goto no_pbo; + + size_t pix_stride = gl_bytes_per_pixel(format, type); + size_t buffer_size = pix_stride * tex_w * tex_h; + size_t needed_size = pix_stride * w * h; + + if (buffer_size != pbo->buffer_size) + gl_pbo_upload_uninit(pbo); + + if (!pbo->buffers[0]) { + pbo->gl = gl; + pbo->buffer_size = buffer_size; + gl->GenBuffers(2, &pbo->buffers[0]); + for (int n = 0; n < 2; n++) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffers[n]); + gl->BufferData(GL_PIXEL_UNPACK_BUFFER, buffer_size, NULL, + GL_DYNAMIC_COPY); + } + } + + pbo->index = (pbo->index + 1) % 2; + + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffers[pbo->index]); + void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, needed_size, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + if (!data) + goto no_pbo; + + memcpy_pic(data, dataptr, pix_stride * w, h, pix_stride * w, stride); + + if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + goto no_pbo; + } + + gl_upload_tex(gl, target, format, type, NULL, pix_stride * w, x, y, w, h); + + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + return; + +no_pbo: + gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h); +} + +void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo) +{ + if (pbo->gl) + pbo->gl->DeleteBuffers(2, &pbo->buffers[0]); + *pbo = (struct gl_pbo_upload){0}; +} diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 9b4fd8471d..ec54d19b8a 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -182,4 +182,17 @@ uint64_t gl_timer_last_us(struct gl_timer *timer); uint64_t gl_timer_avg_us(struct gl_timer *timer); uint64_t gl_timer_peak_us(struct gl_timer *timer); +struct gl_pbo_upload { + GL *gl; + int index; + GLuint buffers[2]; + size_t buffer_size; +}; + +void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, + GLenum target, GLenum format, GLenum type, + int tex_w, int tex_h, const void *dataptr, int stride, + int x, int y, int w, int h); +void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo); + #endif diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index f46fdc1c9f..468bee90b5 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -41,7 +41,6 @@ #include "user_shaders.h" #include "video/out/filter_kernels.h" #include "video/out/aspect.h" -#include "video/out/bitmap_packer.h" #include "video/out/dither.h" #include "video/out/vo.h" @@ -97,13 +96,13 @@ struct texplane { GLenum gl_format; GLenum gl_type; GLuint gl_texture; - int gl_buffer; char swizzle[5]; + bool flipped; + struct gl_pbo_upload pbo; }; struct video_image { struct texplane planes[4]; - bool image_flipped; struct mp_image *mpi; // original input image bool hwdec_mapped; }; @@ -676,7 +675,7 @@ static int pass_bind(struct gl_video *p, struct img_tex tex) } // Rotation by 90° and flipping. -static void get_plane_source_transform(struct gl_video *p, int w, int h, +static void get_plane_source_transform(struct gl_video *p, struct texplane *t, struct gl_transform *out_tr) { struct gl_transform tr = identity_trans; @@ -689,11 +688,11 @@ static void get_plane_source_transform(struct gl_video *p, int w, int h, // basically, recenter to keep the whole image in view float b[2] = {1, 1}; gl_transform_vec(rot, &b[0], &b[1]); - tr.t[0] += b[0] < 0 ? w : 0; - tr.t[1] += b[1] < 0 ? h : 0; + tr.t[0] += b[0] < 0 ? t->w : 0; + tr.t[1] += b[1] < 0 ? t->h : 0; - if (p->image.image_flipped) { - struct gl_transform flip = {{{1, 0}, {0, -1}}, {0, h}}; + if (t->flipped) { + struct gl_transform flip = {{{1, 0}, {0, -1}}, {0, t->h}}; gl_transform_trans(flip, &tr); } @@ -730,7 +729,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, // The existing code assumes we just have a single tex multiplier for // all of the planes. This may change in the future - float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.colorspace, + float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space, p->image_desc.component_bits, p->image_desc.component_full_bits); @@ -764,7 +763,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, .components = p->image_desc.components[n], }; snprintf(tex[n].swizzle, sizeof(tex[n].swizzle), "%s", t->swizzle); - get_plane_source_transform(p, t->w, t->h, &tex[n].transform); + get_plane_source_transform(p, t, &tex[n].transform); if (p->image_params.rotate % 180 == 90) MPSWAP(int, tex[n].w, tex[n].h); @@ -794,7 +793,7 @@ static void init_video(struct gl_video *p) mp_image_params_guess_csp(&p->image_params); int eq_caps = MP_CSP_EQ_CAPS_GAMMA; - if (p->image_params.colorspace != MP_CSP_BT_2020_C) + if (p->image_params.color.space != MP_CSP_BT_2020_C) eq_caps |= MP_CSP_EQ_CAPS_COLORMATRIX; if (p->image_desc.flags & MP_IMGFLAG_XYZ) eq_caps |= MP_CSP_EQ_CAPS_BRIGHTNESS; @@ -879,7 +878,7 @@ static void uninit_video(struct gl_video *p) struct texplane *plane = &vimg->planes[n]; gl->DeleteTextures(1, &plane->gl_texture); - gl->DeleteBuffers(1, &plane->gl_buffer); + gl_pbo_upload_uninit(&plane->pbo); } *vimg = (struct video_image){0}; @@ -1239,6 +1238,9 @@ static void load_shader(struct gl_video *p, struct bstr body) gl_sc_uniform_f(p->sc, "frame", p->frames_uploaded); gl_sc_uniform_vec2(p->sc, "image_size", (GLfloat[]){p->image_params.w, p->image_params.h}); + gl_sc_uniform_vec2(p->sc, "target_size", + (GLfloat[]){p->dst_rect.x1 - p->dst_rect.x0, + p->dst_rect.y1 - p->dst_rect.y0}); } static const char *get_custom_shader_fn(struct gl_video *p, const char *body) @@ -1542,112 +1544,40 @@ static void user_hook_old(struct gl_video *p, struct img_tex tex, GLSLF("color = %s(HOOKED_raw, HOOKED_pos, HOOKED_size);\n", fn_name); } -// Returns whether successful. 'result' is left untouched on failure -static bool eval_szexpr(struct gl_video *p, struct img_tex tex, - struct szexp expr[MAX_SZEXP_SIZE], - float *result) -{ - float stack[MAX_SZEXP_SIZE] = {0}; - int idx = 0; // points to next element to push - - for (int i = 0; i < MAX_SZEXP_SIZE; i++) { - switch (expr[i].tag) { - case SZEXP_END: - goto done; - - case SZEXP_CONST: - // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be - // impossible to overflow the stack - assert(idx < MAX_SZEXP_SIZE); - stack[idx++] = expr[i].val.cval; - continue; - - case SZEXP_OP1: - if (idx < 1) { - MP_WARN(p, "Stack underflow in RPN expression!\n"); - return false; - } - - switch (expr[i].val.op) { - case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; - default: abort(); - } - continue; - - case SZEXP_OP2: - if (idx < 2) { - MP_WARN(p, "Stack underflow in RPN expression!\n"); - return false; - } - - // Pop the operands in reverse order - float op2 = stack[--idx]; - float op1 = stack[--idx]; - float res = 0.0; - switch (expr[i].val.op) { - case SZEXP_OP_ADD: res = op1 + op2; break; - case SZEXP_OP_SUB: res = op1 - op2; break; - case SZEXP_OP_MUL: res = op1 * op2; break; - case SZEXP_OP_DIV: res = op1 / op2; break; - case SZEXP_OP_GT: res = op1 > op2; break; - case SZEXP_OP_LT: res = op1 < op2; break; - default: abort(); - } - - if (!isfinite(res)) { - MP_WARN(p, "Illegal operation in RPN expression!\n"); - return false; - } - - stack[idx++] = res; |