diff options
Diffstat (limited to 'video/out/opengl')
-rw-r--r-- | video/out/opengl/common.c | 47 | ||||
-rw-r--r-- | video/out/opengl/common.h | 8 | ||||
-rw-r--r-- | video/out/opengl/drm_egl.c | 439 | ||||
-rw-r--r-- | video/out/opengl/hwdec.c | 2 | ||||
-rw-r--r-- | video/out/opengl/hwdec.h | 2 | ||||
-rw-r--r-- | video/out/opengl/hwdec_vaegl.c | 2 | ||||
-rw-r--r-- | video/out/opengl/hwdec_vaglx.c | 8 | ||||
-rw-r--r-- | video/out/opengl/hwdec_vdpau.c | 2 | ||||
-rw-r--r-- | video/out/opengl/nnedi3.c | 219 | ||||
-rw-r--r-- | video/out/opengl/nnedi3.h | 47 | ||||
-rw-r--r-- | video/out/opengl/nnedi3_weights.bin | bin | 0 -> 161280 bytes | |||
-rw-r--r-- | video/out/opengl/superxbr.c | 234 | ||||
-rw-r--r-- | video/out/opengl/superxbr.h | 36 | ||||
-rw-r--r-- | video/out/opengl/utils.c | 59 | ||||
-rw-r--r-- | video/out/opengl/utils.h | 5 | ||||
-rw-r--r-- | video/out/opengl/video.c | 358 | ||||
-rw-r--r-- | video/out/opengl/video.h | 7 | ||||
-rw-r--r-- | video/out/opengl/w32.c | 58 | ||||
-rw-r--r-- | video/out/opengl/x11.c | 4 | ||||
-rw-r--r-- | video/out/opengl/x11egl.c | 2 |
20 files changed, 1457 insertions, 82 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index f045184373..54389e15cf 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -83,9 +83,7 @@ struct gl_functions { const char *extension; // introduced with this extension in any version int provides; // bitfield of MPGL_CAP_* constants int ver_core; // introduced as required function - int ver_removed; // removed as required function (no replacement) int ver_es_core; // introduced as required GL ES function - int ver_es_removed; // removed as required function (no replacement) const struct gl_function *functions; }; @@ -173,6 +171,7 @@ static const struct gl_functions gl_functions[] = { .ver_core = 300, .ver_es_core = 300, .functions = (const struct gl_function[]) { + DEF_FN(BindBufferBase), DEF_FN(GetStringi), // for ES 3.0 DEF_FN(GetTexLevelParameteriv), @@ -230,6 +229,16 @@ static const struct gl_functions gl_functions[] = { .extension = "GL_ARB_texture_rg", .provides = MPGL_CAP_TEX_RG, }, + { + .ver_core = 320, + .extension = "GL_ARB_sync", + .functions = (const struct gl_function[]) { + DEF_FN(FenceSync), + DEF_FN(ClientWaitSync), + DEF_FN(DeleteSync), + {0} + }, + }, // Swap control, always an OS specific extension // The OSX code loads this manually. { @@ -305,6 +314,16 @@ static const struct gl_functions gl_functions[] = { {0} }, }, + // uniform buffer object extensions, requires OpenGL 3.1. + { + .ver_core = 310, + .extension = "GL_ARB_uniform_buffer_object", + .functions = (const struct gl_function[]) { + DEF_FN(GetUniformBlockIndex), + DEF_FN(UniformBlockBinding), + {0} + }, + }, }; #undef FN_OFFS @@ -389,10 +408,6 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), const struct gl_functions *section = &gl_functions[n]; int version = gl->es ? gl->es : gl->version; int ver_core = gl->es ? section->ver_es_core : section->ver_core; - int ver_removed = gl->es ? section->ver_es_removed : section->ver_removed; - - if (ver_removed && version >= ver_removed) - continue; // NOTE: Function entrypoints can exist, even if they do not work. // We must always check extension strings and versions. @@ -450,16 +465,12 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), if (gl->es >= 300) gl->glsl_version = 300; } else { - if (gl->version >= 200) - gl->glsl_version = 110; - if (gl->version >= 210) - gl->glsl_version = 120; - if (gl->version >= 300) - gl->glsl_version = 130; - // Specifically needed for OSX (normally we request 3.0 contexts only, but - // OSX always creates 3.2 contexts when requesting a core context). - if (gl->version >= 320) - gl->glsl_version = 150; + gl->glsl_version = 110; + int glsl_major = 0, glsl_minor = 0; + if (sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2) + gl->glsl_version = glsl_major * 100 + glsl_minor; + // GLSL 400 defines "sample" as keyword - breaks custom shaders. + gl->glsl_version = MPMIN(gl->glsl_version, 330); } if (is_software_gl(gl)) { @@ -492,6 +503,7 @@ void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *), extern const struct mpgl_driver mpgl_driver_x11; extern const struct mpgl_driver mpgl_driver_x11egl; +extern const struct mpgl_driver mpgl_driver_drm_egl; extern const struct mpgl_driver mpgl_driver_cocoa; extern const struct mpgl_driver mpgl_driver_wayland; extern const struct mpgl_driver mpgl_driver_w32; @@ -513,6 +525,9 @@ static const struct mpgl_driver *const backends[] = { #if HAVE_EGL_X11 &mpgl_driver_x11egl, #endif +#if HAVE_EGL_DRM + &mpgl_driver_drm_egl, +#endif #if HAVE_GL_X11 &mpgl_driver_x11, #endif diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index 35d303e96c..d87be595ba 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -192,6 +192,7 @@ struct GL { void (GLAPIENTRY *GenBuffers)(GLsizei, GLuint *); void (GLAPIENTRY *DeleteBuffers)(GLsizei, const GLuint *); void (GLAPIENTRY *BindBuffer)(GLenum, GLuint); + void (GLAPIENTRY *BindBufferBase)(GLenum, GLuint, GLuint); GLvoid * (GLAPIENTRY * MapBuffer)(GLenum, GLenum); GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum); void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum); @@ -244,6 +245,10 @@ struct GL { void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean, const GLfloat *); + GLsync (GLAPIENTRY *FenceSync)(GLenum, GLbitfield); + GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64); + void (GLAPIENTRY *DeleteSync)(GLsync sync); + void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *); void (GLAPIENTRY *VDPAUFiniNV)(void); GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV) @@ -256,6 +261,9 @@ struct GL { GLint (GLAPIENTRY *GetVideoSync)(GLuint *); GLint (GLAPIENTRY *WaitVideoSync)(GLint, GLint, unsigned int *); + GLuint (GLAPIENTRY *GetUniformBlockIndex)(GLuint, const GLchar *); + void (GLAPIENTRY *UniformBlockBinding)(GLuint, GLuint, GLuint); + void (GLAPIENTRY *DebugMessageCallback)(MP_GLDEBUGPROC callback, const void *userParam); diff --git a/video/out/opengl/drm_egl.c b/video/out/opengl/drm_egl.c new file mode 100644 index 0000000000..f8e528201a --- /dev/null +++ b/video/out/opengl/drm_egl.c @@ -0,0 +1,439 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <sys/poll.h> +#include <time.h> +#include <unistd.h> + +#include <gbm.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> +#include <GL/gl.h> + +#include "common.h" +#include "common/common.h" +#include "video/out/drm_common.h" + +#define USE_MASTER 0 + +struct framebuffer +{ + struct gbm_bo *bo; + int width, height; + int fd; + int id; +}; + +struct gbm +{ + struct gbm_surface *surface; + struct gbm_device *device; + struct gbm_bo *bo; + struct gbm_bo *next_bo; +}; + +struct egl +{ + EGLDisplay display; + EGLContext context; + EGLSurface surface; +}; + +struct priv { + struct kms *kms; + + drmEventContext ev; + drmModeCrtc *old_crtc; + + struct egl egl; + struct gbm gbm; + struct framebuffer fb; + + bool active; + bool waiting_for_flip; + + bool vt_switcher_active; + struct vt_switcher vt_switcher; +}; + +static EGLConfig select_fb_config_egl(struct MPGLContext *ctx, bool es) +{ + struct priv *p = ctx->priv; + const EGLint attributes[] = { + EGL_SURFACE_TYPE, EGL_WINDOW_BIT, + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, + EGL_ALPHA_SIZE, 0, + EGL_DEPTH_SIZE, 1, + EGL_RENDERABLE_TYPE, es ? EGL_OPENGL_ES2_BIT : EGL_OPENGL_BIT, + EGL_NONE + }; + EGLint config_count; + EGLConfig config; + if (!eglChooseConfig(p->egl.display, attributes, &config, 1, &config_count)) { + MP_FATAL(ctx->vo, "Failed to configure EGL.\n"); + return NULL; + } + if (!config_count) { + MP_FATAL(ctx->vo, "Could not find EGL configuration!\n"); + return NULL; + } + return config; +} + +static bool init_egl(struct MPGLContext *ctx, bool es) +{ + struct priv *p = ctx->priv; + MP_VERBOSE(ctx->vo, "Initializing EGL\n"); + p->egl.display = eglGetDisplay(p->gbm.device); + if (p->egl.display == EGL_NO_DISPLAY) { + MP_ERR(ctx->vo, "Failed to get EGL display.\n"); + return false; + } + if (!eglInitialize(p->egl.display, NULL, NULL)) { + MP_ERR(ctx->vo, "Failed to initialize EGL.\n"); + return false; + } + if (!eglBindAPI(es ? EGL_OPENGL_ES_API : EGL_OPENGL_API)) { + MP_ERR(ctx->vo, "Failed to set EGL API version.\n"); + return false; + } + EGLConfig config = select_fb_config_egl(ctx, es); + if (!config) { + MP_ERR(ctx->vo, "Failed to configure EGL.\n"); + return false; + } + p->egl.context = eglCreateContext(p->egl.display, config, EGL_NO_CONTEXT, NULL); + if (!p->egl.context) { + MP_ERR(ctx->vo, "Failed to create EGL context.\n"); + return false; + } + MP_VERBOSE(ctx->vo, "Initializing EGL surface\n"); + p->egl.surface = eglCreateWindowSurface(p->egl.display, config, p->gbm.surface, NULL); + if (p->egl.surface == EGL_NO_SURFACE) { + MP_ERR(ctx->vo, "Failed to create EGL surface.\n"); + return false; + } + return true; +} + +static bool init_gbm(struct MPGLContext *ctx) +{ + struct priv *p = ctx->priv; + MP_VERBOSE(ctx->vo, "Creating GBM device\n"); + p->gbm.device = gbm_create_device(p->kms->fd); + if (!p->gbm.device) { + MP_ERR(ctx->vo, "Failed to create GBM device.\n"); + return false; + } + + MP_VERBOSE(ctx->vo, "Initializing GBM surface (%d x %d)\n", + p->kms->mode.hdisplay, p->kms->mode.vdisplay); + p->gbm.surface = gbm_surface_create( + p->gbm.device, + p->kms->mode.hdisplay, + p->kms->mode.vdisplay, + GBM_BO_FORMAT_XRGB8888, + GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING); + if (!p->gbm.surface) { + MP_ERR(ctx->vo, "Failed to create GBM surface.\n"); + return false; + } + return true; +} + +static void framebuffer_destroy_callback(struct gbm_bo *bo, void *data) +{ + struct framebuffer *fb = data; + if (fb) { + drmModeRmFB(fb->fd, fb->id); + } +} + +static void update_framebuffer_from_bo( + const struct MPGLContext *ctx, struct gbm_bo *bo) +{ + struct priv *p = ctx->priv; + p->fb.bo = bo; + p->fb.fd = p->kms->fd; + p->fb.width = gbm_bo_get_width(bo); + p->fb.height = gbm_bo_get_height(bo); + int stride = gbm_bo_get_stride(bo); + int handle = gbm_bo_get_handle(bo).u32; + + int ret = drmModeAddFB(p->kms->fd, p->fb.width, p->fb.height, + 24, 32, stride, handle, &p->fb.id); + if (ret) { + MP_ERR(ctx->vo, "Failed to create framebuffer: %s\n", mp_strerror(errno)); + } + gbm_bo_set_user_data(bo, &p->fb, framebuffer_destroy_callback); +} + +static void page_flipped(int fd, unsigned int frame, unsigned int sec, + unsigned int usec, void *data) +{ + struct priv *p = data; + p->waiting_for_flip = false; +} + +static bool crtc_setup(struct MPGLContext *ctx) +{ + struct priv *p = ctx->priv; + if (p->active) + return true; + p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); + int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, + p->fb.id, + 0, + 0, + &p->kms->connector->connector_id, + 1, + &p->kms->mode); + p->active = true; + return ret == 0; +} + +static void crtc_release(struct MPGLContext *ctx) +{ + struct priv *p = ctx->priv; + + if (!p->active) + return; + p->active = false; + + // wait for current page flip + while (p->waiting_for_flip) { + int ret = drmHandleEvent(p->kms->fd, &p->ev); + if (ret) { + MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); + break; + } + } + + if (p->old_crtc) { + drmModeSetCrtc(p->kms->fd, + p->old_crtc->crtc_id, + p->old_crtc->buffer_id, + p->old_crtc->x, + p->old_crtc->y, + &p->kms->connector->connector_id, + 1, + &p->old_crtc->mode); + drmModeFreeCrtc(p->old_crtc); + p->old_crtc = NULL; + } +} + +static void release_vt(void *data) +{ + struct MPGLContext *ctx = data; + MP_VERBOSE(ctx->vo, "Releasing VT"); + crtc_release(ctx); + if (USE_MASTER) { + //this function enables support for switching to x, weston etc. + //however, for whatever reason, it can be called only by root users. + //until things change, this is commented. + struct priv *p = ctx->priv; + if (drmDropMaster(p->kms->fd)) { + MP_WARN(ctx->vo, "Failed to drop DRM master: %s\n", mp_strerror(errno)); + } + } +} + +static void acquire_vt(void *data) +{ + struct MPGLContext *ctx = data; + MP_VERBOSE(ctx->vo, "Acquiring VT"); + if (USE_MASTER) { + struct priv *p = ctx->priv; + if (drmSetMaster(p->kms->fd)) { + MP_WARN(ctx->vo, "Failed to acquire DRM master: %s\n", mp_strerror(errno)); + } + } + + crtc_setup(ctx); +} + +static void drm_egl_uninit(MPGLContext *ctx) +{ + struct priv *p = ctx->priv; + crtc_release(ctx); + + if (p->vt_switcher_active) + vt_switcher_destroy(&p->vt_switcher); + + eglMakeCurrent(p->egl.display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + eglDestroyContext(p->egl.display, p->egl.context); + eglDestroySurface(p->egl.display, p->egl.surface); + gbm_surface_destroy(p->gbm.surface); + eglTerminate(p->egl.display); + gbm_device_destroy(p->gbm.device); + p->egl.context = EGL_NO_CONTEXT; + eglDestroyContext(p->egl.display, p->egl.context); + + if (p->kms) { + kms_destroy(p->kms); + p->kms = 0; + } +} + +static int drm_egl_init(struct MPGLContext *ctx, int flags) +{ + if (ctx->vo->probing) { + MP_VERBOSE(ctx->vo, "DRM EGL backend can be activated only manually.\n"); + return -1; + } + struct priv *p = ctx->priv; + p->kms = NULL; + p->old_crtc = NULL; + p->gbm.surface = NULL; + p->gbm.device = NULL; + p->active = false; + p->waiting_for_flip = false; + p->ev.version = DRM_EVENT_CONTEXT_VERSION; + p->ev.page_flip_handler = page_flipped; + + p->vt_switcher_active = vt_switcher_init(&p->vt_switcher, ctx->vo->log); + if (p->vt_switcher_active) { + vt_switcher_acquire(&p->vt_switcher, acquire_vt, ctx); + vt_switcher_release(&p->vt_switcher, release_vt, ctx); + } else { + MP_WARN(ctx->vo, "Failed to set up VT switcher. Terminal switching will be unavailable.\n"); + } + + MP_VERBOSE(ctx->vo, "Initializing KMS\n"); + p->kms = kms_create(ctx->vo->log); + if (!p->kms) { + MP_ERR(ctx->vo, "Failed to create KMS.\n"); + return -1; + } + + // TODO: arguments should be configurable + if (!kms_setup(p->kms, "/dev/dri/card0", -1, 0)) { + MP_ERR(ctx->vo, "Failed to configure KMS.\n"); + return -1; + } + + if (!init_gbm(ctx)) { + MP_ERR(ctx->vo, "Failed to setup GBM.\n"); + return -1; + } + + if (!init_egl(ctx, flags & VOFLAG_GLES)) { + MP_ERR(ctx->vo, "Failed to setup EGL.\n"); + return -1; + } + + if (!eglMakeCurrent(p->egl.display, p->egl.surface, p->egl.surface, p->egl.context)) { + MP_ERR(ctx->vo, "Failed to make context current.\n"); + return -1; + } + + const char *egl_exts = eglQueryString(p->egl.display, EGL_EXTENSIONS); + void *(*gpa)(const GLubyte*) = (void *(*)(const GLubyte*))eglGetProcAddress; + mpgl_load_functions(ctx->gl, gpa, egl_exts, ctx->vo->log); + + // required by gbm_surface_lock_front_buffer + eglSwapBuffers(p->egl.display, p->egl.surface); + + MP_VERBOSE(ctx->vo, "Preparing framebuffer\n"); + p->gbm.bo = gbm_surface_lock_front_buffer(p->gbm.surface); + if (!p->gbm.bo) { + MP_ERR(ctx->vo, "Failed to lock GBM surface.\n"); + return -1; + } + update_framebuffer_from_bo(ctx, p->gbm.bo); + if (!p->fb.id) { + MP_ERR(ctx->vo, "Failed to create framebuffer.\n"); + return -1; + } + + if (!crtc_setup(ctx)) { + MP_ERR( + ctx->vo, + "Failed to set CRTC for connector %u: %s\n", + p->kms->connector->connector_id, + mp_strerror(errno)); + return -1; + } + + return 0; +} + +static int drm_egl_reconfig(struct MPGLContext *ctx) +{ + struct priv *p = ctx->priv; + ctx->vo->dwidth = p->fb.width; + ctx->vo->dheight = p->fb.height; + return 0; +} + +static int drm_egl_control(struct MPGLContext *ctx, int *events, int request, + void *arg) +{ + return VO_NOTIMPL; +} + +static void drm_egl_swap_buffers(MPGLContext *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl.display, p->egl.surface); + p->gbm.next_bo = gbm_surface_lock_front_buffer(p->gbm.surface); + p->waiting_for_flip = true; + update_framebuffer_from_bo(ctx, p->gbm.next_bo); + int ret = drmModePageFlip(p->kms->fd, p->kms->crtc_id, p->fb.id, + DRM_MODE_PAGE_FLIP_EVENT, p); + if (ret) { + MP_WARN(ctx->vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); + } + + // poll page flip finish event + const int timeout_ms = 3000; + struct pollfd fds[1] = { { .events = POLLIN, .fd = p->kms->fd } }; + poll(fds, 1, timeout_ms); + if (fds[0].revents & POLLIN) { + ret = drmHandleEvent(p->kms->fd, &p->ev); + if (ret != 0) { + MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); + return; + } + } + + gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo); + p->gbm.bo = p->gbm.next_bo; +} + +const struct mpgl_driver mpgl_driver_drm_egl = { + .name = "drm-egl", + .priv_size = sizeof(struct priv), + .init = drm_egl_init, + .reconfig = drm_egl_reconfig, + .swap_buffers = drm_egl_swap_buffers, + .control = drm_egl_control, + .uninit = drm_egl_uninit, +}; diff --git a/video/out/opengl/hwdec.c b/video/out/opengl/hwdec.c index 6eefbc4744..2a8c1eb14b 100644 --- a/video/out/opengl/hwdec.c +++ b/video/out/opengl/hwdec.c @@ -64,7 +64,7 @@ static struct gl_hwdec *load_hwdec_driver(struct mp_log *log, GL *gl, .log = mp_log_new(hwdec, log, drv->api_name), .gl = gl, .gl_texture_target = GL_TEXTURE_2D, - .reject_emulated = is_auto, + .probing = is_auto, }; mp_verbose(log, "Loading hwdec driver '%s'\n", drv->api_name); if (hwdec->driver->create(hwdec) < 0) { diff --git a/video/out/opengl/hwdec.h b/video/out/opengl/hwdec.h index 7d6f300db7..6f07536f57 100644 --- a/video/out/opengl/hwdec.h +++ b/video/out/opengl/hwdec.h @@ -14,7 +14,7 @@ struct gl_hwdec { // For free use by hwdec driver void *priv; // For working around the vdpau vs. vaapi mess. - bool reject_emulated; + bool probing; // hwdec backends must set this to an IMGFMT_ that has an equivalent // internal representation in gl_video.c as the hardware texture. // It's used to build the rendering chain. For example, setting it to diff --git a/video/out/opengl/hwdec_vaegl.c b/video/out/opengl/hwdec_vaegl.c index efc09c28ba..462d69a6f0 100644 --- a/video/out/opengl/hwdec_vaegl.c +++ b/video/out/opengl/hwdec_vaegl.c @@ -216,7 +216,7 @@ static int create(struct gl_hwdec *hw) return -1; } - if (hw->reject_emulated && va_guess_if_emulated(p->ctx)) { + if (hw->probing && va_guess_if_emulated(p->ctx)) { destroy(hw); return -1; } diff --git a/video/out/opengl/hwdec_vaglx.c b/video/out/opengl/hwdec_vaglx.c index 34e8ee937e..ff97d14857 100644 --- a/video/out/opengl/hwdec_vaglx.c +++ b/video/out/opengl/hwdec_vaglx.c @@ -74,6 +74,10 @@ static int create(struct gl_hwdec *hw) Display *x11disp = glXGetCurrentDisplay(); if (!x11disp) return -1; + if (hw->probing) { + MP_VERBOSE(hw, "Not using this by default.\n"); + return -1; + } int x11scr = DefaultScreen(x11disp); struct priv *p = talloc_zero(hw, struct priv); hw->priv = p; @@ -96,10 +100,6 @@ static int create(struct gl_hwdec *hw) vaTerminate(p->display); return -1; } - if (hw->reject_emulated && va_guess_if_emulated(p->ctx)) { - destroy(hw); - return -1; - } int attribs[] = { GLX_BIND_TO_TEXTURE_RGBA_EXT, True, diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c index b9320ba842..086ebf5197 100644 --- a/video/out/opengl/hwdec_vdpau.c +++ b/video/out/opengl/hwdec_vdpau.c @@ -116,7 +116,7 @@ static int create(struct gl_hwdec *hw) return -1; p->vdp_surface = VDP_INVALID_HANDLE; p->mixer = mp_vdpau_mixer_create(p->ctx, hw->log); - if (hw->reject_emulated && mp_vdpau_guess_if_emulated(p->ctx)) { + if (hw->probing && mp_vdpau_guess_if_emulated(p->ctx)) { destroy(hw); return -1; } diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c new file mode 100644 index 0000000000..04131078e3 --- /dev/null +++ b/video/out/opengl/nnedi3.c @@ -0,0 +1,219 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#include "nnedi3.h" + +#include <assert.h> +#include <stdint.h> +#include <float.h> + +#include <libavutil/bswap.h> + +#include "video.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) +#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); +#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__) + +const struct nnedi3_opts nnedi3_opts_def = { + .neurons = 1, + .window = 0, + .upload = NNEDI3_UPLOAD_UBO, +}; + +#define OPT_BASE_STRUCT struct nnedi3_opts +const struct m_sub_options nnedi3_conf = { + .opts = (const m_option_t[]) { + OPT_CHOICE("neurons", neurons, 0, + ({"16", 0}, + {"32", 1}, + {"64", 2}, + {"128", 3})), + OPT_CHOICE("window", window, 0, + ({"8x4", 0}, + {"8x6", 1})), + OPT_CHOICE("upload", upload, 0, + ({"ubo", NNEDI3_UPLOAD_UBO}, + {"shader", NNEDI3_UPLOAD_SHADER})), + {0} + }, + .size = sizeof(struct nnedi3_opts), + .defaults = &nnedi3_opts_def, +}; + +const static char nnedi3_weights[40320 * 4 + 1] = +#include "video/out/opengl/nnedi3_weights.inc" +; + +const int nnedi3_weight_offsets[9] = + {0, 1088, 3264, 7616, 16320, 17920, 21120, 27520, 40320}; + +const int nnedi3_neurons[4] = {16, 32, 64, 128}; +const int nnedi3_window_width[2] = {8, 8}; +const int nnedi3_window_height[2] = {4, 6}; + +const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size) +{ + int idx = conf->window * 4 + conf->neurons; + const int offset = nnedi3_weight_offsets[idx]; + *size = (nnedi3_weight_offsets[idx + 1] - offset) * 4; + return (const float*)(nnedi3_weights + offset * 4); +} + +void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, + int step, float tex_mul, const struct nnedi3_opts *conf, + struct gl_transform *transform) +{ + assert(0 <= step && step < 2); + + if (!conf) + conf = &nnedi3_opts_def; + + const int neurons = nnedi3_neurons[conf->neurons]; + const int width = nnedi3_window_width[conf->window]; + const int height = nnedi3_window_height[conf->window]; + + const int offset = nnedi3_weight_offsets[conf->window * 4 + conf->neurons]; + const uint32_t *weights = (const int*)(nnedi3_weights + offset * 4); + + GLSLF("// nnedi3 (tex %d, step %d, neurons %d, window %dx%d, mode %d)\n", + tex_num, step + 1, neurons, width, height, conf->upload); + + // This is required since each row will be encoded into vec4s + assert(width % 4 == 0); + const int sample_count = width * height / 4; + + if (conf->upload == NNEDI3_UPLOAD_UBO) { + char buf[32]; + snprintf(buf, sizeof(buf), "vec4 weights[%d];", + neurons * (sample_count * 2 + 1)); + gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0); + if (gl->glsl_version < 140) + gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object"); + } else if (conf->upload == NNEDI3_UPLOAD_SHADER) { + // Somehow necessary for hard coding approach. + GLSLH(#pragma optionNV(fastprecision on)) + } + + GLSLHF("float nnedi3(sampler2D tex, vec2 pos, vec2 tex_size, int plane, float tex_mul) {\n"); + + if (step == 0) { + *transform = (struct gl_transform){{{1.0,0.0}, {0.0,2.0}}, {0.0,-0.5}}; + + GLSLH(if (fract(pos.y * tex_size.y) < 0.5) + return texture(tex, pos + vec2(0, 0.25) / tex_size)[plane] * tex_mul;) + GLSLHF("#define GET(i, j) " + "(texture(tex, pos+vec2((i)-(%f),(j)-(%f)+0.25)/tex_size)[plane]*tex_mul)\n", + width / 2.0 - 1, (height - 1) / 2.0); + } else { + *transform = (struct gl_transform){{{2.0,0.0}, {0.0,1.0}}, {-0.5,0.0}}; + + GLSLH(if (fract(pos.x * tex_size.x) < 0.5) + return texture(tex, pos + vec2(0.25, 0) / tex_size)[plane] * tex_mul;) + GLSLHF("#define GET(i, j) " + "(texture(tex, pos+vec2((j)-(%f)+0.25,(i)-(%f))/tex_size)[plane]*tex_mul)\n", + (height - 1) / 2.0, width / 2.0 - 1); + } + + GLSLHF("vec4 samples[%d];\n", sample_count); + + for (int y = 0; y < height; y++) + for (int x = 0; x < width; x += 4) { + GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d)," + "GET(%d, %d), GET(%d, %d));\n", + (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y); + } + + GLSLHF("float sum = 0, sumsq = 0;" + "for (int i = 0; i < %d; i++) {" + "sum += dot(samples[i], vec4(1.0));" + "sumsq += dot(samples[i], samples[i]);" + "}\n", sample_count); + + GLSLHF("float mstd0 = sum / %d.0;\n" + "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n" + "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n" + "mstd1 *= mstd2;\n", + width * height, width * height, FLT_EPSILON); + + GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n"); + + if (conf->upload == NNEDI3_UPLOAD_SHADER) { + GLSLH(#define T(x) intBitsToFloat(x)) + GLSLH(#define W(i,w0,w1,w2,w3) dot(samples[i],vec4(T(w0),T(w1),T(w2),T(w3)))) + + GLSLHF("#define WS(w0,w1) " + "sum1 = exp(sum1 * mstd2 + T(w0));" + "sum2 = sum2 * mstd2 + T(w1);" + "wsum += sum1;" + "vsum += sum1*(sum2/(1+abs(sum2)));\n"); + + for (int n = 0; n < neurons; n++) { + const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n; + for (int s = 0; s < 2; s++) { + GLSLHF("sum%d", s + 1); + for (int i = 0; i < sample_count; i++) { + GLSLHF("%cW(%d,%d,%d,%d,%d)", i == 0 ? '=' : '+', i, + (int)av_le2ne32(weights_ptr[0]), + (int)av_le2ne32(weights_ptr[1]), + (int)av_le2ne32(weights_ptr[2]), + (int)av_le2ne32(weights_ptr[3])); + weights_ptr += 4; + } + GLSLHF(";"); + } + GLSLHF("WS(%d,%d);\n", (int)av_le2ne32(weights_ptr[0]), + (int)av_le2ne32(weights_ptr[1])); + } + } else if (conf->upload == NNEDI3_UPLOAD_UBO) { + GLSLH(int idx = 0;) + + GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons); + + for (int s = 0; s < 2; s++) { + GLSLHF("sum%d = 0;\n" + "for (int i = 0; i |