summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2017-07-23 09:41:51 +0200
committerwm4 <wm4@nowhere>2017-07-24 04:32:55 +0200
commit64d56114ed9258efe2e864315d7130bb58a03d52 (patch)
treef4300ffb5c3d912f958b93acba86b0ed438ddcf7 /video
parent9e7665b21b530cbbfeb187521dc9db78c2ca60db (diff)
downloadmpv-64d56114ed9258efe2e864315d7130bb58a03d52.tar.bz2
mpv-64d56114ed9258efe2e864315d7130bb58a03d52.tar.xz
vo_opengl: add direct rendering support
Can be enabled via --vd-lavc-dr=yes. See manpage additions for what it does. This is reminiscent of the MPlayer -dr flag, but the implementation is completely different. It's the same basic concept: letting the decoder render into a GPU buffer to avoid a copy. Unlike MPlayer, this doesn't try to go through filters (libavfilter doesn't support this anyway). Unless a filter can work in-place, DR will be silently disabled. MPlayer had very complex semantics about buffer types and management (which apparently nobody ever understood) and weird restrictions that mostly limited it to mpeg2 style codecs. The mpv code does not do any of this, and just lets the decoder allocate an arbitrary number of untyped images. (No MPlayer code was used.) Parts of the code based on work by atomnuker (starting point for the generic code) and haasn (some GL definitions, some basic PBO code, and correct fencing).
Diffstat (limited to 'video')
-rw-r--r--video/decode/dec_video.h1
-rw-r--r--video/decode/lavc.h7
-rw-r--r--video/decode/vd_lavc.c98
-rw-r--r--video/out/opengl/common.c8
-rw-r--r--video/out/opengl/common.h2
-rw-r--r--video/out/opengl/context.c1
-rw-r--r--video/out/opengl/gl_headers.h7
-rw-r--r--video/out/opengl/video.c155
-rw-r--r--video/out/opengl/video.h3
-rw-r--r--video/out/vo.c91
-rw-r--r--video/out/vo.h32
-rw-r--r--video/out/vo_opengl.c29
12 files changed, 430 insertions, 4 deletions
diff --git a/video/decode/dec_video.h b/video/decode/dec_video.h
index 261f47fca8..73570f8ed5 100644
--- a/video/decode/dec_video.h
+++ b/video/decode/dec_video.h
@@ -37,6 +37,7 @@ struct dec_video {
struct mp_hwdec_devices *hwdec_devs; // video output hwdec handles
struct sh_stream *header;
struct mp_codec_params *codec;
+ struct vo *vo; // required for direct rendering into video memory
char *decoder_desc;
diff --git a/video/decode/lavc.h b/video/decode/lavc.h
index 44b103e3f5..9e27a6e18c 100644
--- a/video/decode/lavc.h
+++ b/video/decode/lavc.h
@@ -2,6 +2,7 @@
#define MPV_LAVC_H
#include <stdbool.h>
+#include <pthread.h>
#include <libavcodec/avcodec.h>
@@ -72,6 +73,12 @@ typedef struct lavc_ctx {
struct mp_image_pool *hwdec_swpool;
AVBufferRef *cached_hw_frames_ctx;
+
+ // --- The following fields are protected by dr_lock.
+ pthread_mutex_t dr_lock;
+ bool dr_failed;
+ struct mp_image_pool *dr_pool;
+ int dr_imgfmt, dr_w, dr_h, dr_stride_align;
} vd_ffmpeg_ctx;
struct vd_lavc_hwdec {
diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c
index 27861171f5..f1b2a83749 100644
--- a/video/decode/vd_lavc.c
+++ b/video/decode/vd_lavc.c
@@ -57,6 +57,7 @@
#include "demux/packet.h"
#include "video/csputils.h"
#include "video/sws_utils.h"
+#include "video/out/vo.h"
#if LIBAVCODEC_VERSION_MICRO >= 100
#include <libavutil/mastering_display_metadata.h>
@@ -74,6 +75,7 @@ static void init_avctx(struct dec_video *vd, const char *decoder,
struct vd_lavc_hwdec *hwdec);
static void uninit_avctx(struct dec_video *vd);
+static int get_buffer2_direct(AVCodecContext *avctx, AVFrame *pic, int flags);
static int get_buffer2_hwdec(AVCodecContext *avctx, AVFrame *pic, int flags);
static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx,
const enum AVPixelFormat *pix_fmt);
@@ -92,6 +94,7 @@ struct vd_lavc_params {
int check_hw_profile;
int software_fallback;
char **avopts;
+ int dr;
};
static const struct m_opt_choice_alternatives discard_names[] = {
@@ -121,6 +124,7 @@ const struct m_sub_options vd_lavc_conf = {
OPT_CHOICE_OR_INT("software-fallback", software_fallback, 0, 1, INT_MAX,
({"no", INT_MAX}, {"yes", 1})),
OPT_KEYVALUELIST("o", avopts, 0),
+ OPT_FLAG("dr", dr, 0),
{0}
},
.size = sizeof(struct vd_lavc_params),
@@ -425,7 +429,11 @@ static struct vd_lavc_hwdec *probe_hwdec(struct dec_video *vd, bool autoprobe,
static void uninit(struct dec_video *vd)
{ // Decoder teardown: runs uninit_avctx(), then releases dr_lock and the private context.
+ vd_ffmpeg_ctx *ctx = vd->priv; // fetched up front; vd->priv is freed at the end
+
uninit_avctx(vd);
+
+ pthread_mutex_destroy(&ctx->dr_lock); // pairs with pthread_mutex_init() in init()
talloc_free(vd->priv);
}
@@ -514,6 +522,9 @@ static int init(struct dec_video *vd, const char *decoder)
ctx->decoder = talloc_strdup(ctx, decoder);
ctx->hwdec_devs = vd->hwdec_devs;
ctx->hwdec_swpool = talloc_steal(ctx, mp_image_pool_new(17));
+ ctx->dr_pool = talloc_steal(ctx, mp_image_pool_new(INT_MAX));
+
+ pthread_mutex_init(&ctx->dr_lock, NULL);
reinit(vd);
@@ -597,6 +608,12 @@ static void init_avctx(struct dec_video *vd, const char *decoder,
mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads);
}
+ if (!ctx->hwdec && vd->vo && lavc_param->dr) {
+ avctx->opaque = vd;
+ avctx->get_buffer2 = get_buffer2_direct;
+ avctx->thread_safe_callbacks = 1;
+ }
+
avctx->flags |= lavc_param->bitexact ? AV_CODEC_FLAG_BITEXACT : 0;
avctx->flags2 |= lavc_param->fast ? AV_CODEC_FLAG2_FAST : 0;
@@ -917,6 +934,87 @@ static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx,
return select;
}
+static int get_buffer2_direct(AVCodecContext *avctx, AVFrame *pic, int flags)
+{ /* get_buffer2 callback for direct rendering. init_avctx() installs it when
+   * the "dr" option is set, a VO is present and no hwdec is in use.
+   * Tries to back pic with an image obtained from vo_get_image() and
+   * recycled through p->dr_pool. On any failure it sets p->dr_failed, so
+   * this call and all later ones use avcodec_default_get_buffer2() instead.
+   * All p->dr_* fields are accessed with p->dr_lock held.
+   * Returns 0 on the DR path, otherwise the default allocator's result. */
+ struct dec_video *vd = avctx->opaque;
+ vd_ffmpeg_ctx *p = vd->priv;
+
+ pthread_mutex_lock(&p->dr_lock);
+
+ int w = pic->width;
+ int h = pic->height;
+ int linesize_align[AV_NUM_DATA_POINTERS] = {0};
+ avcodec_align_dimensions2(avctx, &w, &h, linesize_align); // may adjust w/h and fills in the per-plane alignments
+
+ // We assume that different alignments are just different power-of-2s.
+ // Thus, a higher alignment always satisfies a lower alignment.
+ int stride_align = 0;
+ for (int n = 0; n < AV_NUM_DATA_POINTERS; n++)
+ stride_align = MPMAX(stride_align, linesize_align[n]);
+
+ int imgfmt = pixfmt2imgfmt(pic->format);
+ if (!imgfmt) // a zero result is treated as "no matching mpv image format"
+ goto fallback;
+
+ if (p->dr_failed) // an earlier failure disabled DR for this decoder context
+ goto fallback;
+
+ // (For simplicity, we realloc on any parameter change, instead of trying
+ // to be clever.)
+ if (stride_align != p->dr_stride_align || w != p->dr_w || h != p->dr_h ||
+ imgfmt != p->dr_imgfmt)
+ {
+ mp_image_pool_clear(p->dr_pool);
+ p->dr_imgfmt = imgfmt;
+ p->dr_w = w;
+ p->dr_h = h;
+ p->dr_stride_align = stride_align;
+ MP_VERBOSE(p, "DR parameter change to %dx%d %s align=%d\n", w, h,
+ mp_imgfmt_to_name(imgfmt), stride_align);
+ }
+
+ struct mp_image *img = mp_image_pool_get_no_alloc(p->dr_pool, imgfmt, w, h); // try to reuse a pooled image first
+ if (!img) {
+ MP_VERBOSE(p, "Allocating new DR image...\n");
+ img = vo_get_image(vd->vo, imgfmt, w, h, stride_align);
+ if (!img) {
+ MP_VERBOSE(p, "...failed..\n");
+ goto fallback;
+ }
+
+ // Now make the mp_image part of the pool. This requires doing magic to
+ // the image, so just add it to the pool and get it back to avoid
+ // dealing with magic ourselves. (Normally this never fails.)
+ mp_image_pool_add(p->dr_pool, img);
+ img = mp_image_pool_get_no_alloc(p->dr_pool, imgfmt, w, h);
+ if (!img)
+ goto fallback;
+ }
+
+ // get_buffer2 callers seem very unappreciative of overwriting pic with a
+ // new reference. The AVCodecContext.get_buffer2 comments tell us exactly
+ // what we should do, so follow that.
+ for (int n = 0; n < 4; n++) {
+ pic->data[n] = img->planes[n];
+ pic->linesize[n] = img->stride[n];
+ pic->buf[n] = img->bufs[n];
+ img->bufs[n] = NULL; // the buffer reference now belongs to pic->buf[n]
+ }
+ talloc_free(img); // the buffer refs for planes 0-3 were moved into pic above
+
+ pthread_mutex_unlock(&p->dr_lock);
+
+ return 0;
+
+fallback:
+ if (!p->dr_failed) // log only on the first failure
+ MP_VERBOSE(p, "DR failed - disabling.\n");
+ p->dr_failed = true;
+ pthread_mutex_unlock(&p->dr_lock);
+
+ return avcodec_default_get_buffer2(avctx, pic, flags);
+}
+
static int get_buffer2_hwdec(AVCodecContext *avctx, AVFrame *pic, int flags)
{
struct dec_video *vd = avctx->opaque;
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 203c14b7ef..c7eee414ac 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -327,6 +327,14 @@ static const struct gl_functions gl_functions[] = {
{0}
},
},
+ {
+ .ver_core = 440,
+ .extension = "GL_ARB_buffer_storage",
+ .functions = (const struct gl_function[]) {
+ DEF_FN(BufferStorage),
+ {0}
+ },
+ },
// Swap control, always an OS specific extension
// The OSX code loads this manually.
{
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index c9162f2479..7842c5a910 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -192,6 +192,8 @@ struct GL {
GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64);
void (GLAPIENTRY *DeleteSync)(GLsync sync);
+ void (GLAPIENTRY *BufferStorage)(GLenum, intptr_t, const GLvoid *, GLenum);
+
void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *);
void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *);
void (GLAPIENTRY *BeginQuery)(GLenum, GLuint);
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
index 20b16b73ef..ab98eddbf9 100644
--- a/video/out/opengl/context.c
+++ b/video/out/opengl/context.c
@@ -92,6 +92,7 @@ static const struct mpgl_driver *const backends[] = {
// 0-terminated list of desktop GL versions a backend should try to
// initialize. The first entry is the most preferred version.
const int mpgl_preferred_gl_versions[] = {
+ 440,
400,
330,
320,
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
index bfefc3d3bf..74a4947137 100644
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@@ -70,6 +70,13 @@
#define GL_DEBUG_SEVERITY_LOW 0x9148
#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B
+// --- GL 4.4 or GL_ARB_buffer_storage
+
+#define GL_MAP_PERSISTENT_BIT 0x0040
+#define GL_MAP_COHERENT_BIT 0x0080
+#define GL_DYNAMIC_STORAGE_BIT 0x0100
+#define GL_CLIENT_STORAGE_BIT 0x0200
+
// --- GL_NV_vdpau_interop
#define GLvdpauSurfaceNV GLintptr
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index ba54dd4c6c..9587eaaa5f 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -174,6 +174,17 @@ struct pass_info {
#define PASS_INFO_MAX (SHADER_MAX_HOOKS + 32)
+struct dr_buffer { // One direct-rendering buffer: a mapped PBO handed out by gl_video_dr_alloc_buffer().
+ void *ptr; // address returned by MapBufferRange for this PBO
+ size_t size; // size in bytes requested for the buffer storage and mapping
+ GLuint pbo; // GL buffer object backing ptr
+ // While a PBO is read-accessed by GL, we must not write to the mapped data.
+ // The fence tells us when GL is done, and the mpi reference will keep the
+ // data from being recycled (or from other references gaining write access).
+ GLsync fence;
+ struct mp_image *mpi;
+};
+
struct gl_video {
GL *gl;
@@ -212,6 +223,11 @@ struct gl_video {
struct video_image image;
+ struct dr_buffer *dr_buffers;
+ int num_dr_buffers;
+
+ bool using_dr_path;
+
bool dumb_mode;
bool forced_dumb_mode;
@@ -933,11 +949,56 @@ static void unmap_current_image(struct gl_video *p)
}
}
+static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr)
+{ /* Look up the DR buffer whose mapped range [buf->ptr, buf->ptr + buf->size)
+   * contains ptr. Returns NULL if ptr lies in none of p->dr_buffers. */
+ for (int i = 0; i < p->num_dr_buffers; i++) {
+ struct dr_buffer *buf = &p->dr_buffers[i];
+ if (ptr >= (uint8_t *)buf->ptr && ptr < (uint8_t *)buf->ptr + buf->size)
+ return buf;
+ }
+
+ return NULL;
+}
+
+static void gc_pending_dr_fences(struct gl_video *p, bool force)
+{ /* Release the fence and the image reference of every DR buffer GL has
+   * finished reading. With force set, every fenced buffer is released
+   * regardless of the fence state (used by gl_video_uninit()). */
+ GL *gl = p->gl;
+
+again:;
+ for (int n = 0; n < p->num_dr_buffers; n++) {
+ struct dr_buffer *buffer = &p->dr_buffers[n];
+ if (!buffer->fence)
+ continue;
+
+ GLenum res = gl->ClientWaitSync(buffer->fence, 0, 0); // non-blocking (timeout 0): only polls the fence state
+ if (res == GL_ALREADY_SIGNALED || force) {
+ gl->DeleteSync(buffer->fence);
+ buffer->fence = NULL;
+ // Unreferencing the image could cause gl_video_dr_free_buffer()
+ // to be called by the talloc destructor (if it was the last
+ // reference). This will implicitly invalidate the buffer pointer
+ // and change the p->dr_buffers array. To make it worse, it could
+ // free multiple dr_buffers due to weird theoretical corner cases.
+ // This is also why we use the goto to iterate again from the
+ // start: after the unref, no index or pointer into the array is safe.
+ struct mp_image *ref = buffer->mpi;
+ buffer->mpi = NULL; // clear first: gl_video_dr_free_buffer() asserts that mpi is unset
+ talloc_free(ref);
+ goto again;
+ }
+ }
+}
+
static void unref_current_image(struct gl_video *p)
{ // Drop the currently held video image (unmap it, reset its id, release the reference).
unmap_current_image(p);
- mp_image_unrefp(&p->image.mpi);
p->image.id = 0;
+
+ mp_image_unrefp(&p->image.mpi);
+
+ // While we're at it, also garbage collect pending fences in here to
+ // get it out of the way.
+ gc_pending_dr_fences(p, false); // false: only buffers whose fence has already signaled are released
}
// If overlay mode is used, make sure to remove the overlay.
@@ -3088,10 +3149,34 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t
plane->flipped = mpi->stride[0] < 0;
gl->BindTexture(plane->gl_target, plane->gl_texture);
- gl_pbo_upload_tex(&plane->pbo, gl, p->opts.pbo, plane->gl_target,
- plane->gl_format, plane->gl_type, plane->w, plane->h,
- mpi->planes[n], mpi->stride[n],
+
+ struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]);
+ if (mapped) {
+ assert(mapped->pbo > 0);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, mapped->pbo);
+ uintptr_t offset = mpi->planes[n] - (uint8_t *)mapped->ptr;
+ gl_upload_tex(gl, plane->gl_target,
+ plane->gl_format, plane->gl_type,
+ (void *)offset, mpi->stride[n],
0, 0, plane->w, plane->h);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ // Make sure the PBO is not reused until GL is done with it. If a
+ // previous operation is pending, "update" it by creating a new
+ // fence that will cover the previous operation as well.
+ gl->DeleteSync(mapped->fence);
+ mapped->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ if (!mapped->mpi)
+ mapped->mpi = mp_image_new_ref(mpi);
+ } else {
+ gl_pbo_upload_tex(&plane->pbo, gl, p->opts.pbo, plane->gl_target,
+ plane->gl_format, plane->gl_type, plane->w, plane->h,
+ mpi->planes[n], mpi->stride[n],
+ 0, 0, plane->w, plane->h);
+ }
+ if (p->using_dr_path != !!mapped) {
+ p->using_dr_path = !!mapped;
+ MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
+ }
gl->BindTexture(plane->gl_target, 0);
}
gl_timer_stop(gl);
@@ -3319,6 +3404,13 @@ void gl_video_uninit(struct gl_video *p)
gl_set_debug_logger(gl, NULL);
+ // Forcibly destroy possibly remaining image references. This should also
+ // cause gl_video_dr_free_buffer() to be called for the remaining buffers.
+ gc_pending_dr_fences(p, true);
+
+ // Should all have been unreffed already.
+ assert(!p->num_dr_buffers);
+
talloc_free(p);
}
@@ -3603,3 +3695,58 @@ void gl_video_set_hwdec(struct gl_video *p, struct gl_hwdec *hwdec)
p->hwdec = hwdec;
unref_current_image(p);
}
+
+void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size)
+{ /* Allocate a direct-rendering buffer of size bytes: creates a PBO with
+   * immutable storage, maps it persistently and coherently, and records it
+   * in p->dr_buffers so gl_find_dr_buffer() can recognize pointers into it.
+   * Returns the mapped address, or NULL if the GL version is below 4.4 or
+   * the mapping fails. Release with gl_video_dr_free_buffer(). */
+ GL *gl = p->gl;
+
+ if (gl->version < 440) // BufferStorage is registered for GL 4.4 core
+ return NULL;
+
+ MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers);
+ int index = p->num_dr_buffers++;
+ struct dr_buffer *buffer = &p->dr_buffers[index];
+
+ *buffer = (struct dr_buffer){
+ .size = size,
+ };
+
+ unsigned flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT |
+ GL_MAP_COHERENT_BIT;
+
+ gl->GenBuffers(1, &buffer->pbo);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer->pbo);
+ gl->BufferStorage(GL_PIXEL_UNPACK_BUFFER, size, NULL, flags);
+ buffer->ptr = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, flags);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ if (!buffer->ptr) { // mapping failed: undo the PBO and the array entry added above
+ gl_check_error(p->gl, p->log, "mapping buffer");
+ gl->DeleteBuffers(1, &buffer->pbo);
+ MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, index);
+ return NULL;
+ }
+
+ return buffer->ptr;
+}
+
+void gl_video_dr_free_buffer(struct gl_video *p, void *ptr)
+{ /* Destroy the DR buffer whose mapped start address equals ptr, i.e. the
+   * exact pointer returned by gl_video_dr_alloc_buffer(). Deletes any
+   * pending fence, unmaps and deletes the PBO, and removes the entry from
+   * p->dr_buffers. Asserts if no such buffer exists. */
+ GL *gl = p->gl;
+
+ for (int n = 0; n < p->num_dr_buffers; n++) {
+ struct dr_buffer *buffer = &p->dr_buffers[n];
+ if (buffer->ptr == ptr) {
+ assert(!buffer->mpi); // can't be freed while it has a ref
+ gl->DeleteSync(buffer->fence);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer->pbo);
+ gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ gl->DeleteBuffers(1, &buffer->pbo);
+
+ MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n);
+ return;
+ }
+ }
+ // not found - must not happen
+ assert(0);
+}
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index 09083da41b..f3608626e4 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -182,4 +182,7 @@ void gl_video_set_hwdec(struct gl_video *p, struct gl_hwdec *hwdec);
struct vo;
void gl_video_configure_queue(struct gl_video *p, struct vo *vo);
+void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size);
+void gl_video_dr_free_buffer(struct gl_video *p, void *ptr);
+
#endif
diff --git a/video/out/vo.c b/video/out/vo.c
index 79fc4f3bb4..e52495e195 100644
--- a/video/out/vo.c
+++ b/video/out/vo.c
@@ -23,9 +23,12 @@
#include <pthread.h>
#include <math.h>
+#include <libavutil/buffer.h>
+
#include "mpv_talloc.h"
#include "config.h"
+#include "osdep/atomic.h"
#include "osdep/timer.h"
#include "osdep/threads.h"
#include "misc/dispatch.h"
@@ -113,6 +116,8 @@ struct vo_internal {
pthread_t thread;
struct mp_dispatch_queue *dispatch;
+ atomic_ullong dr_in_flight;
+
// --- The following fields are protected by lock
pthread_mutex_t lock;
pthread_cond_t wakeup;
@@ -955,6 +960,7 @@ static void *vo_thread(void *ptr)
talloc_free(in->current_frame);
in->current_frame = NULL;
vo->driver->uninit(vo);
+ assert(atomic_load(&vo->in->dr_in_flight) == 0);
return NULL;
}
@@ -1258,3 +1264,88 @@ int lookup_keymap_table(const struct mp_keymap *map, int key)
map++;
return map->to;
}
+
+struct free_dr_context { // State carried by the wrapper AVBufferRef created in sync_get_image().
+ struct vo *vo; // VO whose thread performs the actual free
+ AVBufferRef *ref; // buffer reference returned by the VO driver; unreffed in vo_thread_free()
+};
+
+static void vo_thread_free(void *ptr)
+{ /* Final release of a DR buffer. ptr is the struct free_dr_context created
+   * in sync_get_image(); free_dr_buffer_on_vo_thread() either calls this
+   * directly or hands it to the VO's dispatch queue. Decrements
+   * dr_in_flight, drops the wrapped AVBufferRef and frees the context. */
+ struct free_dr_context *ctx = ptr;
+
+ unsigned long long v = atomic_fetch_add(&ctx->vo->in->dr_in_flight, -1);
+ assert(v); // v is the value before the subtraction; 0 here would mean an unexpected underflow.
+
+ av_buffer_unref(&ctx->ref);
+ talloc_free(ctx);
+}
+
+static void free_dr_buffer_on_vo_thread(void *opaque, uint8_t *data)
+{ /* Free callback passed to av_buffer_create() in sync_get_image(). opaque
+   * is the struct free_dr_context; data is not used. Ensures that
+   * vo_thread_free() is executed via the VO thread. */
+ struct free_dr_context *ctx = opaque;
+
+ // The image could be unreffed even on the VO thread. In practice, this
+ // matters most on VO destruction.
+ if (pthread_equal(ctx->vo->in->thread, pthread_self())) {
+ vo_thread_free(ctx); // already on the VO thread: free directly instead of dispatching to ourselves
+ } else {
+ mp_dispatch_run(ctx->vo->in->dispatch, vo_thread_free, ctx);
+ }
+}
+
+struct get_image_cmd { // Argument/result block passed from vo_get_image() to sync_get_image().
+ struct vo *vo; // VO whose driver->get_image is called
+ int imgfmt, w, h, stride_align; // request parameters, forwarded unchanged to the driver
+ struct mp_image *res; // result: the allocated image, or NULL on failure
+};
+
+static void sync_get_image(void *ptr)
+{ /* Dispatch callback behind vo_get_image(); ptr is a struct get_image_cmd.
+   * Calls the driver's get_image and stores the result in cmd->res. On
+   * success, the image's single AVBufferRef is replaced by a wrapper whose
+   * free callback is free_dr_buffer_on_vo_thread(), and dr_in_flight is
+   * incremented. */
+ struct get_image_cmd *cmd = ptr;
+ struct vo *vo = cmd->vo;
+
+ cmd->res = vo->driver->get_image(vo, cmd->imgfmt, cmd->w, cmd->h,
+ cmd->stride_align);
+ if (!cmd->res) // driver could not allocate; the caller sees NULL
+ return;
+
+ // We require exactly 1 AVBufferRef.
+ assert(cmd->res->bufs[0]);
+ assert(!cmd->res->bufs[1]);
+
+ // Apply some magic to get it free'd on the VO thread as well. For this to
+ // work, we create a dummy-ref that aliases the original ref, which is why
+ // the original ref must be writable in the first place. (A newly allocated
+ // image should be always writable of course.)
+ assert(mp_image_is_writeable(cmd->res));
+
+ struct free_dr_context *ctx = talloc_zero(NULL, struct free_dr_context);
+ *ctx = (struct free_dr_context){
+ .vo = vo,
+ .ref = cmd->res->bufs[0], // the original ref is kept here and unreffed in vo_thread_free()
+ };
+
+ AVBufferRef *new_ref = av_buffer_create(ctx->ref->data, ctx->ref->size,
+ free_dr_buffer_on_vo_thread, ctx, 0);
+ if (!new_ref)
+ abort(); // tiny malloc OOM
+
+ cmd->res->bufs[0] = new_ref;
+
+ atomic_fetch_add(&vo->in->dr_in_flight, 1); // balanced by the decrement in vo_thread_free()
+}
+
+struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h,
+ int stride_align)
+{ /* Ask the VO driver for an image the decoder can render into directly.
+   * Returns NULL if the driver has no get_image callback or the allocation
+   * fails. The request is executed by sync_get_image() through the VO's
+   * dispatch queue. */
+ if (!vo->driver->get_image)
+ return NULL;
+
+ struct get_image_cmd cmd = {
+ .vo = vo,
+ .imgfmt = imgfmt, .w = w, .h = h, .stride_align = stride_align,
+ };
+ mp_dispatch_run(vo->in->dispatch, sync_get_image, &cmd); // NOTE(review): relies on mp_dispatch_run() returning only after the callback ran (cmd is on this stack) - confirm
+ return cmd.res;
+}
diff --git a/video/out/vo.h b/video/out/vo.h
index 6dce8f6c2f..82ec284219 100644
--- a/video/out/vo.h
+++ b/video/out/vo.h
@@ -280,6 +280,36 @@ struct vo_driver {
int (*control)(struct vo *vo, uint32_t request, void *data);
/*
+ * lavc callback for direct rendering
+ *
+ * Optional. To make implementation easier, the callback is always run on
+ * the VO thread. The returned mp_image's destructor callback is also called
+ * on the VO thread, even if it's actually unref'ed from another thread.
+ *
+ * Ideally, the last reference to an image is destroyed before ->uninit is
+ * called. This is not actually guaranteed, though: libmpv screenshots can
+ * hold the reference longer.
+ *
+ * The allocated image - or a part of it, can be passed to draw_frame(). The
+ * point of this mechanism is that the decoder directly renders to GPU
+ * staging memory, to avoid a memcpy on frame upload. But this is not a
+ * guarantee. A filter could change the data pointers or return a newly
+ * allocated image. It's even possible that only 1 plane uses the buffer
+ * allocated by the get_image function. The VO has to check for this.
+ *
+ * stride_align is always a value >=1 that is a power of 2. The stride
+ * values of the returned image must be divisible by this value.
+ *
+ * Currently, the returned image must have exactly 1 AVBufferRef set, for
+ * internal implementation simplicity.
+ *
+ * returns: an allocated, refcounted image; if NULL is returned, the caller
+ * will silently fallback to a default allocator
+ */
+ struct mp_image *(*get_image)(struct vo *vo, int imgfmt, int w, int h,
+ int stride_align);
+
+ /*
* Render the given frame to the VO's backbuffer. This operation will be
* followed by a draw_osd and a flip_page[_timed] call.
* mpi belongs to the VO; the VO must free it eventually.
@@ -410,6 +440,8 @@ double vo_get_estimated_vsync_jitter(struct vo *vo);
double vo_get_display_fps(struct vo *vo);
double vo_get_delay(struct vo *vo);
void vo_discard_timing_info(struct vo *vo);
+struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h,
+ int stride_align);
void vo_wakeup(struct vo *vo);
void vo_wait_default(struct vo *vo, int64_t until_time);
diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c
index f5b0bd37c4..d3b8bbffa3 100644
--- a/video/out/vo_opengl.c
+++ b/video/out/vo_opengl.c
@@ -343,6 +343,34 @@ static void wait_events(struct vo *vo, int64_t until_time_us)
}
}
+static void vo_opengl_free_dr(void *opaque, uint8_t *data)
+{ // Free callback given to mp_image_from_buffer() in get_image(); opaque is the struct gl_priv, data the buffer start.
+ struct gl_priv *p = opaque;
+ gl_video_dr_free_buffer(p->renderer, data);
+}
+
+static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
+ int stride_align)
+{ /* vo_driver.get_image implementation for vo_opengl. Allocates a DR buffer
+   * from the renderer and wraps it in an mp_image whose free callback is
+   * vo_opengl_free_dr(). Returns NULL if the size cannot be computed, the
+   * buffer cannot be allocated, or the image wrapper cannot be created. */
+ struct gl_priv *p = vo->priv;
+
+ int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align);
+ if (size < 0) // a negative result is treated as failure
+ return NULL;
+
+ int alloc_size = size + stride_align; // NOTE(review): extra stride_align bytes, presumably slack for aligning the start pointer - confirm against mp_image_from_buffer()
+ void *ptr = gl_video_dr_alloc_buffer(p->renderer, alloc_size);
+ if (!ptr)
+ return NULL;
+
+ struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align,
+ ptr, alloc_size, p,
+ vo_opengl_free_dr);
+ if (!res)
+ gl_video_dr_free_buffer(p->renderer, ptr); // no image took ownership, so free the buffer here
+ return res;
+}
+
static void uninit(struct vo *vo)
{
struct gl_priv *p = vo->priv;
@@ -427,6 +455,7 @@ const struct vo_driver video_out_opengl = {
.query_format = query_format,
.reconfig = reconfig,
.control = control,
+ .get_image = get_image,
.draw_frame = draw_frame,
.flip_page = flip_page,
.wait_events = wait_events,