author    Niklas Haas <git@haasn.dev>            2022-02-25 22:07:56 +0100
committer Niklas Haas <github-daiK1o@haasn.dev>  2022-03-03 13:06:05 +0100
commit    dfe846f9de30e2606a38afb6ca6de755e84df3df (patch)
tree      86f076ef7389d76c6784567536cb602951b2f320
parent    c13f5f24cd3c7a7a1cae3258146c603864c74e67 (diff)
vo_gpu_next: add support for hardware decoding
There are two major ways of going about this:

1. Expose the native ra_gl/ra_pl/ra_d3d11 objects to the pre-existing
   hwdec mappers, and then add code in vo_gpu_next to rewrap those
   ra_tex objects into pl_tex.

2. Wrap the underlying pl_opengl/pl_d3d11 into a ra_pl object and expose
   it to the hwdec mappers, then directly use the resulting pl_tex.

I ultimately opted for approach 1 because it enables compatibility with
more hardware decoders, specifically including those that currently use
native OpenGL calls. The second approach only really works with cuda_vk
and vaapi_pl.
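To illustrate the rewrapping step of approach 1, here is a minimal sketch
(not part of the patch) for OpenGL-based RAs. It uses only libplacebo
calls that also appear in the patch below (pl_opengl_wrap and struct
pl_opengl_wrap_params); the helper name and its parameter list are
purely illustrative.

    #include <libplacebo/opengl.h>

    // Hypothetical helper: wrap a raw GL texture, as produced by an
    // existing OpenGL-based hwdec mapper, into a pl_tex that libplacebo
    // can sample from. `gpu` must come from a pl_opengl instance;
    // texture/target/iformat/w/h describe the GL texture owned by the
    // mapper.
    static pl_tex wrap_gl_texture(pl_gpu gpu, unsigned texture, unsigned target,
                                  int iformat, int w, int h)
    {
        struct pl_opengl_wrap_params params = {
            .texture = texture,
            .target  = target,
            .iformat = iformat,
            .width   = w,
            .height  = h,
        };
        // Returns NULL on failure; the wrapper is released again with
        // pl_tex_destroy() once the frame is unmapped.
        return pl_opengl_wrap(gpu, &params);
    }

hwdec_get_tex() in the patch does effectively this, first querying the
raw GL texture, target and internal format from the ra_tex via
ra_gl_get_raw_tex() and ra_gl_get_format(), and unmap_frame() later
destroys the wrapper textures.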
-rw-r--r--  DOCS/man/vo.rst               |   4
-rw-r--r--  video/out/gpu_next/context.h  |   1
-rw-r--r--  video/out/vo_gpu_next.c       | 201
3 files changed, 166 insertions, 40 deletions
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index cd995df3c7..1843a56e4f 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -262,10 +262,6 @@ Available video output drivers are:
the same set of features as ``--vo=gpu``. See `GPU renderer options`_ for a
list.
- Currently, this only supports Vulkan, OpenGL, D3D11 and no hardware
- decoding. Unlike ``--vo=gpu``, the FBO formats are not tunable, but you can
- still set ``--gpu-dumb-mode=yes`` to forcibly disable their use.
-
Should generally be faster and higher quality, but some features may still
be missing or misbehave. Expect (and report!) bugs. See here for a list of
known differences and bugs:
diff --git a/video/out/gpu_next/context.h b/video/out/gpu_next/context.h
index 2e2bc3cd29..b98b9e7251 100644
--- a/video/out/gpu_next/context.h
+++ b/video/out/gpu_next/context.h
@@ -26,7 +26,6 @@ struct gl_video_opts;
struct gpu_ctx {
struct mp_log *log;
-
struct ra_ctx *ra_ctx;
pl_log pllog;
diff --git a/video/out/vo_gpu_next.c b/video/out/vo_gpu_next.c
index 0ba5eb5f77..0b78bf0acc 100644
--- a/video/out/vo_gpu_next.c
+++ b/video/out/vo_gpu_next.c
@@ -32,15 +32,22 @@
#include "options/path.h"
#include "osdep/io.h"
#include "stream/stream.h"
-#include "video/mp_image.h"
#include "video/fmt-conversion.h"
+#include "video/mp_image.h"
+#include "video/out/placebo/ra_pl.h"
#include "placebo/utils.h"
#include "gpu/context.h"
+#include "gpu/hwdec.h"
#include "gpu/video.h"
#include "gpu/video_shaders.h"
#include "sub/osd.h"
#include "gpu_next/context.h"
+#if HAVE_GL && defined(PL_HAVE_OPENGL)
+#include <libplacebo/opengl.h>
+#include "video/out/opengl/ra_gl.h"
+#endif
+
struct osd_entry {
pl_tex tex;
struct pl_overlay_part *parts;
@@ -75,6 +82,11 @@ struct priv {
struct mpv_global *global;
struct ra_ctx *ra_ctx;
struct gpu_ctx *context;
+ struct ra_hwdec_ctx hwdec_ctx;
+
+ // Pooled/cached mappers, for performance
+ struct ra_hwdec_mapper **hwdec_mappers;
+ int num_hwdec_mappers;
pl_log pllog;
pl_gpu gpu;
@@ -306,6 +318,7 @@ struct frame_priv {
struct vo *vo;
struct osd_state subs;
uint64_t osd_sync;
+ struct ra_hwdec_mapper *hwdec_mapper;
};
static int plane_data_from_imgfmt(struct pl_plane_data out_data[4],
@@ -441,19 +454,70 @@ static struct pl_color_space get_mpi_csp(struct vo *vo, struct mp_image *mpi)
return csp;
}
+// For RAs not based on ra_pl, this creates a new pl_tex wrapper
+static pl_tex hwdec_get_tex(struct frame_priv *fp, int n)
+{
+ struct priv *p = fp->vo->priv;
+ struct ra_tex *ratex = fp->hwdec_mapper->tex[n];
+ struct ra *ra = fp->hwdec_mapper->ra;
+ if (ra_pl_get(ra))
+ return (pl_tex) ratex->priv;
+
+#if HAVE_GL && defined(PL_HAVE_OPENGL)
+ if (ra_is_gl(ra) && pl_opengl_get(p->gpu)) {
+ struct pl_opengl_wrap_params par = {
+ .width = ratex->params.w,
+ .height = ratex->params.h,
+ };
+
+ ra_gl_get_format(ratex->params.format, &par.iformat,
+ &(GLenum){0}, &(GLenum){0});
+ ra_gl_get_raw_tex(ra, ratex, &par.texture, &par.target);
+ return pl_opengl_wrap(p->gpu, &par);
+ }
+#endif
+
+ // TODO: d3d11 wrapping/unwrapping
+
+ MP_ERR(p, "Failed mapping hwdec frame? Open a bug!\n");
+ return false;
+}
+
static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src,
struct pl_frame *frame)
{
struct mp_image *mpi = src->frame_data;
const struct mp_image_params *par = &mpi->params;
struct frame_priv *fp = mpi->priv;
- struct pl_plane_data data[4] = {0};
struct vo *vo = fp->vo;
struct priv *p = vo->priv;
- // TODO: implement support for hwdec wrappers
+ struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, mpi->imgfmt);
+ if (hwdec) {
+ if (MP_TARRAY_POP(p->hwdec_mappers, p->num_hwdec_mappers, &fp->hwdec_mapper)) {
+ if (!mp_image_params_equal(&mpi->params, &fp->hwdec_mapper->src_params))
+ ra_hwdec_mapper_free(&fp->hwdec_mapper);
+ }
+
+ if (!fp->hwdec_mapper) {
+ fp->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &mpi->params);
+ if (!fp->hwdec_mapper) {
+ MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
+ return false;
+ }
+ }
+
+ if (ra_hwdec_mapper_map(fp->hwdec_mapper, mpi) < 0) {
+ MP_ERR(p, "Mapping hardware decoded surface failed.\n");
+ MP_TARRAY_APPEND(p, p->hwdec_mappers, p->num_hwdec_mappers, fp->hwdec_mapper);
+ fp->hwdec_mapper = NULL;
+ return false;
+ }
+
+ par = &fp->hwdec_mapper->dst_params;
+ }
+
*frame = (struct pl_frame) {
- .num_planes = mpi->num_planes,
.color = get_mpi_csp(vo, mpi),
.repr = {
.sys = mp_csp_to_pl(par->color.space),
@@ -485,43 +549,71 @@ static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src
default: break;
}
- enum pl_chroma_location chroma = mp_chroma_to_pl(par->chroma_location);
- int planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt);
- for (int n = 0; n < planes; n++) {
- struct pl_plane *plane = &frame->planes[n];
- data[n].width = mp_image_plane_w(mpi, n);
- data[n].height = mp_image_plane_h(mpi, n);
- if (mpi->stride[n] < 0) {
- data[n].pixels = mpi->planes[n] + (data[n].height - 1) * mpi->stride[n];
- data[n].row_stride = -mpi->stride[n];
- plane->flipped = true;
- } else {
- data[n].pixels = mpi->planes[n];
- data[n].row_stride = mpi->stride[n];
+ if (hwdec) {
+
+ struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(par->imgfmt);
+ frame->num_planes = desc.num_planes;
+ for (int n = 0; n < frame->num_planes; n++) {
+ struct pl_plane *plane = &frame->planes[n];
+ plane->texture = hwdec_get_tex(fp, n);
+ if (!plane->texture)
+ return false;
+
+ int *map = plane->component_mapping;
+ for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) {
+ if (desc.comps[c].plane != n)
+ continue;
+
+ // Sort by component offset
+ uint8_t offset = desc.comps[c].offset;
+ int index = plane->components++;
+ while (index > 0 && desc.comps[map[index - 1]].offset > offset) {
+ map[index] = map[index - 1];
+ index--;
+ }
+ map[index] = c;
+ }
}
- pl_buf buf = get_dr_buf(mpi);
- if (buf) {
- data[n].buf = buf;
- data[n].buf_offset = (uint8_t *) data[n].pixels - buf->data;
- data[n].pixels = NULL;
- } else if (gpu->limits.callbacks) {
- data[n].callback = talloc_free;
- data[n].priv = mp_image_new_ref(mpi);
- }
+ } else { // swdec
+
+ struct pl_plane_data data[4] = {0};
+ frame->num_planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt);
+ for (int n = 0; n < frame->num_planes; n++) {
+ struct pl_plane *plane = &frame->planes[n];
+ data[n].width = mp_image_plane_w(mpi, n);
+ data[n].height = mp_image_plane_h(mpi, n);
+ if (mpi->stride[n] < 0) {
+ data[n].pixels = mpi->planes[n] + (data[n].height - 1) * mpi->stride[n];
+ data[n].row_stride = -mpi->stride[n];
+ plane->flipped = true;
+ } else {
+ data[n].pixels = mpi->planes[n];
+ data[n].row_stride = mpi->stride[n];
+ }
- if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) {
- MP_ERR(vo, "Failed uploading frame!\n");
- talloc_free(data[n].priv);
- return false;
- }
+ pl_buf buf = get_dr_buf(mpi);
+ if (buf) {
+ data[n].buf = buf;
+ data[n].buf_offset = (uint8_t *) data[n].pixels - buf->data;
+ data[n].pixels = NULL;
+ } else if (gpu->limits.callbacks) {
+ data[n].callback = talloc_free;
+ data[n].priv = mp_image_new_ref(mpi);
+ }
- if (mpi->fmt.xs[n] || mpi->fmt.ys[n]) {
- pl_chroma_location_offset(chroma, &plane->shift_x, &plane->shift_y);
- plane->shift_y = -plane->shift_y;
+ if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) {
+ MP_ERR(vo, "Failed uploading frame!\n");
+ talloc_free(data[n].priv);
+ return false;
+ }
}
+
}
+ // Update chroma location, must be done after initializing planes
+ pl_frame_set_chroma_location(frame, mp_chroma_to_pl(par->chroma_location));
+
#ifdef PL_HAVE_LAV_DOLBY_VISION
if (mpi->dovi) {
const AVDOVIMetadata *metadata = (AVDOVIMetadata *) mpi->dovi->data;
@@ -558,6 +650,17 @@ static void unmap_frame(pl_gpu gpu, struct pl_frame *frame,
struct mp_image *mpi = src->frame_data;
struct frame_priv *fp = mpi->priv;
struct priv *p = fp->vo->priv;
+ if (fp->hwdec_mapper) {
+ // Clean up after wrapped plane textures
+ if (!ra_pl_get(fp->hwdec_mapper->ra)) {
+ for (int n = 0; n < frame->num_planes; n++)
+ pl_tex_destroy(p->gpu, &frame->planes[n].texture);
+ }
+
+ ra_hwdec_mapper_unmap(fp->hwdec_mapper);
+ MP_TARRAY_APPEND(p, p->hwdec_mappers, p->num_hwdec_mappers, fp->hwdec_mapper);
+ fp->hwdec_mapper = NULL;
+ }
for (int i = 0; i < MP_ARRAY_SIZE(fp->subs.entries); i++) {
pl_tex tex = fp->subs.entries[i].tex;
if (tex)
@@ -844,6 +947,9 @@ static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
static int query_format(struct vo *vo, int format)
{
struct priv *p = vo->priv;
+ if (ra_hwdec_get(&p->hwdec_ctx, format))
+ return true;
+
struct pl_bit_encoding bits;
struct pl_plane_data data[4] = {0};
int planes = plane_data_from_imgfmt(data, &bits, format);
@@ -1086,6 +1192,10 @@ static int control(struct vo *vo, uint32_t request, void *data)
case VOCTRL_EXTERNAL_RESIZE:
reconfig(vo, NULL);
return true;
+
+ case VOCTRL_LOAD_HWDEC_API:
+ ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, vo->hwdec_devs, (intptr_t) data);
+ return true;
}
int events = 0;
@@ -1144,6 +1254,14 @@ static void uninit(struct vo *vo)
for (int i = 0; i < p->num_user_hooks; i++)
pl_mpv_user_shader_destroy(&p->user_hooks[i].hook);
+ if (vo->hwdec_devs) {
+ for (int n = 0; n < p->num_hwdec_mappers; n++)
+ ra_hwdec_mapper_free(&p->hwdec_mappers[n]);
+ ra_hwdec_ctx_uninit(&p->hwdec_ctx);
+ hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL);
+ hwdec_devices_destroy(vo->hwdec_devs);
+ }
+
char *cache_file = get_cache_file(p);
if (cache_file) {
FILE *cache = fopen(cache_file, "wb");
@@ -1167,6 +1285,11 @@ static void uninit(struct vo *vo)
gpu_ctx_destroy(&p->context);
}
+static void load_hwdec_api(void *ctx, int imgfmt)
+{
+ vo_control(ctx, VOCTRL_LOAD_HWDEC_API, (void *)(intptr_t) imgfmt);
+}
+
static int preinit(struct vo *vo)
{
struct priv *p = vo->priv;
@@ -1176,7 +1299,6 @@ static int preinit(struct vo *vo)
p->log = vo->log;
struct gl_video_opts *gl_opts = p->opts_cache->opts;
-
p->context = gpu_ctx_create(vo, gl_opts);
if (!p->context)
goto err_out;
@@ -1185,6 +1307,15 @@ static int preinit(struct vo *vo)
p->pllog = p->context->pllog;
p->gpu = p->context->gpu;
p->sw = p->context->swapchain;
+ p->hwdec_ctx = (struct ra_hwdec_ctx) {
+ .log = p->log,
+ .global = p->global,
+ .ra = p->ra_ctx->ra,
+ };
+
+ vo->hwdec_devs = hwdec_devices_create();
+ hwdec_devices_set_loader(vo->hwdec_devs, load_hwdec_api, vo);
+ ra_hwdec_ctx_init(&p->hwdec_ctx, vo->hwdec_devs, gl_opts->hwdec_interop, false);
p->rr = pl_renderer_create(p->pllog, p->gpu);
p->queue = pl_queue_create(p->gpu);