summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- DOCS/man/options.rst 4
-rw-r--r-- video/decode/cuda.c 3
-rw-r--r-- video/fmt-conversion.c 3
-rw-r--r-- video/img_format.h 4
-rw-r--r-- video/out/opengl/hwdec_cuda.c 53
5 files changed, 55 insertions, 12 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index bf98370eee..82c51fc503 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -703,8 +703,8 @@ Video
mechanism in the opengl output path. To use this deinterlacing you
must pass the option: ``vd-lavc-o=deint=[weave|bob|adaptive]``. Pass
``weave`` to not attempt any deinterlacing.
- 10bit HEVC is available if the hardware supports it but it will be
- rounded down to 8 bits.
+ 10 and 12bit HEVC is available if the hardware supports it and a
+ sufficiently new driver (> 375.xx) is used.
``cuda-copy`` has the same behaviour as ``cuda`` - including the ability
to deinterlace inside the decoder. However, traditional deinterlacing
diff --git a/video/decode/cuda.c b/video/decode/cuda.c
index f9dd418fd5..b606315906 100644
--- a/video/decode/cuda.c
+++ b/video/decode/cuda.c
@@ -21,6 +21,7 @@
#include <libavutil/hwcontext_cuda.h>
#include "common/av_common.h"
+#include "video/fmt-conversion.h"
#include "video/decode/lavc.h"
typedef struct CUVIDContext {
@@ -114,7 +115,7 @@ static void uninit(struct lavc_ctx *ctx)
static struct mp_image *process_image(struct lavc_ctx *ctx, struct mp_image *img)
{
if (img->imgfmt == IMGFMT_CUDA)
- img->params.hw_subfmt = IMGFMT_NV12;
+ img->params.hw_subfmt = pixfmt2imgfmt(ctx->avctx->sw_pix_fmt);
return img;
}
diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c
index 32330ddeb1..8b991c5d19 100644
--- a/video/fmt-conversion.c
+++ b/video/fmt-conversion.c
@@ -112,6 +112,9 @@ static const struct {
#ifdef AV_PIX_FMT_P010
{IMGFMT_P010, AV_PIX_FMT_P010},
#endif
+#ifdef AV_PIX_FMT_P016
+ {IMGFMT_P016, AV_PIX_FMT_P016},
+#endif
{0, AV_PIX_FMT_NONE}
};
diff --git a/video/img_format.h b/video/img_format.h
index a91dcf865c..ee731aa51c 100644
--- a/video/img_format.h
+++ b/video/img_format.h
@@ -151,8 +151,10 @@ enum mp_imgfmt {
IMGFMT_NV12,
IMGFMT_NV21,
- // Like IMGFMT_NV12, but with 16 bits per component
+ // Like IMGFMT_NV12, but with 10 bits per component (and 6 bits of padding)
IMGFMT_P010,
+ // Like IMGFMT_NV12, but with 16 bits per component
+ IMGFMT_P016,
// RGB/BGR Formats
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index 539acbd4ba..4dc842706c 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -42,7 +42,7 @@ struct priv {
GLuint gl_textures[2];
CUgraphicsResource cu_res[2];
CUarray cu_array[2];
- bool mapped;
+ int sample_width;
CUcontext cuda_ctx;
};
@@ -81,7 +81,21 @@ static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx,
if (hw_image->imgfmt != IMGFMT_CUDA)
return NULL;
- struct mp_image *out = mp_image_pool_get(swpool, IMGFMT_NV12,
+ int sample_width;
+ switch (hw_image->params.hw_subfmt) {
+ case IMGFMT_NV12:
+ sample_width = 1;
+ break;
+ case IMGFMT_P010:
+ case IMGFMT_P016:
+ sample_width = 2;
+ break;
+ default:
+ return NULL;
+ }
+
+ struct mp_image *out = mp_image_pool_get(swpool,
+ hw_image->params.hw_subfmt,
hw_image->w, hw_image->h);
if (!out)
return NULL;
@@ -101,7 +115,8 @@ static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx,
.dstHost = out->planes[n],
.srcPitch = hw_image->stride[n],
.dstPitch = out->stride[n],
- .WidthInBytes = mp_image_plane_w(out, n) * (n + 1),
+ .WidthInBytes = mp_image_plane_w(out, n) *
+ (n + 1) * sample_width,
.Height = mp_image_plane_h(out, n),
};
@@ -176,11 +191,32 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
int ret = 0, eret = 0;
assert(params->imgfmt == hw->driver->imgfmt);
- params->imgfmt = IMGFMT_NV12;
+ params->imgfmt = params->hw_subfmt;
params->hw_subfmt = 0;
mp_image_set_params(&p->layout, params);
+ GLint luma_format, chroma_format;
+ GLenum type;
+ switch (params->imgfmt) {
+ case IMGFMT_NV12:
+ luma_format = GL_R8;
+ chroma_format = GL_RG8;
+ type = GL_UNSIGNED_BYTE;
+ p->sample_width = 1;
+ break;
+ case IMGFMT_P010:
+ case IMGFMT_P016:
+ luma_format = GL_R16;
+ chroma_format = GL_RG16;
+ type = GL_UNSIGNED_SHORT;
+ p->sample_width = 2;
+ break;
+ default:
+ MP_ERR(hw, "Unsupported format: %s\n", mp_imgfmt_to_name(params->imgfmt));
+ return -1;
+ }
+
ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
if (ret < 0)
return ret;
@@ -193,10 +229,10 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? GL_R8 : GL_RG8,
+ gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? luma_format : chroma_format,
mp_image_plane_w(&p->layout, n),
mp_image_plane_h(&p->layout, n),
- 0, n == 0 ? GL_RED : GL_RG, GL_UNSIGNED_BYTE, NULL);
+ 0, n == 0 ? GL_RED : GL_RG, type, NULL);
gl->BindTexture(GL_TEXTURE_2D, 0);
ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], p->gl_textures[n],
@@ -261,7 +297,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
for (int n = 0; n < 2; n++) {
// widthInBytes must account for the chroma plane
- // elements being two bytes wide.
+ // elements being two samples wide.
CUDA_MEMCPY2D cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
@@ -269,7 +305,8 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
.srcPitch = hw_image->stride[n],
.srcY = 0,
.dstArray = p->cu_array[n],
- .WidthInBytes = mp_image_plane_w(&p->layout, n) * (n + 1),
+ .WidthInBytes = mp_image_plane_w(&p->layout, n) *
+ (n + 1) * p->sample_width,
.Height = mp_image_plane_h(&p->layout, n),
};
ret = CHECK_CU(cuMemcpy2D(&cpy));