vo_opengl: hwdec_cuda: Support P016 output surfaces

The latest 375.xx NVIDIA drivers add support for P016 output surfaces. Combined with an FFmpeg change that returns those surfaces, we can now display them. The bulk of the work is related to knowing which format you're dealing with at the right time. Once you know, it's straightforward.
author: Philip Langdale <philipl@overt.org> 2016-11-19 13:57:23 -0800
committer: wm4 <wm4@nowhere> 2016-11-22 20:19:58 +0100
commit: 585c5c34f1195007beb012668aa9a22cb47b1f37 (patch)
tree: 216b2254833461835c15638beacda26e9e3edb4a /video
parent: 5087816a7431caf27a5a8a9e00d0004d8322cdaa (diff)
download: mpv-585c5c34f1195007beb012668aa9a22cb47b1f37.tar.bz2
mpv-585c5c34f1195007beb012668aa9a22cb47b1f37.tar.xz
4 files changed, 53 insertions, 10 deletions
diff --git a/video/decode/cuda.c b/video/decode/cuda.c
index f9dd418fd5..b606315906 100644
--- a/video/decode/cuda.c
+++ b/video/decode/cuda.c
@@ -21,6 +21,7 @@
 #include <libavutil/hwcontext_cuda.h>
 
 #include "common/av_common.h"
+#include "video/fmt-conversion.h"
 #include "video/decode/lavc.h"
 
 typedef struct CUVIDContext {
@@ -114,7 +115,7 @@ static void uninit(struct lavc_ctx *ctx)
 static struct mp_image *process_image(struct lavc_ctx *ctx, struct mp_image *img)
 {
     if (img->imgfmt == IMGFMT_CUDA)
-        img->params.hw_subfmt = IMGFMT_NV12;
+        img->params.hw_subfmt = pixfmt2imgfmt(ctx->avctx->sw_pix_fmt);
     return img;
 }
 
diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c
index 32330ddeb1..8b991c5d19 100644
--- a/video/fmt-conversion.c
+++ b/video/fmt-conversion.c
@@ -112,6 +112,9 @@ static const struct {
 #ifdef AV_PIX_FMT_P010
     {IMGFMT_P010, AV_PIX_FMT_P010},
 #endif
+#ifdef AV_PIX_FMT_P016
+    {IMGFMT_P016, AV_PIX_FMT_P016},
+#endif
 
     {0, AV_PIX_FMT_NONE}
 };
diff --git a/video/img_format.h b/video/img_format.h
index a91dcf865c..ee731aa51c 100644
--- a/video/img_format.h
+++ b/video/img_format.h
@@ -151,8 +151,10 @@ enum mp_imgfmt {
     IMGFMT_NV12,
     IMGFMT_NV21,
 
-    // Like IMGFMT_NV12, but with 16 bits per component
+    // Like IMGFMT_NV12, but with 10 bits per component (and 6 bits of padding)
     IMGFMT_P010,
+    // Like IMGFMT_NV12, but with 16 bits per component
+    IMGFMT_P016,
 
     // RGB/BGR Formats
 
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index 539acbd4ba..4dc842706c 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -42,7 +42,7 @@ struct priv {
     GLuint gl_textures[2];
     CUgraphicsResource cu_res[2];
     CUarray cu_array[2];
-    bool mapped;
+    int sample_width;
 
     CUcontext cuda_ctx;
 };
@@ -81,7 +81,21 @@ static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx,
     if (hw_image->imgfmt != IMGFMT_CUDA)
         return NULL;
 
-    struct mp_image *out = mp_image_pool_get(swpool, IMGFMT_NV12,
+    int sample_width;
+    switch (hw_image->params.hw_subfmt) {
+    case IMGFMT_NV12:
+        sample_width = 1;
+        break;
+    case IMGFMT_P010:
+    case IMGFMT_P016:
+        sample_width = 2;
+        break;
+    default:
+        return NULL;
+    }
+
+    struct mp_image *out = mp_image_pool_get(swpool,
+                                             hw_image->params.hw_subfmt,
                                              hw_image->w, hw_image->h);
     if (!out)
         return NULL;
@@ -101,7 +115,8 @@ static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx,
             .dstHost       = out->planes[n],
             .srcPitch      = hw_image->stride[n],
             .dstPitch      = out->stride[n],
-            .WidthInBytes  = mp_image_plane_w(out, n) * (n + 1),
+            .WidthInBytes  = mp_image_plane_w(out, n) *
+                             (n + 1) * sample_width,
             .Height        = mp_image_plane_h(out, n),
         };
 
@@ -176,11 +191,32 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
     int ret = 0, eret = 0;
 
     assert(params->imgfmt == hw->driver->imgfmt);
-    params->imgfmt = IMGFMT_NV12;
+    params->imgfmt = params->hw_subfmt;
     params->hw_subfmt = 0;
 
     mp_image_set_params(&p->layout, params);
 
+    GLint luma_format, chroma_format;
+    GLenum type;
+    switch (params->imgfmt) {
+    case IMGFMT_NV12:
+        luma_format = GL_R8;
+        chroma_format = GL_RG8;
+        type = GL_UNSIGNED_BYTE;
+        p->sample_width = 1;
+        break;
+    case IMGFMT_P010:
+    case IMGFMT_P016:
+        luma_format = GL_R16;
+        chroma_format = GL_RG16;
+        type = GL_UNSIGNED_SHORT;
+        p->sample_width = 2;
+        break;
+    default:
+        MP_ERR(hw, "Unsupported format: %s\n", mp_imgfmt_to_name(params->imgfmt));
+        return -1;
+    }
+
     ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
     if (ret < 0)
         return ret;
@@ -193,10 +229,10 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
         gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
         gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
         gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-        gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? GL_R8 : GL_RG8,
+        gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? luma_format : chroma_format,
                        mp_image_plane_w(&p->layout, n),
                        mp_image_plane_h(&p->layout, n),
-                       0, n == 0 ? GL_RED : GL_RG, GL_UNSIGNED_BYTE, NULL);
+                       0, n == 0 ? GL_RED : GL_RG, type, NULL);
         gl->BindTexture(GL_TEXTURE_2D, 0);
 
         ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], p->gl_textures[n],
@@ -261,7 +297,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
 
     for (int n = 0; n < 2; n++) {
         // widthInBytes must account for the chroma plane
-        // elements being two bytes wide.
+        // elements being two samples wide.
         CUDA_MEMCPY2D cpy = {
             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
@@ -269,7 +305,8 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
             .srcPitch      = hw_image->stride[n],
             .srcY          = 0,
             .dstArray      = p->cu_array[n],
-            .WidthInBytes  = mp_image_plane_w(&p->layout, n) * (n + 1),
+            .WidthInBytes  = mp_image_plane_w(&p->layout, n) *
+                             (n + 1) * p->sample_width,
             .Height        = mp_image_plane_h(&p->layout, n),
         };
         ret = CHECK_CU(cuMemcpy2D(&cpy));
author	Philip Langdale <philipl@overt.org>	2016-11-19 13:57:23 -0800
committer	wm4 <wm4@nowhere>	2016-11-22 20:19:58 +0100
commit	585c5c34f1195007beb012668aa9a22cb47b1f37 (patch)
tree	216b2254833461835c15638beacda26e9e3edb4a /video
parent	5087816a7431caf27a5a8a9e00d0004d8322cdaa (diff)
download	mpv-585c5c34f1195007beb012668aa9a22cb47b1f37.tar.bz2 mpv-585c5c34f1195007beb012668aa9a22cb47b1f37.tar.xz