video: add vaapi decode and output support

This is based on the MPlayer VA API patches. To be exact, it's based on a very stripped down version of commit f1ad459a263f8537f6c from git://gitorious.org/vaapi/mplayer.git. This doesn't contain useless things like benchmarking hacks and the demo code for GLX interop. Also, unlike in the original patch, decoding and video output are split into separate source files (the separation between decoding and display also makes pixel format hacks unnecessary). On the other hand, some features not present in the original patch were added, like screenshot support. VA API is rather bad for actual video output. Dealing with older libva versions or the completely broken vdpau backend doesn't help. OSD is low quality and is probably rather slow. In some cases, only one of OSD or subtitles can be shown at a time (because OSD is drawn first, OSD is preferred). Also, libva can't decide whether it accepts straight or premultiplied alpha for OSD sub-pictures: the vdpau backend seems to assume premultiplied, while a native vaapi driver uses straight. So I picked straight alpha. It doesn't matter much, because the blending code for straight alpha I added to img_convert.c is probably buggy, and ASS subtitles might be blended incorrectly. Really good video output with VA API would probably use OpenGL and the GL interop features, but at this point you might as well just use vo_opengl. (Patches for making HW decoding work with vo_opengl have a chance of being accepted.) Despite these issues, decoding seems to work ok. I still got tearing on the Intel system I tested (Intel(R) Core(TM) i3-2350M). It was also tested with the vdpau vaapi wrapper on an nvidia system; however this was rather broken. (Fortunately, there is no reason to use mpv's VAAPI support over native VDPAU.)
author: wm4 <wm4@nowhere> 2013-08-09 14:01:30 +0200
committer: wm4 <wm4@nowhere> 2013-08-12 01:12:02 +0200
commit: 2827295703c74e3c119df9a435aa856e268c2ea9 (patch)
tree: 7464b1630d76e84f1abdf53680544a74b7dab300 /video/decode
parent: c7da4ba74469bc6c404c396340ffadc748535f6e (diff)
download: mpv-2827295703c74e3c119df9a435aa856e268c2ea9.tar.bz2
mpv-2827295703c74e3c119df9a435aa856e268c2ea9.tar.xz
4 files changed, 420 insertions, 0 deletions
diff --git a/video/decode/dec_video.h b/video/decode/dec_video.h
index 4ba052afd1..021abaaf22 100644
--- a/video/decode/dec_video.h
+++ b/video/decode/dec_video.h
@@ -51,6 +51,7 @@ extern int divx_quality;
 // The VO can set the context pointer for supported APIs.
 struct mp_hwdec_info {
     struct mp_vdpau_ctx *vdpau_ctx;
+    struct mp_vaapi_ctx *vaapi_ctx;
 };
 
 #endif /* MPLAYER_DEC_VIDEO_H */
diff --git a/video/decode/lavc.h b/video/decode/lavc.h
index 94973cb2d8..4252034b2c 100644
--- a/video/decode/lavc.h
+++ b/video/decode/lavc.h
@@ -17,6 +17,7 @@ enum hwdec_type {
     HWDEC_VDPAU = 1,
     HWDEC_VDA = 2,
     HWDEC_CRYSTALHD = 3,
+    HWDEC_VAAPI = 4,
 };
 
 typedef struct lavc_ctx {
diff --git a/video/decode/vaapi.c b/video/decode/vaapi.c
new file mode 100644
index 0000000000..baa13588c9
--- /dev/null
+++ b/video/decode/vaapi.c
@@ -0,0 +1,414 @@
+/*
+ * This file is part of mpv.
+ *
+ * With some chunks from original MPlayer VAAPI patch:
+ * Copyright (C) 2008-2009 Splitted-Desktop Systems
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavcodec/vaapi.h>
+#include <libavutil/common.h>
+
+#include "lavc.h"
+#include "mpvcore/mp_common.h"
+#include "mpvcore/av_common.h"
+#include "video/fmt-conversion.h"
+#include "video/vaapi.h"
+#include "video/decode/dec_video.h"
+
+/*
+ * The VAAPI decoder can work only with surfaces passed to the decoder at
+ * creation time. This means all surfaces have to be created in advance.
+ * So, in addition to the maximum number of reference frames, we need
+ * surfaces for:
+ * - 1 decode frame
+ * - decoding 1 frame ahead (done by generic playback code)
+ * - keeping the reference to the previous frame (done by vo_vaapi.c)
+ * Note that redundant additional surfaces also might allow for some
+ * buffering (i.e. not trying to reuse a surface while it's busy).
+ */
+#define ADDTIONAL_SURFACES 3
+
+// Cap applied to the per-codec surface count in create_decoder(). Magic number taken from original MPlayer vaapi patch.
+#define MAX_DECODER_SURFACES 21
+
+#define MAX_SURFACES (MAX_DECODER_SURFACES + ADDTIONAL_SURFACES) // size of priv.surfaces[]
+
+struct priv {
+    struct mp_vaapi_ctx *ctx;   // set from hwdec_info->vaapi_ctx in init()
+    VADisplay display;          // copy of ctx->display, set in init()
+
+    // libavcodec shared struct
+    struct vaapi_context *va_context;           // points at va_context_storage; installed as avctx->hwaccel_context
+    struct vaapi_context va_context_storage;    // holds the VA display, config ID and context ID
+
+    int format, w, h;           // image format and size the decoder was last (re)created for
+    VASurfaceID surfaces[MAX_SURFACES];  // surface IDs passed to vaCreateContext(); reset to VA_INVALID_ID by destroy_decoder()
+};
+
+struct profile_entry {
+    enum AVCodecID av_codec;    // libavcodec codec ID this entry applies to
+    int ff_profile;             // FF_PROFILE_* value; FF_PROFILE_UNKNOWN matches any profile in find_codec()
+    VAProfile va_profile;       // VA profile requested from the driver for this entry
+    int maxrefs;                // maximum number of reference frames; starting point for the surface count in create_decoder()
+};
+
+// Build one profile_entry from the name suffixes of the libavcodec codec ID
+// (AV_CODEC_ID_*), the libavcodec profile (FF_PROFILE_*) and the VA profile
+// (VAProfile*), plus the reference frame count.
+#define PE(codec, ffprof, vaprof, refs)     \
+    {.av_codec   = AV_CODEC_ID_ ## codec,   \
+     .ff_profile = FF_PROFILE_ ## ffprof,   \
+     .va_profile = VAProfile ## vaprof,     \
+     .maxrefs    = refs}
+
+// Order matters: find_codec() returns the first matching entry, and an
+// FF_PROFILE_UNKNOWN entry matches every profile of its codec, so each
+// codec's UNKNOWN entry has to stay last within its group.
+static const struct profile_entry profiles[] = {
+    PE(MPEG2VIDEO, MPEG2_SIMPLE,              MPEG2Simple,             2),
+    PE(MPEG2VIDEO, UNKNOWN,                   MPEG2Main,               2),
+
+    PE(H264,       H264_BASELINE,             H264Baseline,            16),
+    PE(H264,       H264_CONSTRAINED_BASELINE, H264ConstrainedBaseline, 16),
+    PE(H264,       H264_MAIN,                 H264Main,                16),
+    PE(H264,       UNKNOWN,                   H264High,                16),
+
+    PE(WMV3,       VC1_SIMPLE,                VC1Simple,               2),
+    PE(WMV3,       VC1_MAIN,                  VC1Main,                 2),
+    PE(WMV3,       UNKNOWN,                   VC1Advanced,             2),
+
+    PE(VC1,        VC1_SIMPLE,                VC1Simple,               2),
+    PE(VC1,        VC1_MAIN,                  VC1Main,                 2),
+    PE(VC1,        UNKNOWN,                   VC1Advanced,             2),
+
+    // No idea whether these are correct
+    PE(MPEG4,      MPEG4_SIMPLE,              MPEG4Simple,             2),
+    PE(MPEG4,      MPEG4_MAIN,                MPEG4Main,               2),
+    PE(MPEG4,      UNKNOWN,                   MPEG4AdvancedSimple,     2),
+};
+
+// Return the first profiles[] entry for codec `id` whose libavcodec profile
+// either equals ff_profile or is the FF_PROFILE_UNKNOWN catch-all.
+// Returns NULL if no entry matches.
+static const struct profile_entry *find_codec(enum AVCodecID id, int ff_profile)
+{
+    const struct profile_entry *end = profiles + MP_ARRAY_SIZE(profiles);
+
+    for (const struct profile_entry *pe = profiles; pe < end; pe++) {
+        if (pe->av_codec != id)
+            continue;
+        if (pe->ff_profile == ff_profile ||
+            pe->ff_profile == FF_PROFILE_UNKNOWN)
+            return pe;
+    }
+    return NULL;
+}
+
+
+// Return a printable name for a VA profile, for use in log messages.
+// Profiles not listed here yield "<unknown>".
+static const char *str_va_profile(VAProfile profile)
+{
+    switch (profile) {
+#define PROFILE(profile) \
+        case VAProfile##profile: return "VAProfile" #profile
+        PROFILE(MPEG2Simple);
+        PROFILE(MPEG2Main);
+        PROFILE(MPEG4Simple);
+        PROFILE(MPEG4AdvancedSimple);
+        PROFILE(MPEG4Main);
+        PROFILE(H264Baseline);
+        // Used by the profiles[] table, so it must be nameable here too;
+        // otherwise the "not available" error prints "<unknown>" for it.
+        PROFILE(H264ConstrainedBaseline);
+        PROFILE(H264Main);
+        PROFILE(H264High);
+        PROFILE(VC1Simple);
+        PROFILE(VC1Main);
+        PROFILE(VC1Advanced);
+#undef PROFILE
+        default: break;
+    }
+    return "<unknown>";
+}
+
+// Map the hwaccel image format to the VA entrypoint it needs, and return
+// that entrypoint if it is present in ep[0..num_ep-1]. Returns -1 if the
+// format has no mapping or the entrypoint is not in the list.
+static int find_entrypoint(int format, VAEntrypoint *ep, int num_ep)
+{
+    int wanted;
+    if (format == IMGFMT_VAAPI)
+        wanted = VAEntrypointVLD;
+    else if (format == IMGFMT_VAAPI_MPEG2_IDCT)
+        wanted = VAEntrypointIDCT;
+    else if (format == IMGFMT_VAAPI_MPEG2_MOCO)
+        wanted = VAEntrypointMoComp;
+    else
+        wanted = -1;
+
+    int n = 0;
+    while (n < num_ep && ep[n] != wanted)
+        n++;
+    return n < num_ep ? wanted : -1;
+}
+
+// Query one libva display attribute (which one depends on the libva
+// version) and derive the "direct mapping" flag from its value.
+// Returns 0 if the attribute cannot be queried. create_decoder() doubles
+// the surface count when this returns 0.
+static int is_direct_mapping(VADisplay display)
+{
+    VADisplayAttribute attr;
+
+#if VA_CHECK_VERSION(0,34,0)
+    attr.type  = VADisplayAttribRenderMode;
+#else
+    /* If the driver doesn't make a copy of the VA surface for
+       display, then we have to retain it until it's no longer the
+       visible surface. In other words, if the driver is using
+       DirectSurface mode, we don't want to decode the new surface
+       into the previous one that was used for display. */
+    attr.type  = VADisplayAttribDirectSurface;
+#endif
+    attr.flags = VA_DISPLAY_ATTRIB_GETTABLE;
+
+    if (vaGetDisplayAttributes(display, &attr, 1) != VA_STATUS_SUCCESS)
+        return 0;
+
+#if VA_CHECK_VERSION(0,34,0)
+    return !(attr.value & (VA_RENDER_MODE_LOCAL_OVERLAY|
+                           VA_RENDER_MODE_EXTERNAL_OVERLAY));
+#else
+    return !attr.value;
+#endif
+}
+
+// Make vo_vaapi.c pool the required number of surfaces.
+// This is very touchy: vo_vaapi.c must not free surfaces while we decode,
+// and we must allocate only surfaces that were passed to the decoder on
+// creation.
+// To get there, all previous surfaces are flushed, then `num` surfaces are
+// requested and their IDs recorded in p->surfaces[]. Finally the images are
+// released again, relying on vo_vaapi.c keeping released surfaces in its
+// pool and serving later requests only out of that pool.
+// Returns 0 on success, -1 if a surface could not be obtained.
+static int preallocate_surfaces(struct lavc_ctx *ctx, int va_rt_format, int num)
+{
+    struct priv *p = ctx->hwdec_priv;
+    struct mp_vaapi_ctx *va = p->ctx;
+    struct mp_image *held[MAX_SURFACES] = {0};
+
+    va->flush(va); // free previously allocated surfaces
+
+    int got = 0;
+    while (got < num) {
+        struct mp_image *img = va->get_surface(va, va_rt_format, p->format,
+                                               p->w, p->h);
+        if (!img)
+            break;
+        held[got] = img;
+        p->surfaces[got] = (uintptr_t)img->planes[3];
+        got++;
+    }
+
+    // Hand every image back; slots never filled are still NULL.
+    for (int n = 0; n < num; n++)
+        talloc_free(held[n]);
+
+    return got >= num ? 0 : -1;
+}
+
+// Tear down the VA context and config (if any) and mark every slot of
+// p->surfaces[] as VA_INVALID_ID. Safe to call when nothing was created.
+static void destroy_decoder(struct lavc_ctx *ctx)
+{
+    struct priv *p = ctx->hwdec_priv;
+    struct vaapi_context *va = p->va_context;
+
+    // The context goes first, then the config it was created from.
+    if (va->context_id != VA_INVALID_ID) {
+        vaDestroyContext(p->display, va->context_id);
+        va->context_id = VA_INVALID_ID;
+    }
+    if (va->config_id != VA_INVALID_ID) {
+        vaDestroyConfig(p->display, va->config_id);
+        va->config_id = VA_INVALID_ID;
+    }
+
+    VASurfaceID *end = p->surfaces + MAX_SURFACES;
+    for (VASurfaceID *s = p->surfaces; s < end; s++)
+        *s = VA_INVALID_ID;
+}
+
+// (Re)create the VA config and context for the codec of ctx->avctx, using
+// the format and size stored in p->format, p->w and p->h.
+// Any previous decoder state is destroyed first. The required surfaces are
+// preallocated and their IDs passed to vaCreateContext().
+// Returns 0 on success and -1 on failure. On failure, whatever was already
+// created is left in p->va_context and is cleaned up by the next
+// destroy_decoder() call.
+static int create_decoder(struct lavc_ctx *ctx)
+{
+    // Parent for all temporary allocations; freed on every exit path.
+    void *tmp = talloc_new(NULL);
+
+    struct priv *p = ctx->hwdec_priv;
+    VAStatus status;
+    int res = -1;
+
+    assert(IMGFMT_IS_VAAPI(p->format));
+
+    destroy_decoder(ctx);
+
+    const struct profile_entry *pe = find_codec(ctx->avctx->codec_id,
+                                                ctx->avctx->profile);
+    if (!pe) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Unknown codec!\n");
+        goto error;
+    }
+
+    int num_profiles = vaMaxNumProfiles(p->display);
+    VAProfile *va_profiles = talloc_zero_array(tmp, VAProfile, num_profiles);
+    status = vaQueryConfigProfiles(p->display, va_profiles, &num_profiles);
+    if (!check_va_status(status, "vaQueryConfigProfiles()"))
+        goto error;
+    mp_msg(MSGT_VO, MSGL_DBG2, "[vaapi] %d profiles available:\n", num_profiles);
+    for (int i = 0; i < num_profiles; i++)
+        mp_msg(MSGT_VO, MSGL_DBG2, "  %s\n", str_va_profile(va_profiles[i]));
+
+    bool profile_found = false;
+    for (int i = 0; i < num_profiles; i++) {
+        if (pe->va_profile == va_profiles[i]) {
+            profile_found = true;
+            break;
+        }
+    }
+    if (!profile_found) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Profile '%s' not available.\n",
+               str_va_profile(pe->va_profile));
+        goto error;
+    }
+
+    int num_surfaces = pe->maxrefs;
+    if (!is_direct_mapping(p->display)) {
+        mp_msg(MSGT_VO, MSGL_V, "[vaapi] No direct mapping.\n");
+        // Note: not sure why it has to be *=2 rather than +=1.
+        num_surfaces *= 2;
+    }
+    num_surfaces = MPMIN(num_surfaces, MAX_DECODER_SURFACES) + ADDTIONAL_SURFACES;
+
+    if (num_surfaces > MAX_SURFACES) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Internal error: too many surfaces.\n");
+        goto error;
+    }
+
+    if (preallocate_surfaces(ctx, VA_RT_FORMAT_YUV420, num_surfaces) < 0) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Could not allocate surfaces.\n");
+        goto error;
+    }
+
+    int num_ep = vaMaxNumEntrypoints(p->display);
+    VAEntrypoint *ep = talloc_zero_array(tmp, VAEntrypoint, num_ep);
+    status = vaQueryConfigEntrypoints(p->display, pe->va_profile, ep, &num_ep);
+    if (!check_va_status(status, "vaQueryConfigEntrypoints()"))
+        goto error;
+
+    // Keep the result in an int: find_entrypoint() signals failure with -1,
+    // and a VAEntrypoint enum may be given an unsigned type by the compiler,
+    // in which case the "< 0" test below could never be true.
+    int entrypoint = find_entrypoint(p->format, ep, num_ep);
+    if (entrypoint < 0) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Could not find VA entrypoint.\n");
+        goto error;
+    }
+
+    VAConfigAttrib attrib = {
+        .type = VAConfigAttribRTFormat,
+    };
+    status = vaGetConfigAttributes(p->display, pe->va_profile, entrypoint,
+                                   &attrib, 1);
+    if (!check_va_status(status, "vaGetConfigAttributes()"))
+        goto error;
+    if ((attrib.value & VA_RT_FORMAT_YUV420) == 0) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Chroma format not supported.\n");
+        goto error;
+    }
+
+    status = vaCreateConfig(p->display, pe->va_profile, entrypoint, &attrib, 1,
+                            &p->va_context->config_id);
+    if (!check_va_status(status, "vaCreateConfig()"))
+        goto error;
+
+    status = vaCreateContext(p->display, p->va_context->config_id,
+                             p->w, p->h, VA_PROGRESSIVE,
+                             p->surfaces, num_surfaces,
+                             &p->va_context->context_id);
+    if (!check_va_status(status, "vaCreateContext()"))
+        goto error;
+
+    res = 0;
+error:
+    talloc_free(tmp);
+    return res;
+}
+
+// Provide a surface-backed image for libavcodec to decode into.
+// The decoder is (re)created first if the format or size changed, or if no
+// VA context exists yet. Only surfaces whose ID is recorded in
+// p->surfaces[] are accepted; anything else is released again.
+// Returns NULL if the frame format is not a VAAPI format, decoder creation
+// fails, or no suitable surface is available.
+static struct mp_image *allocate_image(struct lavc_ctx *ctx, AVFrame *frame)
+{
+    struct priv *p = ctx->hwdec_priv;
+
+    int format = pixfmt2imgfmt(frame->format);
+    if (!IMGFMT_IS_VAAPI(format))
+        return NULL;
+
+    // frame->width/height lie. Using them breaks with non-mod 16 video.
+    int w = ctx->avctx->width;
+    int h = ctx->avctx->height;
+
+    bool params_changed = format != p->format || w != p->w || h != p->h;
+    if (params_changed || p->va_context->context_id == VA_INVALID_ID) {
+        p->format = format;
+        p->w = w;
+        p->h = h;
+        if (create_decoder(ctx) < 0)
+            return NULL;
+    }
+
+    struct mp_image *img = p->ctx->get_surface(p->ctx, VA_RT_FORMAT_YUV420,
+                                               format, p->w, p->h);
+    if (img) {
+        uintptr_t id = (uintptr_t)img->planes[3];
+        int n = 0;
+        while (n < MAX_SURFACES && p->surfaces[n] != id)
+            n++;
+        if (n < MAX_SURFACES)
+            return img;
+        // Not one of the surfaces the decoder was created with.
+        talloc_free(img);
+    }
+
+    mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Insufficient number of surfaces.\n");
+    return NULL;
+}
+
+// Destroy the decoder and free the private state. Does nothing if init()
+// never set ctx->hwdec_priv.
+static void uninit(struct lavc_ctx *ctx)
+{
+    if (ctx->hwdec_priv) {
+        destroy_decoder(ctx);
+        talloc_free(ctx->hwdec_priv);
+        ctx->hwdec_priv = NULL;
+    }
+}
+
+// Allocate the private state, take the VA display from the context supplied
+// in ctx->hwdec_info, and install the shared vaapi_context as libavcodec's
+// hwaccel_context. No VA config/context is created here; both IDs start
+// out as VA_INVALID_ID. Always returns 0.
+static int init(struct lavc_ctx *ctx)
+{
+    struct priv *p = talloc_ptrtype(NULL, p);
+    *p = (struct priv) {
+        .ctx = ctx->hwdec_info->vaapi_ctx,
+        .va_context = &p->va_context_storage,
+    };
+    p->display = p->ctx->display;
+
+    struct vaapi_context *va = p->va_context;
+    va->display = p->display;
+    va->config_id = VA_INVALID_ID;
+    va->context_id = VA_INVALID_ID;
+
+    ctx->hwdec_priv = p;
+    ctx->avctx->hwaccel_context = va;
+
+    return 0;
+}
+
+
+// Check whether VAAPI decoding can be attempted for `decoder`.
+// Returns HWDEC_ERR_NO_CTX if no VAAPI context was provided,
+// HWDEC_ERR_NO_CODEC if the codec has no entry in profiles[], else 0.
+static int probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info,
+                 const char *decoder)
+{
+    if (!(info && info->vaapi_ctx))
+        return HWDEC_ERR_NO_CTX;
+
+    enum AVCodecID codec = mp_codec_to_av_codec_id(decoder);
+    return find_codec(codec, FF_PROFILE_UNKNOWN) ? 0 : HWDEC_ERR_NO_CODEC;
+}
+
+const struct vd_lavc_hwdec mp_vd_lavc_vaapi = { // VAAPI hwdec entry; added to hwdec_list[] in vd_lavc.c when CONFIG_VAAPI is set
+    .type = HWDEC_VAAPI,
+    .image_formats = (const int[]) {IMGFMT_VAAPI, IMGFMT_VAAPI_MPEG2_IDCT,
+                                    IMGFMT_VAAPI_MPEG2_MOCO, 0}, // list ends with a 0 entry
+    .probe = probe,
+    .init = init,
+    .uninit = uninit,
+    .allocate_image = allocate_image, // returns only surfaces registered with the VA context
+};
diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c
index 2fc7a1ea4c..639e46ebcc 100644
--- a/video/decode/vd_lavc.c
+++ b/video/decode/vd_lavc.c
@@ -84,6 +84,7 @@ const m_option_t lavc_decode_opts_conf[] = {
 
 const struct vd_lavc_hwdec mp_vd_lavc_vdpau;
 const struct vd_lavc_hwdec mp_vd_lavc_vdpau_old;
+const struct vd_lavc_hwdec mp_vd_lavc_vaapi;
 
 static const struct vd_lavc_hwdec mp_vd_lavc_crystalhd = {
     .type = HWDEC_CRYSTALHD,
@@ -113,6 +114,9 @@ static const struct vd_lavc_hwdec *hwdec_list[] = {
 #endif // CONFIG_VDPAU
     &mp_vd_lavc_vda,
     &mp_vd_lavc_crystalhd,
+#if CONFIG_VAAPI
+    &mp_vd_lavc_vaapi,
+#endif
     NULL
 };
author	wm4 <wm4@nowhere>	2013-08-09 14:01:30 +0200
committer	wm4 <wm4@nowhere>	2013-08-12 01:12:02 +0200
commit	2827295703c74e3c119df9a435aa856e268c2ea9 (patch)
tree	7464b1630d76e84f1abdf53680544a74b7dab300 /video/decode
parent	c7da4ba74469bc6c404c396340ffadc748535f6e (diff)
download	mpv-2827295703c74e3c119df9a435aa856e268c2ea9.tar.bz2 mpv-2827295703c74e3c119df9a435aa856e268c2ea9.tar.xz