summary | refs | log | tree | commit | diff | stats
path: root/video/decode
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere>, 2016-01-25 21:00:53 +0100
committer: wm4 <wm4@nowhere>, 2016-01-25 21:46:40 +0100
commit: b53cb8de5e8552bfe142df9cea3de6907a187dc7 (patch)
tree: 2cd69d9b99a90389be3653bb5dc54e7bbd276ec7 /video/decode
parent: 271cabe6a5bd8342b56ab855d033abba63dfead6 (diff)
download: mpv-b53cb8de5e8552bfe142df9cea3de6907a187dc7.tar.bz2
download: mpv-b53cb8de5e8552bfe142df9cea3de6907a187dc7.tar.xz
vd_lavc: delay images before reading them back
Facilitates hardware pipelining in particular with nvidia/dxva.
Diffstat (limited to 'video/decode')
-rw-r--r-- video/decode/dxva2.c | 4
-rw-r--r-- video/decode/lavc.h | 6
-rw-r--r-- video/decode/vaapi.c | 2
-rw-r--r-- video/decode/vd_lavc.c | 49
4 files changed, 52 insertions, 9 deletions
diff --git a/video/decode/dxva2.c b/video/decode/dxva2.c
index 2dfbae3dd3..0400b74bab 100644
--- a/video/decode/dxva2.c
+++ b/video/decode/dxva2.c
@@ -39,6 +39,8 @@
#include "video/hwdec.h"
#include "video/d3d.h"
+#define ADDTIONAL_SURFACES HWDEC_DELAY_QUEUE_COUNT
+
// A minor evil.
#ifndef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2
@@ -574,7 +576,7 @@ static int dxva2_create_decoder(struct lavc_ctx *s, int w, int h,
surface_alignment = 16;
/* 4 base work surfaces */
- ctx->num_surfaces = 4;
+ ctx->num_surfaces = 4 + ADDTIONAL_SURFACES;
/* add surfaces based on number of possible refs */
if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_HEVC)
diff --git a/video/decode/lavc.h b/video/decode/lavc.h
index 76b7ac7883..db8b26eefa 100644
--- a/video/decode/lavc.h
+++ b/video/decode/lavc.h
@@ -9,6 +9,8 @@
#include "video/mp_image.h"
#include "video/hwdec.h"
+#define HWDEC_DELAY_QUEUE_COUNT 2
+
typedef struct lavc_ctx {
struct mp_log *log;
struct MPOpts *opts;
@@ -23,6 +25,10 @@ typedef struct lavc_ctx {
bool hwdec_failed;
bool hwdec_notified;
+ struct mp_image **delay_queue;
+ int num_delay_queue;
+ int max_delay_queue;
+
// From VO
struct mp_hwdec_info *hwdec_info;
diff --git a/video/decode/vaapi.c b/video/decode/vaapi.c
index 313a901db2..4b8987cf24 100644
--- a/video/decode/vaapi.c
+++ b/video/decode/vaapi.c
@@ -44,7 +44,7 @@
* Note that redundant additional surfaces also might allow for some
* buffering (i.e. not trying to reuse a surface while it's busy).
*/
-#define ADDTIONAL_SURFACES 6
+#define ADDTIONAL_SURFACES (6 + HWDEC_DELAY_QUEUE_COUNT)
// Some upper bound.
#define MAX_SURFACES 25
diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c
index f480d185b6..29e7d1108f 100644
--- a/video/decode/vd_lavc.c
+++ b/video/decode/vd_lavc.c
@@ -391,6 +391,10 @@ static void init_avctx(struct dec_video *vd, const char *decoder,
avctx->get_buffer2 = get_buffer2_hwdec;
if (ctx->hwdec->init(ctx) < 0)
goto error;
+ // This can increase efficiency by not blocking on the hardware
+ // pipeline by reading back immediately after decoding.
+ if (ctx->hwdec->process_image)
+ ctx->max_delay_queue = HWDEC_DELAY_QUEUE_COUNT;
} else {
mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads);
}
@@ -454,6 +458,17 @@ static void reset_avctx(struct dec_video *vd)
ctx->flushing = false;
}
+static void flush_all(struct dec_video *vd)
+{
+ vd_ffmpeg_ctx *ctx = vd->priv;
+
+ for (int n = 0; n < ctx->num_delay_queue; n++)
+ talloc_free(ctx->delay_queue[n]);
+ ctx->num_delay_queue = 0;
+
+ reset_avctx(vd);
+}
+
static void uninit_avctx(struct dec_video *vd)
{
vd_ffmpeg_ctx *ctx = vd->priv;
@@ -474,9 +489,11 @@ static void uninit_avctx(struct dec_video *vd)
av_frame_free(&ctx->pic);
- ctx->flushing = false;
+ flush_all(vd);
+
ctx->hwdec_failed = false;
ctx->hwdec_fail_count = 0;
+ ctx->max_delay_queue = 0;
}
static void update_image_params(struct dec_video *vd, AVFrame *frame,
@@ -618,6 +635,22 @@ static int get_buffer2_hwdec(AVCodecContext *avctx, AVFrame *pic, int flags)
return 0;
}
+static struct mp_image *read_output(struct dec_video *vd)
+{
+ vd_ffmpeg_ctx *ctx = vd->priv;
+
+ if (!ctx->num_delay_queue)
+ return NULL;
+
+ struct mp_image *res = ctx->delay_queue[0];
+ MP_TARRAY_REMOVE_AT(ctx->delay_queue, ctx->num_delay_queue, 0);
+
+ if (ctx->hwdec && ctx->hwdec->process_image)
+ res = ctx->hwdec->process_image(ctx, res);
+
+ return mp_img_swap_to_native(res);
+}
+
static void decode(struct dec_video *vd, struct demux_packet *packet,
int flags, struct mp_image **out_image)
{
@@ -671,8 +704,11 @@ static void decode(struct dec_video *vd, struct demux_packet *packet,
}
// Skipped frame, or delayed output due to multithreaded decoding.
- if (!got_picture)
+ if (!got_picture) {
+ if (!packet)
+ *out_image = read_output(vd);
return;
+ }
ctx->hwdec_fail_count = 0;
@@ -701,10 +737,9 @@ static void decode(struct dec_video *vd, struct demux_packet *packet,
av_frame_unref(ctx->pic);
- if (ctx->hwdec && ctx->hwdec->process_image)
- mpi = ctx->hwdec->process_image(ctx, mpi);
-
- *out_image = mp_img_swap_to_native(mpi);
+ MP_TARRAY_APPEND(ctx, ctx->delay_queue, ctx->num_delay_queue, mpi);
+ if (ctx->num_delay_queue > ctx->max_delay_queue)
+ *out_image = read_output(vd);
}
static struct mp_image *decode_with_fallback(struct dec_video *vd,
@@ -740,7 +775,7 @@ static int control(struct dec_video *vd, int cmd, void *arg)
vd_ffmpeg_ctx *ctx = vd->priv;
switch (cmd) {
case VDCTRL_RESET:
- reset_avctx(vd);
+ flush_all(vd);
return CONTROL_TRUE;
case VDCTRL_QUERY_UNSEEN_FRAMES: {
AVCodecContext *avctx = ctx->avctx;