From b53cb8de5e8552bfe142df9cea3de6907a187dc7 Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 25 Jan 2016 21:00:53 +0100 Subject: vd_lavc: delay images before reading them back Facilitates hardware pipelining in particular with nvidia/dxva. --- video/decode/dxva2.c | 4 +++- video/decode/lavc.h | 6 ++++++ video/decode/vaapi.c | 2 +- video/decode/vd_lavc.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 52 insertions(+), 9 deletions(-) (limited to 'video/decode') diff --git a/video/decode/dxva2.c b/video/decode/dxva2.c index 2dfbae3dd3..0400b74bab 100644 --- a/video/decode/dxva2.c +++ b/video/decode/dxva2.c @@ -39,6 +39,8 @@ #include "video/hwdec.h" #include "video/d3d.h" +#define ADDTIONAL_SURFACES HWDEC_DELAY_QUEUE_COUNT + // A minor evil. #ifndef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO #define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 @@ -574,7 +576,7 @@ static int dxva2_create_decoder(struct lavc_ctx *s, int w, int h, surface_alignment = 16; /* 4 base work surfaces */ - ctx->num_surfaces = 4; + ctx->num_surfaces = 4 + ADDTIONAL_SURFACES; /* add surfaces based on number of possible refs */ if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_HEVC) diff --git a/video/decode/lavc.h b/video/decode/lavc.h index 76b7ac7883..db8b26eefa 100644 --- a/video/decode/lavc.h +++ b/video/decode/lavc.h @@ -9,6 +9,8 @@ #include "video/mp_image.h" #include "video/hwdec.h" +#define HWDEC_DELAY_QUEUE_COUNT 2 + typedef struct lavc_ctx { struct mp_log *log; struct MPOpts *opts; @@ -23,6 +25,10 @@ typedef struct lavc_ctx { bool hwdec_failed; bool hwdec_notified; + struct mp_image **delay_queue; + int num_delay_queue; + int max_delay_queue; + // From VO struct mp_hwdec_info *hwdec_info; diff --git a/video/decode/vaapi.c b/video/decode/vaapi.c index 313a901db2..4b8987cf24 100644 --- a/video/decode/vaapi.c +++ b/video/decode/vaapi.c @@ -44,7 +44,7 @@ * Note that redundant additional surfaces also might allow for some * buffering (i.e. not trying to reuse a surface while it's busy). */ -#define ADDTIONAL_SURFACES 6 +#define ADDTIONAL_SURFACES (6 + HWDEC_DELAY_QUEUE_COUNT) // Some upper bound. #define MAX_SURFACES 25 diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c index f480d185b6..29e7d1108f 100644 --- a/video/decode/vd_lavc.c +++ b/video/decode/vd_lavc.c @@ -391,6 +391,10 @@ static void init_avctx(struct dec_video *vd, const char *decoder, avctx->get_buffer2 = get_buffer2_hwdec; if (ctx->hwdec->init(ctx) < 0) goto error; + // This can increase efficiency by not blocking on the hardware + // pipeline by reading back immediately after decoding. + if (ctx->hwdec->process_image) + ctx->max_delay_queue = HWDEC_DELAY_QUEUE_COUNT; } else { mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads); } @@ -454,6 +458,17 @@ static void reset_avctx(struct dec_video *vd) ctx->flushing = false; } +static void flush_all(struct dec_video *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + for (int n = 0; n < ctx->num_delay_queue; n++) + talloc_free(ctx->delay_queue[n]); + ctx->num_delay_queue = 0; + + reset_avctx(vd); +} + static void uninit_avctx(struct dec_video *vd) { vd_ffmpeg_ctx *ctx = vd->priv; @@ -474,9 +489,11 @@ static void uninit_avctx(struct dec_video *vd) av_frame_free(&ctx->pic); - ctx->flushing = false; + flush_all(vd); + ctx->hwdec_failed = false; ctx->hwdec_fail_count = 0; + ctx->max_delay_queue = 0; } static void update_image_params(struct dec_video *vd, AVFrame *frame, @@ -618,6 +635,22 @@ static int get_buffer2_hwdec(AVCodecContext *avctx, AVFrame *pic, int flags) return 0; } +static struct mp_image *read_output(struct dec_video *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + if (!ctx->num_delay_queue) + return NULL; + + struct mp_image *res = ctx->delay_queue[0]; + MP_TARRAY_REMOVE_AT(ctx->delay_queue, ctx->num_delay_queue, 0); + + if (ctx->hwdec && ctx->hwdec->process_image) + res = ctx->hwdec->process_image(ctx, res); + + return mp_img_swap_to_native(res); +} + static void decode(struct dec_video *vd, struct demux_packet *packet, int flags, struct mp_image **out_image) { @@ -671,8 +704,11 @@ static void decode(struct dec_video *vd, struct demux_packet *packet, } // Skipped frame, or delayed output due to multithreaded decoding. - if (!got_picture) + if (!got_picture) { + if (!packet) + *out_image = read_output(vd); return; + } ctx->hwdec_fail_count = 0; @@ -701,10 +737,9 @@ static void decode(struct dec_video *vd, struct demux_packet *packet, av_frame_unref(ctx->pic); - if (ctx->hwdec && ctx->hwdec->process_image) - mpi = ctx->hwdec->process_image(ctx, mpi); - - *out_image = mp_img_swap_to_native(mpi); + MP_TARRAY_APPEND(ctx, ctx->delay_queue, ctx->num_delay_queue, mpi); + if (ctx->num_delay_queue > ctx->max_delay_queue) + *out_image = read_output(vd); } static struct mp_image *decode_with_fallback(struct dec_video *vd, @@ -740,7 +775,7 @@ static int control(struct dec_video *vd, int cmd, void *arg) vd_ffmpeg_ctx *ctx = vd->priv; switch (cmd) { case VDCTRL_RESET: - reset_avctx(vd); + flush_all(vd); return CONTROL_TRUE; case VDCTRL_QUERY_UNSEEN_FRAMES: { AVCodecContext *avctx = ctx->avctx; -- cgit v1.2.3