From a95ce07273a7ee4b0f8db0d5de236d9708ddcd0d Mon Sep 17 00:00:00 2001
From: James Ross-Gowan
Date: Mon, 27 Oct 2014 22:28:20 +1100
Subject: dxva2: use gpu_memcpy with unmatched strides

Like memcpy_pic, this checks if the strides match first.
---
 video/decode/dxva2.c | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/video/decode/dxva2.c b/video/decode/dxva2.c
index 00c33fd43d..0cd5735d2d 100644
--- a/video/decode/dxva2.c
+++ b/video/decode/dxva2.c
@@ -264,15 +264,36 @@ static void copy_nv12_fallback(struct mp_image *dest, uint8_t *src_bits,
 static void copy_nv12_gpu_sse4(struct mp_image *dest, uint8_t *src_bits,
                                unsigned src_pitch, unsigned surf_height)
 {
-    // Unfortunately the fallback must be used if the stride doesn't match
-    if (dest->stride[0] != src_pitch) {
-        copy_nv12_fallback(dest, src_bits, src_pitch, surf_height);
-        return;
-    }
+    const int lines = dest->h;
+    const int stride_y = dest->stride[0];
+    const int stride_uv = dest->stride[1];
+
+    // If the strides match, the image can be copied in one go
+    if (stride_y == src_pitch && stride_uv == src_pitch) {
+        const size_t size = lines * src_pitch;
+        gpu_memcpy(dest->planes[0], src_bits, size);
+        gpu_memcpy(dest->planes[1], src_bits + src_pitch * surf_height, size / 2);
+
+    } else {
+        // Copy the Y plane line-by-line
+        uint8_t *dest_y = dest->planes[0];
+        const uint8_t *src_y = src_bits;
+        const int bytes_per_line = dest->w;
+        for (int i = 0; i < lines; i++) {
+            gpu_memcpy(dest_y, src_y, bytes_per_line);
+            dest_y += stride_y;
+            src_y += src_pitch;
+        }
 
-    unsigned size = dest->h * src_pitch;
-    gpu_memcpy(dest->planes[0], src_bits, size);
-    gpu_memcpy(dest->planes[1], src_bits + src_pitch * surf_height, size / 2);
+        // Copy the UV plane line-by-line
+        uint8_t *dest_uv = dest->planes[1];
+        const uint8_t *src_uv = src_bits + src_pitch * surf_height;
+        for (int i = 0; i < lines / 2; i++) {
+            gpu_memcpy(dest_uv, src_uv, bytes_per_line);
+            dest_uv += stride_uv;
+            src_uv += src_pitch;
+        }
+    }
 }
 
 static struct mp_image *dxva2_retrieve_image(struct lavc_ctx *s,
-- 
cgit v1.2.3