diff options
Diffstat (limited to 'video/mp_image.c')
-rw-r--r-- | video/mp_image.c | 82 |
1 files changed, 64 insertions, 18 deletions
diff --git a/video/mp_image.c b/video/mp_image.c index debdbbb201..57650eea0d 100644 --- a/video/mp_image.c +++ b/video/mp_image.c @@ -35,6 +35,7 @@ #include "mp_image.h" #include "sws_utils.h" #include "fmt-conversion.h" +#include "gpu_memcpy.h" #include "video/filter/vf.h" @@ -300,7 +301,30 @@ void mp_image_unrefp(struct mp_image **p_img) *p_img = NULL; } -void mp_image_copy(struct mp_image *dst, struct mp_image *src) +typedef void *(*memcpy_fn)(void *d, const void *s, size_t size); + +static void memcpy_pic_cb(void *dst, const void *src, int bytesPerLine, int height, + int dstStride, int srcStride, memcpy_fn cpy) +{ + if (bytesPerLine == dstStride && dstStride == srcStride && height) { + if (srcStride < 0) { + src = (uint8_t*)src + (height - 1) * srcStride; + dst = (uint8_t*)dst + (height - 1) * dstStride; + srcStride = -srcStride; + } + + cpy(dst, src, srcStride * (height - 1) + bytesPerLine); + } else { + for (int i = 0; i < height; i++) { + cpy(dst, src, bytesPerLine); + src = (uint8_t*)src + srcStride; + dst = (uint8_t*)dst + dstStride; + } + } +} + +static void mp_image_copy_cb(struct mp_image *dst, struct mp_image *src, + memcpy_fn cpy) { assert(dst->imgfmt == src->imgfmt); assert(dst->w == src->w && dst->h == src->h); @@ -308,14 +332,50 @@ void mp_image_copy(struct mp_image *dst, struct mp_image *src) for (int n = 0; n < dst->num_planes; n++) { int line_bytes = (mp_image_plane_w(dst, n) * dst->fmt.bpp[n] + 7) / 8; int plane_h = mp_image_plane_h(dst, n); - memcpy_pic(dst->planes[n], src->planes[n], line_bytes, plane_h, - dst->stride[n], src->stride[n]); + memcpy_pic_cb(dst->planes[n], src->planes[n], line_bytes, plane_h, + dst->stride[n], src->stride[n], cpy); } // Watch out for AV_PIX_FMT_FLAG_PSEUDOPAL retardation if ((dst->fmt.flags & MP_IMGFLAG_PAL) && dst->planes[1] && src->planes[1]) memcpy(dst->planes[1], src->planes[1], MP_PALETTE_SIZE); } +void mp_image_copy(struct mp_image *dst, struct mp_image *src) +{ + mp_image_copy_cb(dst, src, memcpy); +} + +void mp_image_copy_gpu(struct mp_image *dst, struct mp_image *src) +{ +#if HAVE_SSE4_INTRINSICS + if (av_get_cpu_flags() & AV_CPU_FLAG_SSE4) { + mp_image_copy_cb(dst, src, gpu_memcpy); + return; + } +#endif + mp_image_copy(dst, src); +} + +// Helper, only for outputting some log info. +void mp_check_gpu_memcpy(struct mp_log *log, bool *once) +{ + if (once) { + if (*once) + return; + *once = true; + } + + bool have_sse = false; +#if HAVE_SSE4_INTRINSICS + have_sse = av_get_cpu_flags() & AV_CPU_FLAG_SSE4; +#endif + if (have_sse) { + mp_verbose(log, "Using SSE4 memcpy\n"); + } else { + mp_warn(log, "Using fallback memcpy (slow)\n"); + } +} + void mp_image_copy_attributes(struct mp_image *dst, struct mp_image *src) { dst->pict_type = src->pict_type; @@ -675,21 +735,7 @@ struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img) void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height, int dstStride, int srcStride) { - if (bytesPerLine == dstStride && dstStride == srcStride && height) { - if (srcStride < 0) { - src = (uint8_t*)src + (height - 1) * srcStride; - dst = (uint8_t*)dst + (height - 1) * dstStride; - srcStride = -srcStride; - } - - memcpy(dst, src, srcStride * (height - 1) + bytesPerLine); - } else { - for (int i = 0; i < height; i++) { - memcpy(dst, src, bytesPerLine); - src = (uint8_t*)src + srcStride; - dst = (uint8_t*)dst + dstStride; - } - } + memcpy_pic_cb(dst, src, bytesPerLine, height, dstStride, srcStride, memcpy); } void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride) |