summary | refs | log | tree | commit | diff | stats
path: root/video/mp_image.c
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere> 2015-09-25 18:58:17 +0200
committer: wm4 <wm4@nowhere> 2015-09-25 19:18:16 +0200
commit: 0ae8aebb89b5d0b2226a5d3852a9c72cd52da2ff (patch)
tree: 4f9517ae03b50d21b534a8cfcdcbbdcdd8d326b5 /video/mp_image.c
parent: 361040f9d912140832192af78808218d601c7465 (diff)
download: mpv-0ae8aebb89b5d0b2226a5d3852a9c72cd52da2ff.tar.bz2
download: mpv-0ae8aebb89b5d0b2226a5d3852a9c72cd52da2ff.tar.xz
video: refactor GPU memcpy usage
Make the GPU memcpy from the dxva2 code generally useful to other parts of the player. We need to check at configure time whether SSE intrinsics work at all. (At least in this form, they won't work on clang, for example. It also won't work on non-x86.) Introduce a mp_image_copy_gpu(), and make the dxva2 code use it. Do some awkward stuff to share the existing code used by mp_image_copy(). I'm hoping that FFmpeg will sooner or later provide a function like this, so we can remove most of this again. (There is a patch, but it's stuck in limbo since forever.) All this is used by the following commit.
Diffstat (limited to 'video/mp_image.c')
-rw-r--r-- video/mp_image.c 82
1 files changed, 64 insertions, 18 deletions
diff --git a/video/mp_image.c b/video/mp_image.c
index debdbbb201..57650eea0d 100644
--- a/video/mp_image.c
+++ b/video/mp_image.c
@@ -35,6 +35,7 @@
#include "mp_image.h"
#include "sws_utils.h"
#include "fmt-conversion.h"
+#include "gpu_memcpy.h"
#include "video/filter/vf.h"
@@ -300,7 +301,30 @@ void mp_image_unrefp(struct mp_image **p_img)
*p_img = NULL;
}
-void mp_image_copy(struct mp_image *dst, struct mp_image *src)
+typedef void *(*memcpy_fn)(void *d, const void *s, size_t size);
+
+static void memcpy_pic_cb(void *dst, const void *src, int bytesPerLine, int height,
+ int dstStride, int srcStride, memcpy_fn cpy)
+{
+ if (bytesPerLine == dstStride && dstStride == srcStride && height) {
+ if (srcStride < 0) {
+ src = (uint8_t*)src + (height - 1) * srcStride;
+ dst = (uint8_t*)dst + (height - 1) * dstStride;
+ srcStride = -srcStride;
+ }
+
+ cpy(dst, src, srcStride * (height - 1) + bytesPerLine);
+ } else {
+ for (int i = 0; i < height; i++) {
+ cpy(dst, src, bytesPerLine);
+ src = (uint8_t*)src + srcStride;
+ dst = (uint8_t*)dst + dstStride;
+ }
+ }
+}
+
+static void mp_image_copy_cb(struct mp_image *dst, struct mp_image *src,
+ memcpy_fn cpy)
{
assert(dst->imgfmt == src->imgfmt);
assert(dst->w == src->w && dst->h == src->h);
@@ -308,14 +332,50 @@ void mp_image_copy(struct mp_image *dst, struct mp_image *src)
for (int n = 0; n < dst->num_planes; n++) {
int line_bytes = (mp_image_plane_w(dst, n) * dst->fmt.bpp[n] + 7) / 8;
int plane_h = mp_image_plane_h(dst, n);
- memcpy_pic(dst->planes[n], src->planes[n], line_bytes, plane_h,
- dst->stride[n], src->stride[n]);
+ memcpy_pic_cb(dst->planes[n], src->planes[n], line_bytes, plane_h,
+ dst->stride[n], src->stride[n], cpy);
}
// Watch out for AV_PIX_FMT_FLAG_PSEUDOPAL retardation
if ((dst->fmt.flags & MP_IMGFLAG_PAL) && dst->planes[1] && src->planes[1])
memcpy(dst->planes[1], src->planes[1], MP_PALETTE_SIZE);
}
+void mp_image_copy(struct mp_image *dst, struct mp_image *src)
+{
+ mp_image_copy_cb(dst, src, memcpy);
+}
+
+void mp_image_copy_gpu(struct mp_image *dst, struct mp_image *src)
+{
+#if HAVE_SSE4_INTRINSICS
+ if (av_get_cpu_flags() & AV_CPU_FLAG_SSE4) {
+ mp_image_copy_cb(dst, src, gpu_memcpy);
+ return;
+ }
+#endif
+ mp_image_copy(dst, src);
+}
+
+// Helper, only for outputting some log info.
+void mp_check_gpu_memcpy(struct mp_log *log, bool *once)
+{
+ if (once) {
+ if (*once)
+ return;
+ *once = true;
+ }
+
+ bool have_sse = false;
+#if HAVE_SSE4_INTRINSICS
+ have_sse = av_get_cpu_flags() & AV_CPU_FLAG_SSE4;
+#endif
+ if (have_sse) {
+ mp_verbose(log, "Using SSE4 memcpy\n");
+ } else {
+ mp_warn(log, "Using fallback memcpy (slow)\n");
+ }
+}
+
void mp_image_copy_attributes(struct mp_image *dst, struct mp_image *src)
{
dst->pict_type = src->pict_type;
@@ -675,21 +735,7 @@ struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img)
void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height,
int dstStride, int srcStride)
{
- if (bytesPerLine == dstStride && dstStride == srcStride && height) {
- if (srcStride < 0) {
- src = (uint8_t*)src + (height - 1) * srcStride;
- dst = (uint8_t*)dst + (height - 1) * dstStride;
- srcStride = -srcStride;
- }
-
- memcpy(dst, src, srcStride * (height - 1) + bytesPerLine);
- } else {
- for (int i = 0; i < height; i++) {
- memcpy(dst, src, bytesPerLine);
- src = (uint8_t*)src + srcStride;
- dst = (uint8_t*)dst + dstStride;
- }
- }
+ memcpy_pic_cb(dst, src, bytesPerLine, height, dstStride, srcStride, memcpy);
}
void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride)