summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: James Ross-Gowan <rossymiles@gmail.com> 2014-10-26 15:17:43 +1100
committer: wm4 <wm4@nowhere> 2014-10-26 13:56:41 +0100
commit: 0166f2c7a00fc3e952f8cfa9fb5d914abf42070b (patch)
tree: 6874e706992575cfd84cbd7fad3ba2c53e08bde9
parent: 3b34f0078da0a946d68f964b214412de261ebdc2 (diff)
download: mpv-0166f2c7a00fc3e952f8cfa9fb5d914abf42070b.tar.bz2
          mpv-0166f2c7a00fc3e952f8cfa9fb5d914abf42070b.tar.xz
dxva2: gpu_memcpy: fix build for GCC 4.8.3
-rw-r--r--  video/decode/gpu_memcpy_sse4.h  13
1 files changed, 10 insertions, 3 deletions
diff --git a/video/decode/gpu_memcpy_sse4.h b/video/decode/gpu_memcpy_sse4.h
index c441ff38fe..160209bdc5 100644
--- a/video/decode/gpu_memcpy_sse4.h
+++ b/video/decode/gpu_memcpy_sse4.h
@@ -19,14 +19,18 @@
* Taken from the QuickSync decoder by Eric Gur
*/
-#include <emmintrin.h>
+#ifndef GPU_MEMCPY_SSE4_H_
+#define GPU_MEMCPY_SSE4_H_
+
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#include <smmintrin.h>
// gpu_memcpy is a memcpy style function that copied data very fast from a
// GPU tiled memory (write back)
// Performance tip: page offset (12 lsb) of both addresses should be different
// optimally use a 2K offset between them.
-__attribute__((target("sse4"))) static inline void
-*gpu_memcpy(void *restrict d, const void *restrict s, size_t size)
+static inline void *gpu_memcpy(void *restrict d, const void *restrict s, size_t size)
{
static const size_t regsInLoop = sizeof(size_t) * 2; // 8 or 16
@@ -127,3 +131,6 @@ __attribute__((target("sse4"))) static inline void
return d;
}
+
+#pragma GCC pop_options
+#endif