summary | refs | log | tree | commit | diff | stats
path: root/libvo
diff options
context:
space:
mode:
author: aurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2> 2004-10-21 11:55:20 +0000
committer: aurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2> 2004-10-21 11:55:20 +0000
commit: 798d2d133780c000166f01fd2d7fbde395196be3 (patch)
tree: 1346451ddb9911c63cd778af21261a6ae6f70346 /libvo
parent: 159928a38a3fc65427a1336b2c73aa3f78a82aa7 (diff)
download: mpv-798d2d133780c000166f01fd2d7fbde395196be3.tar.bz2
          mpv-798d2d133780c000166f01fd2d7fbde395196be3.tar.xz
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@13721 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo')
-rw-r--r--  libvo/aclib.c            7
-rw-r--r--  libvo/aclib_template.c  70
-rw-r--r--  libvo/osd.c             24
-rw-r--r--  libvo/osd_template.c     6
4 files changed, 49 insertions, 58 deletions
diff --git a/libvo/aclib.c b/libvo/aclib.c
index e2a19223f8..a3330eafef 100644
--- a/libvo/aclib.c
+++ b/libvo/aclib.c
@@ -17,7 +17,7 @@
//Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
//#define STATISTICS
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#define CAN_COMPILE_X86_ASM
#endif
@@ -50,7 +50,6 @@
#undef HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#undef ARCH_X86
/*
#ifdef COMPILE_C
#undef HAVE_MMX
@@ -69,7 +68,6 @@
#undef HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "aclib_template.c"
#endif
@@ -82,7 +80,6 @@
#undef HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "aclib_template.c"
#endif
@@ -95,7 +92,6 @@
#define HAVE_3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "aclib_template.c"
#endif
@@ -108,7 +104,6 @@
#undef HAVE_3DNOW
#define HAVE_SSE
#define HAVE_SSE2
-#define ARCH_X86
#define RENAME(a) a ## _SSE
#include "aclib_template.c"
#endif
diff --git a/libvo/aclib_template.c b/libvo/aclib_template.c
index 54b420eecf..0b50f7ecf3 100644
--- a/libvo/aclib_template.c
+++ b/libvo/aclib_template.c
@@ -257,62 +257,62 @@ static void * RENAME(fast_memcpy)(void * to, const void * from, size_t len)
// Pure Assembly cuz gcc is a bit unpredictable ;)
if(i>=BLOCK_SIZE/64)
asm volatile(
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
- "movl (%0, %%eax), %%ebx \n\t"
- "movl 32(%0, %%eax), %%ebx \n\t"
- "movl 64(%0, %%eax), %%ebx \n\t"
- "movl 96(%0, %%eax), %%ebx \n\t"
- "addl $128, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
+ "movl (%0, %%"REG_a"), %%ebx \n\t"
+ "movl 32(%0, %%"REG_a"), %%ebx \n\t"
+ "movl 64(%0, %%"REG_a"), %%ebx \n\t"
+ "movl 96(%0, %%"REG_a"), %%ebx \n\t"
+ "add $128, %%"REG_a" \n\t"
+ "cmp %3, %%"REG_a" \n\t"
" jb 1b \n\t"
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"2: \n\t"
- "movq (%0, %%eax), %%mm0\n"
- "movq 8(%0, %%eax), %%mm1\n"
- "movq 16(%0, %%eax), %%mm2\n"
- "movq 24(%0, %%eax), %%mm3\n"
- "movq 32(%0, %%eax), %%mm4\n"
- "movq 40(%0, %%eax), %%mm5\n"
- "movq 48(%0, %%eax), %%mm6\n"
- "movq 56(%0, %%eax), %%mm7\n"
- MOVNTQ" %%mm0, (%1, %%eax)\n"
- MOVNTQ" %%mm1, 8(%1, %%eax)\n"
- MOVNTQ" %%mm2, 16(%1, %%eax)\n"
- MOVNTQ" %%mm3, 24(%1, %%eax)\n"
- MOVNTQ" %%mm4, 32(%1, %%eax)\n"
- MOVNTQ" %%mm5, 40(%1, %%eax)\n"
- MOVNTQ" %%mm6, 48(%1, %%eax)\n"
- MOVNTQ" %%mm7, 56(%1, %%eax)\n"
- "addl $64, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
+ "movq (%0, %%"REG_a"), %%mm0\n"
+ "movq 8(%0, %%"REG_a"), %%mm1\n"
+ "movq 16(%0, %%"REG_a"), %%mm2\n"
+ "movq 24(%0, %%"REG_a"), %%mm3\n"
+ "movq 32(%0, %%"REG_a"), %%mm4\n"
+ "movq 40(%0, %%"REG_a"), %%mm5\n"
+ "movq 48(%0, %%"REG_a"), %%mm6\n"
+ "movq 56(%0, %%"REG_a"), %%mm7\n"
+ MOVNTQ" %%mm0, (%1, %%"REG_a")\n"
+ MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n"
+ "add $64, %%"REG_a" \n\t"
+ "cmp %3, %%"REG_a" \n\t"
"jb 2b \n\t"
#if CONFUSION_FACTOR > 0
// a few percent speedup on out of order executing CPUs
- "movl %5, %%eax \n\t"
+ "mov %5, %%"REG_a" \n\t"
"2: \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
- "decl %%eax \n\t"
+ "dec %%"REG_a" \n\t"
" jnz 2b \n\t"
#endif
- "xorl %%eax, %%eax \n\t"
- "addl %3, %0 \n\t"
- "addl %3, %1 \n\t"
- "subl %4, %2 \n\t"
- "cmpl %4, %2 \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
+ "add %3, %0 \n\t"
+ "add %3, %1 \n\t"
+ "sub %4, %2 \n\t"
+ "cmp %4, %2 \n\t"
" jae 1b \n\t"
: "+r" (from), "+r" (to), "+r" (i)
- : "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR)
- : "%eax", "%ebx"
+ : "r" ((long)BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" ((long)CONFUSION_FACTOR)
+ : "%"REG_a, "%ebx"
);
for(; i>0; i--)
diff --git a/libvo/osd.c b/libvo/osd.c
index 742174e7c8..3c616dc0de 100644
--- a/libvo/osd.c
+++ b/libvo/osd.c
@@ -14,7 +14,7 @@
extern int verbose; // defined in mplayer.c
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#define CAN_COMPILE_X86_ASM
#endif
@@ -48,18 +48,18 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#undef ARCH_X86
+
+#ifndef CAN_COMPILE_X86_ASM
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#undef ARCH_X86
#define RENAME(a) a ## _C
#include "osd_template.c"
#endif
-#ifdef CAN_COMPILE_X86_ASM
+#else
//X86 noMMX versions
#ifdef COMPILE_C
@@ -67,7 +67,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _X86
#include "osd_template.c"
#endif
@@ -78,7 +77,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "osd_template.c"
#endif
@@ -89,7 +87,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "osd_template.c"
#endif
@@ -100,7 +97,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
-#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "osd_template.c"
#endif
@@ -129,7 +125,7 @@ void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, in
vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -159,7 +155,7 @@ void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, in
vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -189,7 +185,7 @@ void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, in
vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -219,7 +215,7 @@ void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, i
vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -249,7 +245,7 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
@@ -294,7 +290,7 @@ void vo_draw_alpha_init(){
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
#elif defined (HAVE_MMX)
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
-#elif defined (ARCH_X86)
+#elif defined(ARCH_X86) || defined(ARCH_X86_64)
mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
#else
mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
diff --git a/libvo/osd_template.c b/libvo/osd_template.c
index 5c8c009ec7..e2ada2ccaf 100644
--- a/libvo/osd_template.c
+++ b/libvo/osd_template.c
@@ -189,7 +189,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
for(y=0;y<h;y++){
register unsigned char *dst = dstbase;
register int x;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX
asm volatile(
PREFETCHW" %0\n\t"
@@ -253,7 +253,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
"addl %2, %%eax\n\t"
"movb %%ah, 2(%0)\n\t"
:
- :"r" (dst),
+ :"D" (dst),
"r" ((unsigned)srca[x]),
"r" (((unsigned)src[x])<<8)
:"%eax", "%ecx"
@@ -293,7 +293,7 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#endif
for(y=0;y<h;y++){
register int x;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
asm volatile(