summaryrefslogtreecommitdiffstats
path: root/libvo/aclib_template.c
diff options
context:
space:
mode:
authoraurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2>2004-10-21 11:55:20 +0000
committeraurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2>2004-10-21 11:55:20 +0000
commit798d2d133780c000166f01fd2d7fbde395196be3 (patch)
tree1346451ddb9911c63cd778af21261a6ae6f70346 /libvo/aclib_template.c
parent159928a38a3fc65427a1336b2c73aa3f78a82aa7 (diff)
downloadmpv-798d2d133780c000166f01fd2d7fbde395196be3.tar.bz2
mpv-798d2d133780c000166f01fd2d7fbde395196be3.tar.xz
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@13721 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo/aclib_template.c')
-rw-r--r--libvo/aclib_template.c70
1 files changed, 35 insertions, 35 deletions
diff --git a/libvo/aclib_template.c b/libvo/aclib_template.c
index 54b420eecf..0b50f7ecf3 100644
--- a/libvo/aclib_template.c
+++ b/libvo/aclib_template.c
@@ -257,62 +257,62 @@ static void * RENAME(fast_memcpy)(void * to, const void * from, size_t len)
// Pure Assembly cuz gcc is a bit unpredictable ;)
if(i>=BLOCK_SIZE/64)
asm volatile(
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
- "movl (%0, %%eax), %%ebx \n\t"
- "movl 32(%0, %%eax), %%ebx \n\t"
- "movl 64(%0, %%eax), %%ebx \n\t"
- "movl 96(%0, %%eax), %%ebx \n\t"
- "addl $128, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
+ "movl (%0, %%"REG_a"), %%ebx \n\t"
+ "movl 32(%0, %%"REG_a"), %%ebx \n\t"
+ "movl 64(%0, %%"REG_a"), %%ebx \n\t"
+ "movl 96(%0, %%"REG_a"), %%ebx \n\t"
+ "add $128, %%"REG_a" \n\t"
+ "cmp %3, %%"REG_a" \n\t"
" jb 1b \n\t"
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"2: \n\t"
- "movq (%0, %%eax), %%mm0\n"
- "movq 8(%0, %%eax), %%mm1\n"
- "movq 16(%0, %%eax), %%mm2\n"
- "movq 24(%0, %%eax), %%mm3\n"
- "movq 32(%0, %%eax), %%mm4\n"
- "movq 40(%0, %%eax), %%mm5\n"
- "movq 48(%0, %%eax), %%mm6\n"
- "movq 56(%0, %%eax), %%mm7\n"
- MOVNTQ" %%mm0, (%1, %%eax)\n"
- MOVNTQ" %%mm1, 8(%1, %%eax)\n"
- MOVNTQ" %%mm2, 16(%1, %%eax)\n"
- MOVNTQ" %%mm3, 24(%1, %%eax)\n"
- MOVNTQ" %%mm4, 32(%1, %%eax)\n"
- MOVNTQ" %%mm5, 40(%1, %%eax)\n"
- MOVNTQ" %%mm6, 48(%1, %%eax)\n"
- MOVNTQ" %%mm7, 56(%1, %%eax)\n"
- "addl $64, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
+ "movq (%0, %%"REG_a"), %%mm0\n"
+ "movq 8(%0, %%"REG_a"), %%mm1\n"
+ "movq 16(%0, %%"REG_a"), %%mm2\n"
+ "movq 24(%0, %%"REG_a"), %%mm3\n"
+ "movq 32(%0, %%"REG_a"), %%mm4\n"
+ "movq 40(%0, %%"REG_a"), %%mm5\n"
+ "movq 48(%0, %%"REG_a"), %%mm6\n"
+ "movq 56(%0, %%"REG_a"), %%mm7\n"
+ MOVNTQ" %%mm0, (%1, %%"REG_a")\n"
+ MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n"
+ MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n"
+ "add $64, %%"REG_a" \n\t"
+ "cmp %3, %%"REG_a" \n\t"
"jb 2b \n\t"
#if CONFUSION_FACTOR > 0
// a few percent speedup on out of order executing CPUs
- "movl %5, %%eax \n\t"
+ "mov %5, %%"REG_a" \n\t"
"2: \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
"movl (%0), %%ebx \n\t"
- "decl %%eax \n\t"
+ "dec %%"REG_a" \n\t"
" jnz 2b \n\t"
#endif
- "xorl %%eax, %%eax \n\t"
- "addl %3, %0 \n\t"
- "addl %3, %1 \n\t"
- "subl %4, %2 \n\t"
- "cmpl %4, %2 \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
+ "add %3, %0 \n\t"
+ "add %3, %1 \n\t"
+ "sub %4, %2 \n\t"
+ "cmp %4, %2 \n\t"
" jae 1b \n\t"
: "+r" (from), "+r" (to), "+r" (i)
- : "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR)
- : "%eax", "%ebx"
+ : "r" ((long)BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" ((long)CONFUSION_FACTOR)
+ : "%"REG_a, "%ebx"
);
for(; i>0; i--)