diff options
author | aurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2005-08-05 13:33:50 +0000 |
---|---|---|
committer | aurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2005-08-05 13:33:50 +0000 |
commit | 236d514567d5681fe2f7df110d7b1f46a7be701f (patch) | |
tree | 86f2a2412d99fd7d9d2ab06eb7529103b4350c47 /liba52/resample_mmx.c | |
parent | 4bf7522a2ed124855eb7756b9c8f72a4cb683be4 (diff) | |
download | mpv-236d514567d5681fe2f7df110d7b1f46a7be701f.tar.bz2 mpv-236d514567d5681fe2f7df110d7b1f46a7be701f.tar.xz |
liba52 asm optimizations ported to amd64
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@16174 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'liba52/resample_mmx.c')
-rw-r--r-- | liba52/resample_mmx.c | 320 |
1 files changed, 161 insertions, 159 deletions
diff --git a/liba52/resample_mmx.c b/liba52/resample_mmx.c index 6f45d88ea7..799b2e3683 100644 --- a/liba52/resample_mmx.c +++ b/liba52/resample_mmx.c @@ -7,6 +7,9 @@ and it would mean (C / MMX2 / MMX / 3DNOW) versions */ +#include "a52_internal.h" + + static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; @@ -15,36 +18,36 @@ static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF000 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-512, %%esi \n\t" + "mov $-512, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(wm1100)", %%mm3 \n\t" "movq "MANGLE(wm0101)", %%mm4 \n\t" "movq "MANGLE(wm1010)", %%mm5 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq (%1, %%esi, 2), %%mm0 \n\t" - "movq 8(%1, %%esi, 2), %%mm1 \n\t" - "leal (%%esi, %%esi, 4), %%edi \n\t" + "movq (%1, %%"REG_S", 2), %%mm0 \n\t" + "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" + "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "packssdw %%mm1, %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "pand %%mm4, %%mm0 \n\t" "pand %%mm5, %%mm1 \n\t" - "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 - "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 + "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 + "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 "pand %%mm3, %%mm0 \n\t" - "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 - "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B + "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 + "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B "pand %%mm3, %%mm1 \n\t" - "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 - "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 - "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B - "addl $8, %%esi \n\t" + "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 + "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 + "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 B + "add $8, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1280), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 5*256; } @@ -54,29 +57,29 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it #ifdef HAVE_SSE asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "1: \n\t" - "cvtps2pi (%1, %%esi), %%mm0 \n\t" - "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" + "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" + "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" "movq %%mm0, %%mm1 \n\t" "punpcklwd %%mm2, %%mm0 \n\t" "punpckhwd %%mm2, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+512), "r" (f+256) - :"%esi", "memory" + :"%"REG_S, "memory" );*/ asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "1: \n\t" - "movq (%1, %%esi), %%mm0 \n\t" - "movq 8(%1, %%esi), %%mm1 \n\t" - "movq 1024(%1, %%esi), %%mm2 \n\t" - "movq 1032(%1, %%esi), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm0 \n\t" + "movq 8(%1, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm2\n\t" + "movq 1032(%1, %%"REG_S"), %%mm3\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" @@ -86,13 +89,13 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ "movq %%mm0, %%mm1 \n\t" "punpcklwd %%mm2, %%mm0 \n\t" "punpckhwd %%mm2, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+512), "r" (f+256) - :"%esi", "memory" + :"%"REG_S, "memory" ); return 2*256; } @@ -100,23 +103,23 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "movq %%mm7, %%mm5 \n\t" "punpckldq %%mm6, %%mm5 \n\t" "1: \n\t" - "movd (%1, %%esi), %%mm0 \n\t" - "punpckldq 2048(%1, %%esi), %%mm0\n\t" - "movd 1024(%1, %%esi), %%mm1 \n\t" - "punpckldq 4(%1, %%esi), %%mm1 \n\t" - "movd 2052(%1, %%esi), %%mm2 \n\t" + "movd (%1, %%"REG_S"), %%mm0 \n\t" + "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" + "movd 1024(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" + "movd 2052(%1, %%"REG_S"), %%mm2\n\t" "movq %%mm7, %%mm3 \n\t" - "punpckldq 1028(%1, %%esi), %%mm3\n\t" - "movd 8(%1, %%esi), %%mm4 \n\t" - "punpckldq 2056(%1, %%esi), %%mm4\n\t" - "leal (%%esi, %%esi, 4), %%edi \n\t" - "sarl $1, %%edi \n\t" + "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" + "movd 8(%1, %%"REG_S"), %%mm4 \n\t" + "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" + "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" + "sar $1, %%"REG_D" \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm5, %%mm2 \n\t" @@ -125,29 +128,28 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ "packssdw %%mm6, %%mm0 \n\t" "packssdw %%mm2, %%mm1 \n\t" "packssdw %%mm4, %%mm3 \n\t" - "movq %%mm0, (%0, %%edi) \n\t" - "movq %%mm1, 8(%0, %%edi) \n\t" - "movq %%mm3, 16(%0, %%edi) \n\t" - - "movd 1032(%1, %%esi), %%mm1 \n\t" - "punpckldq 12(%1, %%esi), %%mm1\n\t" - "movd 2060(%1, %%esi), %%mm2 \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" + "movq %%mm1, 8(%0, %%"REG_D") \n\t" + "movq %%mm3, 16(%0, %%"REG_D") \n\t" + "movd 1032(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" + "movd 2060(%1, %%"REG_S"), %%mm2\n\t" "movq %%mm7, %%mm3 \n\t" - "punpckldq 1036(%1, %%esi), %%mm3\n\t" + "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" "pxor %%mm0, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm5, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" - "movq %%mm0, 24(%0, %%edi) \n\t" - "movq %%mm2, 32(%0, %%edi) \n\t" + "movq %%mm0, 24(%0, %%"REG_D") \n\t" + "movq %%mm2, 32(%0, %%"REG_D") \n\t" - "addl $16, %%esi \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1280), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 5*256; } @@ -155,23 +157,23 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "1: \n\t" - "movq (%1, %%esi), %%mm0 \n\t" - "movq 8(%1, %%esi), %%mm1 \n\t" - "movq 1024(%1, %%esi), %%mm2 \n\t" - "movq 1032(%1, %%esi), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm0 \n\t" + "movq 8(%1, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm2\n\t" + "movq 1032(%1, %%"REG_S"), %%mm3\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" - "movq 2048(%1, %%esi), %%mm3 \n\t" - "movq 2056(%1, %%esi), %%mm4 \n\t" - "movq 3072(%1, %%esi), %%mm5 \n\t" - "movq 3080(%1, %%esi), %%mm6 \n\t" + "movq 2048(%1, %%"REG_S"), %%mm3\n\t" + "movq 2056(%1, %%"REG_S"), %%mm4\n\t" + "movq 3072(%1, %%"REG_S"), %%mm5\n\t" + "movq 3080(%1, %%"REG_S"), %%mm6\n\t" "psubd %%mm7, %%mm3 \n\t" "psubd %%mm7, %%mm4 \n\t" "psubd %%mm7, %%mm5 \n\t" @@ -190,15 +192,15 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ "punpckhdq %%mm3, %%mm2 \n\t" "punpckldq %%mm4, %%mm1 \n\t" "punpckhdq %%mm4, %%mm5 \n\t" - "movq %%mm0, (%0, %%esi,2) \n\t" - "movq %%mm2, 8(%0, %%esi,2) \n\t" - "movq %%mm1, 16(%0, %%esi,2) \n\t" - "movq %%mm5, 24(%0, %%esi,2) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S",2) \n\t" + "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" + "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" + "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1024), "r" (f+256) - :"%esi", "memory" + :"%"REG_S, "memory" ); return 4*256; } @@ -206,23 +208,23 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "1: \n\t" - "movd (%1, %%esi), %%mm0 \n\t" - "punpckldq 2048(%1, %%esi), %%mm0\n\t" - "movd 3072(%1, %%esi), %%mm1 \n\t" - "punpckldq 4096(%1, %%esi), %%mm1\n\t" - "movd 1024(%1, %%esi), %%mm2 \n\t" - "punpckldq 4(%1, %%esi), %%mm2 \n\t" - "movd 2052(%1, %%esi), %%mm3 \n\t" - "punpckldq 3076(%1, %%esi), %%mm3\n\t" - "movd 4100(%1, %%esi), %%mm4 \n\t" - "punpckldq 1028(%1, %%esi), %%mm4\n\t" - "movd 8(%1, %%esi), %%mm5 \n\t" - "punpckldq 2056(%1, %%esi), %%mm5\n\t" - "leal (%%esi, %%esi, 4), %%edi \n\t" - "sarl $1, %%edi \n\t" + "movd (%1, %%"REG_S"), %%mm0 \n\t" + "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" + "movd 3072(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" + "movd 1024(%1, %%"REG_S"), %%mm2\n\t" + "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" + "movd 2052(%1, %%"REG_S"), %%mm3\n\t" + "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" + "movd 4100(%1, %%"REG_S"), %%mm4\n\t" + "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" + "movd 8(%1, %%"REG_S"), %%mm5 \n\t" + "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" + "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" + "sar $1, %%"REG_D" \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" @@ -232,32 +234,32 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" "packssdw %%mm5, %%mm4 \n\t" - "movq %%mm0, (%0, %%edi) \n\t" - "movq %%mm2, 8(%0, %%edi) \n\t" - "movq %%mm4, 16(%0, %%edi) \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" + "movq %%mm2, 8(%0, %%"REG_D") \n\t" + "movq %%mm4, 16(%0, %%"REG_D") \n\t" - "movd 3080(%1, %%esi), %%mm0 \n\t" - "punpckldq 4104(%1, %%esi), %%mm0\n\t" - "movd 1032(%1, %%esi), %%mm1 \n\t" - "punpckldq 12(%1, %%esi), %%mm1\n\t" - "movd 2060(%1, %%esi), %%mm2 \n\t" - "punpckldq 3084(%1, %%esi), %%mm2\n\t" - "movd 4108(%1, %%esi), %%mm3 \n\t" - "punpckldq 1036(%1, %%esi), %%mm3\n\t" + "movd 3080(%1, %%"REG_S"), %%mm0\n\t" + "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" + "movd 1032(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" + "movd 2060(%1, %%"REG_S"), %%mm2\n\t" + "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" + "movd 4108(%1, %%"REG_S"), %%mm3\n\t" + "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" - "movq %%mm0, 24(%0, %%edi) \n\t" - "movq %%mm2, 32(%0, %%edi) \n\t" + "movq %%mm0, 24(%0, %%"REG_D") \n\t" + "movq %%mm2, 32(%0, %%"REG_D") \n\t" - "addl $16, %%esi \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1280), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 5*256; } @@ -265,14 +267,14 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 1032(%1, %%esi), %%mm1 \n\t" - "movq (%1, %%esi), %%mm2 \n\t" - "movq 8(%1, %%esi), %%mm3 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1032(%1, %%"REG_S"), %%mm1\n\t" + "movq (%1, %%"REG_S"), %%mm2 \n\t" + "movq 8(%1, %%"REG_S"), %%mm3 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" @@ -282,22 +284,22 @@ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ "movq %%mm0, %%mm1 \n\t" "punpcklwd %%mm2, %%mm0 \n\t" "punpckhwd %%mm2, %%mm1 \n\t" - "leal (%%esi, %%esi, 2), %%edi \n\t" - "movq %%mm6, (%0, %%edi) \n\t" - "movd %%mm0, 8(%0, %%edi) \n\t" + "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" + "movq %%mm6, (%0, %%"REG_D") \n\t" + "movd %%mm0, 8(%0, %%"REG_D") \n\t" "punpckhdq %%mm0, %%mm0 \n\t" - "movq %%mm6, 12(%0, %%edi) \n\t" - "movd %%mm0, 20(%0, %%edi) \n\t" - "movq %%mm6, 24(%0, %%edi) \n\t" - "movd %%mm1, 32(%0, %%edi) \n\t" + "movq %%mm6, 12(%0, %%"REG_D") \n\t" + "movd %%mm0, 20(%0, %%"REG_D") \n\t" + "movq %%mm6, 24(%0, %%"REG_D") \n\t" + "movd %%mm1, 32(%0, %%"REG_D") \n\t" "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm6, 36(%0, %%edi) \n\t" - "movd %%mm1, 44(%0, %%edi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm6, 36(%0, %%"REG_D") \n\t" + "movd %%mm1, 44(%0, %%"REG_D") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1536), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 6*256; } @@ -305,17 +307,17 @@ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 2048(%1, %%esi), %%mm1 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 2048(%1, %%"REG_S"), %%mm1\n\t" + "movq (%1, %%"REG_S"), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm5 \n\t" - "leal (%%esi, %%esi, 2), %%edi \n\t" + "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" "pxor %%mm4, %%mm4 \n\t" "packssdw %%mm5, %%mm0 \n\t" // FfAa @@ -327,15 +329,15 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckldq %%mm6, %%mm0 \n\t" // 00ba "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 - "movq %%mm0, (%0, %%edi) \n\t" // 00ba + "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba "punpckhdq %%mm4, %%mm0 \n\t" // F000 - "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 - "movq %%mm0, 16(%0, %%edi) \n\t" // F000 - "addl $8, %%esi \n\t" + "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0 + "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000 + "add $8, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1536), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 6*256; } @@ -343,19 +345,19 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 3072(%1, %%esi), %%mm1 \n\t" - "movq 2048(%1, %%esi), %%mm4 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 3072(%1, %%"REG_S"), %%mm1\n\t" + "movq 2048(%1, %%"REG_S"), %%mm4\n\t" + "movq (%1, %%"REG_S"), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm4 \n\t" "psubd %%mm7, %%mm5 \n\t" - "leal (%%esi, %%esi, 2), %%edi \n\t" + "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" "packssdw %%mm4, %%mm0 \n\t" // EeAa "packssdw %%mm5, %%mm1 \n\t" // FfBb @@ -366,16 +368,16 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckldq %%mm6, %%mm0 \n\t" // 00ba "punpckhdq %%mm1, %%mm1 \n\t" // BABA - "movq %%mm0, (%0, %%edi) \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" "punpckhdq %%mm2, %%mm0 \n\t" // FE00 "punpckldq %%mm1, %%mm2 \n\t" // BAfe - "movq %%mm2, 8(%0, %%edi) \n\t" - "movq %%mm0, 16(%0, %%edi) \n\t" - "addl $8, %%esi \n\t" + "movq %%mm2, 8(%0, %%"REG_D") \n\t" + "movq %%mm0, 16(%0, %%"REG_D") \n\t" + "add $8, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1536), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 6*256; } @@ -383,21 +385,21 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" // "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 2048(%1, %%esi), %%mm1 \n\t" - "movq 3072(%1, %%esi), %%mm2 \n\t" - "movq 4096(%1, %%esi), %%mm3 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 2048(%1, %%"REG_S"), %%mm1\n\t" + "movq 3072(%1, %%"REG_S"), %%mm2\n\t" + "movq 4096(%1, %%"REG_S"), %%mm3\n\t" + "movq (%1, %%"REG_S"), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "psubd %%mm7, %%mm5 \n\t" - "leal (%%esi, %%esi, 2), %%edi \n\t" + "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" "packssdw %%mm2, %%mm0 \n\t" // CcAa "packssdw %%mm3, %%mm1 \n\t" // DdBb @@ -414,14 +416,14 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckldq %%mm1, %%mm4 \n\t" // BAf0 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC - "movq %%mm0, (%0, %%edi) \n\t" - "movq %%mm4, 8(%0, %%edi) \n\t" - "movq %%mm2, 16(%0, %%edi) \n\t" - "addl $8, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" + "movq %%mm4, 8(%0, %%"REG_D") \n\t" + "movq %%mm2, 16(%0, %%"REG_D") \n\t" + "add $8, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1536), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 6*256; } @@ -429,23 +431,23 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" // "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 3072(%1, %%esi), %%mm1 \n\t" - "movq 4096(%1, %%esi), %%mm2 \n\t" - "movq 5120(%1, %%esi), %%mm3 \n\t" - "movq 2048(%1, %%esi), %%mm4 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 3072(%1, %%"REG_S"), %%mm1\n\t" + "movq 4096(%1, %%"REG_S"), %%mm2\n\t" + "movq 5120(%1, %%"REG_S"), %%mm3\n\t" + "movq 2048(%1, %%"REG_S"), %%mm4\n\t" + "movq (%1, %%"REG_S"), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "psubd %%mm7, %%mm4 \n\t" "psubd %%mm7, %%mm5 \n\t" - "leal (%%esi, %%esi, 2), %%edi \n\t" + "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" "packssdw %%mm2, %%mm0 \n\t" // CcAa "packssdw %%mm3, %%mm1 \n\t" // DdBb @@ -462,14 +464,14 @@ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckldq %%mm1, %%mm4 \n\t" // BAfe "punpckhdq %%mm3, %%mm2 \n\t" // FEDC - "movq %%mm0, (%0, %%edi) \n\t" - "movq %%mm4, 8(%0, %%edi) \n\t" - "movq %%mm2, 16(%0, %%edi) \n\t" - "addl $8, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" + "movq %%mm4, 8(%0, %%"REG_D") \n\t" + "movq %%mm2, 16(%0, %%"REG_D") \n\t" + "add $8, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1536), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 6*256; } |