From 236d514567d5681fe2f7df110d7b1f46a7be701f Mon Sep 17 00:00:00 2001 From: aurel Date: Fri, 5 Aug 2005 13:33:50 +0000 Subject: liba52 asm optimizations ported to amd64 git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@16174 b3059339-0415-0410-9bf9-f77b7e298cf2 --- liba52/a52_internal.h | 14 ++ liba52/downmix.c | 620 +++++++++++++++++++++++++------------------------- liba52/imdct.c | 274 +++++++++++----------- liba52/resample.c | 4 +- liba52/resample_mmx.c | 320 +++++++++++++------------- 5 files changed, 624 insertions(+), 608 deletions(-) (limited to 'liba52') diff --git a/liba52/a52_internal.h b/liba52/a52_internal.h index 428bbd1a2a..91fc54a300 100644 --- a/liba52/a52_internal.h +++ b/liba52/a52_internal.h @@ -41,6 +41,20 @@ #define DELTA_BIT_NONE (2) #define DELTA_BIT_RESERVED (3) +#ifdef ARCH_X86_64 +# define REG_a "rax" +# define REG_d "rdx" +# define REG_S "rsi" +# define REG_D "rdi" +# define REG_BP "rbp" +#else +# define REG_a "eax" +# define REG_d "edx" +# define REG_S "esi" +# define REG_D "edi" +# define REG_BP "ebp" +#endif + void bit_allocate (a52_state_t * state, a52_ba_t * ba, int bndstart, int start, int end, int fastleak, int slowleak, uint8_t * exp, int8_t * bap); diff --git a/liba52/downmix.c b/liba52/downmix.c index 55e2536c99..52955c6335 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -56,7 +56,7 @@ void downmix_accel_init(uint32_t mm_accel) { upmix= upmix_C; downmix= downmix_C; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX; if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE; if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow; @@ -684,27 +684,27 @@ static void upmix_C (sample_t * samples, int acmod, int output) } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) { asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps (%0, %%esi), %%xmm0 \n\t" - "movaps 16(%0, %%esi), %%xmm1 \n\t" - "addps (%1, %%esi), %%xmm0 \n\t" - "addps 16(%1, %%esi), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%1, %%"REG_S"), %%xmm0 \n\t" + "addps 16(%1, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm7, %%xmm1 \n\t" - "movaps %%xmm0, (%1, %%esi) \n\t" - "movaps %%xmm1, 16(%1, %%esi) \n\t" - "addl $32, %%esi \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" + "add $32, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (src+256), "r" (dest+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -713,19 +713,19 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps (%0, %%esi), %%xmm0 \n\t" - "movaps 1024(%0, %%esi), %%xmm1 \n\t" - "addps 2048(%0, %%esi), %%xmm0 \n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm1, %%xmm0 \n\t" - "movaps %%xmm0, (%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -734,20 +734,20 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps (%0, %%esi), %%xmm0 \n\t" - "movaps 1024(%0, %%esi), %%xmm1 \n\t" - "addps 2048(%0, %%esi), %%xmm0 \n\t" - "addps 3072(%0, %%esi), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm1, %%xmm0 \n\t" - "movaps %%xmm0, (%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -756,21 +756,21 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps (%0, %%esi), %%xmm0 \n\t" - "movaps 1024(%0, %%esi), %%xmm1 \n\t" - "addps 2048(%0, %%esi), %%xmm0 \n\t" - "addps 3072(%0, %%esi), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" - "addps 4096(%0, %%esi), %%xmm1 \n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm1, %%xmm0 \n\t" - "movaps %%xmm0, (%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -779,21 +779,21 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 1024(%0, %%esi), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps 2048(%0, %%esi), %%xmm2 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm2, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -802,21 +802,21 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 1024(%1, %%esi), %%xmm0 \n\t" + "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps (%1, %%esi), %%xmm2 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%1, %%"REG_S"), %%xmm2 \n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm2, (%1, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm2, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (left+256), "r" (right+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -825,22 +825,22 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps 1024(%0, %%esi), %%xmm2 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" "subps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm2, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -849,22 +849,22 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 1024(%0, %%esi), %%xmm0 \n\t" - "addps 3072(%0, %%esi), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps 2048(%0, %%esi), %%xmm2 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm2, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -873,24 +873,24 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 1024(%0, %%esi), %%xmm0 \n\t" - "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps 2048(%0, %%esi), %%xmm2 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" "subps %%xmm3, %%xmm1 \n\t" "addps %%xmm3, %%xmm2 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm2, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -899,23 +899,23 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 2048(%0, %%esi), %%xmm0 \n\t" - "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps 1024(%0, %%esi), %%xmm2 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" "subps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm2, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -924,22 +924,22 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 1024(%0, %%esi), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common "movaps %%xmm0, %%xmm1 \n\t" // common - "addps (%0, %%esi), %%xmm0 \n\t" - "addps 2048(%0, %%esi), %%xmm1 \n\t" - "addps 3072(%0, %%esi), %%xmm0 \n\t" - "addps 4096(%0, %%esi), %%xmm1 \n\t" - "movaps %%xmm0, (%0, %%esi) \n\t" - "movaps %%xmm1, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "addps (%0, %%"REG_S"), %%xmm0 \n\t" + "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -948,25 +948,25 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias) asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps 1024(%0, %%esi), %%xmm0 \n\t" - "movaps 3072(%0, %%esi), %%xmm2 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround - "movaps (%0, %%esi), %%xmm1 \n\t" - "movaps 2048(%0, %%esi), %%xmm3 \n\t" + "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" "subps %%xmm2, %%xmm1 \n\t" "addps %%xmm2, %%xmm3 \n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm3 \n\t" - "movaps %%xmm1, (%0, %%esi) \n\t" - "movaps %%xmm3, 1024(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -975,40 +975,40 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movaps (%0, %%esi), %%xmm0 \n\t" - "movaps 16(%0, %%esi), %%xmm1 \n\t" - "addps 1024(%0, %%esi), %%xmm0 \n\t" - "addps 1040(%0, %%esi), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm7, %%xmm1 \n\t" - "movaps %%xmm0, (%1, %%esi) \n\t" - "movaps %%xmm1, 16(%1, %%esi) \n\t" - "addl $32, %%esi \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" + "add $32, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (src+256), "r" (dest+256), "m" (bias) - : "%esi" + : "%"REG_S ); } static void zero_MMX(sample_t * samples) { asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "pxor %%mm0, %%mm0 \n\t" ".balign 16\n\t" "1: \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm0, 8(%0, %%esi) \n\t" - "movq %%mm0, 16(%0, %%esi) \n\t" - "movq %%mm0, 24(%0, %%esi) \n\t" - "addl $32, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm0, 8(%0, %%"REG_S") \n\t" + "movq %%mm0, 16(%0, %%"REG_S") \n\t" + "movq %%mm0, 24(%0, %%"REG_S") \n\t" + "add $32, %%"REG_S" \n\t" " jnz 1b \n\t" "emms" :: "r" (samples+256) - : "%esi" + : "%"REG_S ); } @@ -1257,29 +1257,29 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) asm volatile( "movd %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%esi), %%mm0 \n\t" - "movq 8(%0, %%esi), %%mm1 \n\t" - "movq 16(%0, %%esi), %%mm2 \n\t" - "movq 24(%0, %%esi), %%mm3 \n\t" - "pfadd (%1, %%esi), %%mm0 \n\t" - "pfadd 8(%1, %%esi), %%mm1 \n\t" - "pfadd 16(%1, %%esi), %%mm2 \n\t" - "pfadd 24(%1, %%esi), %%mm3 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" + "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" + "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" "pfadd %%mm7, %%mm2 \n\t" "pfadd %%mm7, %%mm3 \n\t" - "movq %%mm0, (%1, %%esi) \n\t" - "movq %%mm1, 8(%1, %%esi) \n\t" - "movq %%mm2, 16(%1, %%esi) \n\t" - "movq %%mm3, 24(%1, %%esi) \n\t" - "addl $32, %%esi \n\t" + "movq %%mm0, (%1, %%"REG_S") \n\t" + "movq %%mm1, 8(%1, %%"REG_S") \n\t" + "movq %%mm2, 16(%1, %%"REG_S") \n\t" + "movq %%mm3, 24(%1, %%"REG_S") \n\t" + "add $32, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (src+256), "r" (dest+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1288,25 +1288,25 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%esi), %%mm0 \n\t" - "movq 8(%0, %%esi), %%mm1 \n\t" - "movq 1024(%0, %%esi), %%mm2 \n\t" - "movq 1032(%0, %%esi), %%mm3 \n\t" - "pfadd 2048(%0, %%esi), %%mm0 \n\t" - "pfadd 2056(%0, %%esi), %%mm1 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" "pfadd %%mm2, %%mm0 \n\t" "pfadd %%mm3, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1315,27 +1315,27 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%esi), %%mm0 \n\t" - "movq 8(%0, %%esi), %%mm1 \n\t" - "movq 1024(%0, %%esi), %%mm2 \n\t" - "movq 1032(%0, %%esi), %%mm3 \n\t" - "pfadd 2048(%0, %%esi), %%mm0 \n\t" - "pfadd 2056(%0, %%esi), %%mm1 \n\t" - "pfadd 3072(%0, %%esi), %%mm2 \n\t" - "pfadd 3080(%0, %%esi), %%mm3 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" "pfadd %%mm2, %%mm0 \n\t" "pfadd %%mm3, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1344,29 +1344,29 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%esi), %%mm0 \n\t" - "movq 8(%0, %%esi), %%mm1 \n\t" - "movq 1024(%0, %%esi), %%mm2 \n\t" - "movq 1032(%0, %%esi), %%mm3 \n\t" - "pfadd 2048(%0, %%esi), %%mm0 \n\t" - "pfadd 2056(%0, %%esi), %%mm1 \n\t" - "pfadd 3072(%0, %%esi), %%mm2 \n\t" - "pfadd 3080(%0, %%esi), %%mm3 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" - "pfadd 4096(%0, %%esi), %%mm2 \n\t" - "pfadd 4104(%0, %%esi), %%mm3 \n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm2, %%mm0 \n\t" "pfadd %%mm3, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1375,29 +1375,29 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 1024(%0, %%esi), %%mm0 \n\t" - "movq 1032(%0, %%esi), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq 2048(%0, %%esi), %%mm4 \n\t" - "movq 2056(%0, %%esi), %%mm5 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm4, 1024(%0, %%esi) \n\t" - "movq %%mm5, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1406,29 +1406,29 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) asm volatile( "movd %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 1032(%1, %%esi), %%mm1 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1032(%1, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq (%1, %%esi), %%mm4 \n\t" - "movq 8(%1, %%esi), %%mm5 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm4 \n\t" + "movq 8(%1, %%"REG_S"), %%mm5 \n\t" "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm4, (%1, %%esi) \n\t" - "movq %%mm5, 8(%1, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm4, (%1, %%"REG_S") \n\t" + "movq %%mm5, 8(%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (left+256), "r" (right+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1437,15 +1437,15 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 2048(%0, %%esi), %%mm0 \n\t" // surround - "movq 2056(%0, %%esi), %%mm1 \n\t" // surround - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq 1024(%0, %%esi), %%mm4 \n\t" - "movq 1032(%0, %%esi), %%mm5 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm7, %%mm2 \n\t" "pfadd %%mm7, %%mm3 \n\t" "pfadd %%mm7, %%mm4 \n\t" @@ -1454,14 +1454,14 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias) "pfsub %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm4, 1024(%0, %%esi) \n\t" - "movq %%mm5, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1470,31 +1470,31 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 1024(%0, %%esi), %%mm0 \n\t" - "movq 1032(%0, %%esi), %%mm1 \n\t" - "pfadd 3072(%0, %%esi), %%mm0 \n\t" - "pfadd 3080(%0, %%esi), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq 2048(%0, %%esi), %%mm4 \n\t" - "movq 2056(%0, %%esi), %%mm5 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm4, 1024(%0, %%esi) \n\t" - "movq %%mm5, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1503,35 +1503,35 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 1024(%0, %%esi), %%mm0 \n\t" - "movq 1032(%0, %%esi), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq 2048(%0, %%esi), %%mm4 \n\t" - "movq 2056(%0, %%esi), %%mm5 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq 3072(%0, %%esi), %%mm0 \n\t" // surround - "movq 3080(%0, %%esi), %%mm1 \n\t" // surround + "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround + "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround "pfsub %%mm0, %%mm2 \n\t" "pfsub %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm4, 1024(%0, %%esi) \n\t" - "movq %%mm5, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1540,17 +1540,17 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 2048(%0, %%esi), %%mm0 \n\t" - "movq 2056(%0, %%esi), %%mm1 \n\t" - "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround - "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq 1024(%0, %%esi), %%mm4 \n\t" - "movq 1032(%0, %%esi), %%mm5 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm7, %%mm2 \n\t" "pfadd %%mm7, %%mm3 \n\t" "pfadd %%mm7, %%mm4 \n\t" @@ -1559,14 +1559,14 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias) "pfsub %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm4 \n\t" "pfadd %%mm1, %%mm5 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm4, 1024(%0, %%esi) \n\t" - "movq %%mm5, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1575,31 +1575,31 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias) asm volatile( "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq 1024(%0, %%esi), %%mm0 \n\t" - "movq 1032(%0, %%esi), %%mm1 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common "movq %%mm0, %%mm2 \n\t" // common "movq %%mm1, %%mm3 \n\t" // common - "pfadd (%0, %%esi), %%mm0 \n\t" - "pfadd 8(%0, %%esi), %%mm1 \n\t" - "pfadd 2048(%0, %%esi), %%mm2 \n\t" - "pfadd 2056(%0, %%esi), %%mm3 \n\t" - "pfadd 3072(%0, %%esi), %%mm0 \n\t" - "pfadd 3080(%0, %%esi), %%mm1 \n\t" - "pfadd 4096(%0, %%esi), %%mm2 \n\t" - "pfadd 4104(%0, %%esi), %%mm3 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "movq %%mm2, 1024(%0, %%esi) \n\t" - "movq %%mm3, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "pfadd (%0, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "movq %%mm2, 1024(%0, %%"REG_S")\n\t" + "movq %%mm3, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1607,23 +1607,23 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias) static void mix32toS_3dnow (sample_t * samples, sample_t bias) { asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movq 1024(%0, %%esi), %%mm0 \n\t" - "movq 1032(%0, %%esi), %%mm1 \n\t" - "movq 3072(%0, %%esi), %%mm4 \n\t" - "movq 3080(%0, %%esi), %%mm5 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "movq 3072(%0, %%"REG_S"), %%mm4\n\t" + "movq 3080(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround - "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround - "movq (%0, %%esi), %%mm2 \n\t" - "movq 8(%0, %%esi), %%mm3 \n\t" - "movq 2048(%0, %%esi), %%mm6 \n\t" - "movq 2056(%0, %%esi), %%mm7 \n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround + "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround + "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm6\n\t" + "movq 2056(%0, %%"REG_S"), %%mm7\n\t" "pfsub %%mm4, %%mm2 \n\t" "pfsub %%mm5, %%mm3 \n\t" "pfadd %%mm4, %%mm6 \n\t" @@ -1632,14 +1632,14 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias) "pfadd %%mm1, %%mm3 \n\t" "pfadd %%mm0, %%mm6 \n\t" "pfadd %%mm1, %%mm7 \n\t" - "movq %%mm2, (%0, %%esi) \n\t" - "movq %%mm3, 8(%0, %%esi) \n\t" - "movq %%mm6, 1024(%0, %%esi) \n\t" - "movq %%mm7, 1032(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" + "movq %%mm6, 1024(%0, %%"REG_S")\n\t" + "movq %%mm7, 1032(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (samples+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1648,29 +1648,29 @@ static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) asm volatile( "movd %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t" - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%esi), %%mm0 \n\t" - "movq 8(%0, %%esi), %%mm1 \n\t" - "movq 16(%0, %%esi), %%mm2 \n\t" - "movq 24(%0, %%esi), %%mm3 \n\t" - "pfadd 1024(%0, %%esi), %%mm0 \n\t" - "pfadd 1032(%0, %%esi), %%mm1 \n\t" - "pfadd 1040(%0, %%esi), %%mm2 \n\t" - "pfadd 1048(%0, %%esi), %%mm3 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" + "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" "pfadd %%mm7, %%mm2 \n\t" "pfadd %%mm7, %%mm3 \n\t" - "movq %%mm0, (%1, %%esi) \n\t" - "movq %%mm1, 8(%1, %%esi) \n\t" - "movq %%mm2, 16(%1, %%esi) \n\t" - "movq %%mm3, 24(%1, %%esi) \n\t" - "addl $32, %%esi \n\t" + "movq %%mm0, (%1, %%"REG_S") \n\t" + "movq %%mm1, 8(%1, %%"REG_S") \n\t" + "movq %%mm2, 16(%1, %%"REG_S") \n\t" + "movq %%mm3, 24(%1, %%"REG_S") \n\t" + "add $32, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (src+256), "r" (dest+256), "m" (bias) - : "%esi" + : "%"REG_S ); } @@ -1816,4 +1816,4 @@ static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t b __asm __volatile("femms":::"memory"); } -#endif //ARCH_X86 +#endif // ARCH_X86 || ARCH_X86_64 diff --git a/liba52/imdct.c b/liba52/imdct.c index 68140fc816..ce8cf24743 100644 --- a/liba52/imdct.c +++ b/liba52/imdct.c @@ -101,7 +101,7 @@ static uint8_t bit_reverse_256[] = { 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b, 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f}; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) // NOTE: SSE needs 16byte alignment or it will segfault // static complex_t __attribute__((aligned(16))) buf[128]; @@ -442,8 +442,8 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) int k; int p,q; int m; - int two_m; - int two_m_plus_one; + long two_m; + long two_m_plus_one; sample_t tmp_b_i; sample_t tmp_b_r; @@ -747,7 +747,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) // Stuff below this line is borrowed from libac3 #include "srfftp.h" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #ifndef HAVE_3DNOW #define HAVE_3DNOW 1 #endif @@ -768,9 +768,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) /* int i,k; int p,q;*/ int m; - int two_m; - int two_m_plus_one; - int two_m_plus_one_shl3; + long two_m; + long two_m_plus_one; + long two_m_plus_one_shl3; complex_t *buf_offset; /* sample_t tmp_a_i; @@ -788,33 +788,33 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ /* Bit reversed shuffling */ asm volatile( - "xorl %%esi, %%esi \n\t" - "leal "MANGLE(bit_reverse_512)", %%eax \n\t" - "movl $1008, %%edi \n\t" - "pushl %%ebp \n\t" //use ebp without telling gcc + "xor %%"REG_S", %%"REG_S" \n\t" + "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" + "mov $1008, %%"REG_D" \n\t" + "push %%"REG_BP" \n\t" //use ebp without telling gcc ".balign 16 \n\t" "1: \n\t" - "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI - "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI - "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi - "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI + "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi + "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR - "movaps "MANGLE(sseSinCos1c)"(%%esi), %%xmm2\n\t" + "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t" "mulps %%xmm0, %%xmm2 \n\t" "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI - "mulps "MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t" + "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" "subps %%xmm0, %%xmm2 \n\t" - "movzbl (%%eax), %%edx \n\t" - "movzbl 1(%%eax), %%ebp \n\t" - "movlps %%xmm2, (%1, %%edx,8) \n\t" - "movhps %%xmm2, (%1, %%ebp,8) \n\t" - "addl $16, %%esi \n\t" - "addl $2, %%eax \n\t" // avoid complex addressing for P4 crap - "subl $16, %%edi \n\t" - " jnc 1b \n\t" - "popl %%ebp \n\t"//no we didnt touch ebp *g* - :: "b" (data), "c" (buf) - : "%esi", "%edi", "%eax", "%edx" + "movzb (%%"REG_a"), %%"REG_d" \n\t" + "movzb 1(%%"REG_a"), %%"REG_BP" \n\t" + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" + "add $16, %%"REG_S" \n\t" + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap + "sub $16, %%"REG_D" \n\t" + "jnc 1b \n\t" + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* + :: "r" (data), "r" (buf) + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d ); @@ -850,44 +850,44 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) asm volatile( "xorps %%xmm1, %%xmm1 \n\t" "xorps %%xmm2, %%xmm2 \n\t" - "movl %0, %%esi \n\t" + "mov %0, %%"REG_S" \n\t" ".balign 16 \n\t" "1: \n\t" - "movlps (%%esi), %%xmm0 \n\t" //buf[p] - "movlps 8(%%esi), %%xmm1\n\t" //buf[q] - "movhps (%%esi), %%xmm0 \n\t" //buf[p] - "movhps 8(%%esi), %%xmm2\n\t" //buf[q] + "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] + "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] + "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] + "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] "addps %%xmm1, %%xmm0 \n\t" "subps %%xmm2, %%xmm0 \n\t" - "movaps %%xmm0, (%%esi) \n\t" - "addl $16, %%esi \n\t" - "cmpl %1, %%esi \n\t" + "movaps %%xmm0, (%%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" :: "g" (buf), "r" (buf + 128) - : "%esi" + : "%"REG_S ); /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} asm volatile( "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 - "movl %0, %%esi \n\t" + "mov %0, %%"REG_S" \n\t" ".balign 16 \n\t" "1: \n\t" - "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3 + "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 - "movaps (%%esi), %%xmm0 \n\t" //r0,i0,r1,i1 - "movaps (%%esi), %%xmm1 \n\t" //r0,i0,r1,i1 + "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 + "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1 "addps %%xmm2, %%xmm0 \n\t" "subps %%xmm2, %%xmm1 \n\t" - "movaps %%xmm0, (%%esi) \n\t" - "movaps %%xmm1, 16(%%esi) \n\t" - "addl $32, %%esi \n\t" - "cmpl %1, %%esi \n\t" + "movaps %%xmm0, (%%"REG_S") \n\t" + "movaps %%xmm1, 16(%%"REG_S") \n\t" + "add $32, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" :: "g" (buf), "r" (buf + 128) - : "%esi" + : "%"REG_S ); /* 3. iteration */ @@ -902,11 +902,11 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" "xorps %%xmm5, %%xmm5 \n\t" "xorps %%xmm2, %%xmm2 \n\t" - "movl %0, %%esi \n\t" + "mov %0, %%"REG_S" \n\t" ".balign 16 \n\t" "1: \n\t" - "movaps 32(%%esi), %%xmm2 \n\t" //r4,i4,r5,i5 - "movaps 48(%%esi), %%xmm3 \n\t" //r6,i6,r7,i7 + "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 + "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 "mulps %%xmm2, %%xmm4 \n\t" @@ -915,8 +915,8 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 "mulps %%xmm6, %%xmm3 \n\t" "mulps %%xmm7, %%xmm2 \n\t" - "movaps (%%esi), %%xmm0 \n\t" //r0,i0,r1,i1 - "movaps 16(%%esi), %%xmm1 \n\t" //r2,i2,r3,i3 + "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 + "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3 "addps %%xmm4, %%xmm2 \n\t" "addps %%xmm5, %%xmm3 \n\t" "movaps %%xmm2, %%xmm4 \n\t" @@ -925,15 +925,15 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "addps %%xmm1, %%xmm3 \n\t" "subps %%xmm4, %%xmm0 \n\t" "subps %%xmm5, %%xmm1 \n\t" - "movaps %%xmm2, (%%esi) \n\t" - "movaps %%xmm3, 16(%%esi) \n\t" - "movaps %%xmm0, 32(%%esi) \n\t" - "movaps %%xmm1, 48(%%esi) \n\t" - "addl $64, %%esi \n\t" - "cmpl %1, %%esi \n\t" + "movaps %%xmm2, (%%"REG_S") \n\t" + "movaps %%xmm3, 16(%%"REG_S") \n\t" + "movaps %%xmm0, 32(%%"REG_S") \n\t" + "movaps %%xmm1, 48(%%"REG_S") \n\t" + "add $64, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" :: "g" (buf), "r" (buf + 128) - : "%esi" + : "%"REG_S ); /* 4-7. iterations */ @@ -943,52 +943,52 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) two_m_plus_one_shl3 = (two_m_plus_one<<3); buf_offset = buf+128; asm volatile( - "movl %0, %%esi \n\t" + "mov %0, %%"REG_S" \n\t" ".balign 16 \n\t" "1: \n\t" - "xorl %%edi, %%edi \n\t" // k - "leal (%%esi, %3), %%edx \n\t" + "xor %%"REG_D", %%"REG_D" \n\t" // k + "lea (%%"REG_S", %3), %%"REG_d" \n\t" "2: \n\t" - "movaps (%%edx, %%edi), %%xmm1 \n\t" - "movaps (%4, %%edi, 2), %%xmm2 \n\t" + "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" + "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t" "mulps %%xmm1, %%xmm2 \n\t" "shufps $0xB1, %%xmm1, %%xmm1 \n\t" - "mulps 16(%4, %%edi, 2), %%xmm1 \n\t" - "movaps (%%esi, %%edi), %%xmm0 \n\t" + "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t" + "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t" "addps %%xmm2, %%xmm1 \n\t" "movaps %%xmm1, %%xmm2 \n\t" "addps %%xmm0, %%xmm1 \n\t" "subps %%xmm2, %%xmm0 \n\t" - "movaps %%xmm1, (%%esi, %%edi) \n\t" - "movaps %%xmm0, (%%edx, %%edi) \n\t" - "addl $16, %%edi \n\t" - "cmpl %3, %%edi \n\t" //FIXME (opt) count against 0 - " jb 2b \n\t" - "addl %2, %%esi \n\t" - "cmpl %1, %%esi \n\t" + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" + "add $16, %%"REG_D" \n\t" + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 + "jb 2b \n\t" + "add %2, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), "r" (sseW[m]) - : "%esi", "%edi", "%edx" + : "%"REG_S, "%"REG_D, "%"REG_d ); } /* Post IFFT complex multiply plus IFFT complex conjugate*/ asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" ".balign 16 \n\t" "1: \n\t" - "movaps (%0, %%esi), %%xmm0 \n\t" - "movaps (%0, %%esi), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "shufps $0xB1, %%xmm0, %%xmm0 \n\t" - "mulps 1024+"MANGLE(sseSinCos1c)"(%%esi), %%xmm1\n\t" - "mulps 1024+"MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t" + "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" + "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" "addps %%xmm1, %%xmm0 \n\t" - "movaps %%xmm0, (%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" :: "r" (buf+128) - : "%esi" + : "%"REG_S ); @@ -998,54 +998,54 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) /* Window and convert to real valued signal */ asm volatile( - "xorl %%edi, %%edi \n\t" // 0 - "xorl %%esi, %%esi \n\t" // 0 + "xor %%"REG_D", %%"REG_D" \n\t" // 0 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 "movss %3, %%xmm2 \n\t" // bias "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... ".balign 16 \n\t" "1: \n\t" - "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? - "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? - "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? - "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A - "mulps "MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" - "addps (%2, %%esi), %%xmm0 \n\t" + "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" + "addps (%2, %%"REG_S"), %%xmm0 \n\t" "addps %%xmm2, %%xmm0 \n\t" - "movaps %%xmm0, (%1, %%esi) \n\t" - "addl $16, %%esi \n\t" - "subl $16, %%edi \n\t" - "cmpl $512, %%esi \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) - : "%esi", "%edi" + : "%"REG_S, "%"REG_D ); data_ptr+=128; delay_ptr+=128; // window_ptr+=128; asm volatile( - "movl $1024, %%edi \n\t" // 512 - "xorl %%esi, %%esi \n\t" // 0 + "mov $1024, %%"REG_D" \n\t" // 512 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 "movss %3, %%xmm2 \n\t" // bias "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... ".balign 16 \n\t" "1: \n\t" - "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A - "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C - "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C - "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A - "mulps 512+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" - "addps (%2, %%esi), %%xmm0 \n\t" + "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" + "addps (%2, %%"REG_S"), %%xmm0 \n\t" "addps %%xmm2, %%xmm0 \n\t" - "movaps %%xmm0, (%1, %%esi) \n\t" - "addl $16, %%esi \n\t" - "subl $16, %%edi \n\t" - "cmpl $512, %%esi \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) - : "%esi", "%edi" + : "%"REG_S, "%"REG_D ); data_ptr+=128; // window_ptr+=128; @@ -1054,48 +1054,48 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) delay_ptr = delay; asm volatile( - "xorl %%edi, %%edi \n\t" // 0 - "xorl %%esi, %%esi \n\t" // 0 + "xor %%"REG_D", %%"REG_D" \n\t" // 0 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 ".balign 16 \n\t" "1: \n\t" - "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A - "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C - "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C - "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A - "mulps 1024+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" - "movaps %%xmm0, (%1, %%esi) \n\t" - "addl $16, %%esi \n\t" - "subl $16, %%edi \n\t" - "cmpl $512, %%esi \n\t" + "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (delay_ptr) - : "%esi", "%edi" + : "%"REG_S, "%"REG_D ); delay_ptr+=128; // window_ptr-=128; asm volatile( - "movl $1024, %%edi \n\t" // 1024 - "xorl %%esi, %%esi \n\t" // 0 + "mov $1024, %%"REG_D" \n\t" // 1024 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 ".balign 16 \n\t" "1: \n\t" - "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? - "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? - "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? - "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A - "mulps 1536+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" - "movaps %%xmm0, (%1, %%esi) \n\t" - "addl $16, %%esi \n\t" - "subl $16, %%edi \n\t" - "cmpl $512, %%esi \n\t" + "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (delay_ptr) - : "%esi", "%edi" + : "%"REG_S, "%"REG_D ); } -#endif //arch_x86 +#endif // ARCH_X86 || ARCH_X86_64 void imdct_do_256(sample_t data[],sample_t delay[],sample_t bias) @@ -1242,7 +1242,7 @@ void imdct_init (uint32_t mm_accel) xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) for (i = 0; i < 128; i++) { sseSinCos1c[2*i+0]= xcos1[i]; sseSinCos1c[2*i+1]= -xcos1[i]; @@ -1264,7 +1264,7 @@ void imdct_init (uint32_t mm_accel) w[i][k].imag = sin (-M_PI * k / j); } } -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) for (i = 1; i < 7; i++) { j = 1 << i; for (k = 0; k < j; k+=2) { @@ -1307,10 +1307,10 @@ void imdct_init (uint32_t mm_accel) sseWindow[384 + 2*i+0]= imdct_window[126 - 2*i+1]; sseWindow[384 + 2*i+1]= -imdct_window[126 - 2*i+0]; } -#endif // arch_x86 +#endif // ARCH_X86 || ARCH_X86_64 imdct_512 = imdct_do_512; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if(mm_accel & MM_ACCEL_X86_SSE) { fprintf (stderr, "Using SSE optimized IMDCT transform\n"); @@ -1329,7 +1329,7 @@ void imdct_init (uint32_t mm_accel) imdct_512 = imdct_do_512_3dnow; } else -#endif // arch_x86 +#endif // ARCH_X86 || ARCH_X86_64 #ifdef HAVE_ALTIVEC if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { diff --git a/liba52/resample.c b/liba52/resample.c index 03210840b5..e130afef86 100644 --- a/liba52/resample.c +++ b/liba52/resample.c @@ -15,7 +15,7 @@ int (* a52_resample) (float * _f, int16_t * s16)=NULL; #include "resample_c.c" -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include "resample_mmx.c" #endif @@ -26,7 +26,7 @@ int (* a52_resample) (float * _f, int16_t * s16)=NULL; void* a52_resample_init(uint32_t mm_accel,int flags,int chans){ void* tmp; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) if(mm_accel&MM_ACCEL_X86_MMX){ tmp=a52_resample_MMX(flags,chans); if(tmp){ diff --git a/liba52/resample_mmx.c b/liba52/resample_mmx.c index 6f45d88ea7..799b2e3683 100644 --- a/liba52/resample_mmx.c +++ b/liba52/resample_mmx.c @@ -7,6 +7,9 @@ and it would mean (C / MMX2 / MMX / 3DNOW) versions */ +#include "a52_internal.h" + + static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; @@ -15,36 +18,36 @@ static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF000 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-512, %%esi \n\t" + "mov $-512, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(wm1100)", %%mm3 \n\t" "movq "MANGLE(wm0101)", %%mm4 \n\t" "movq "MANGLE(wm1010)", %%mm5 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq (%1, %%esi, 2), %%mm0 \n\t" - "movq 8(%1, %%esi, 2), %%mm1 \n\t" - "leal (%%esi, %%esi, 4), %%edi \n\t" + "movq (%1, %%"REG_S", 2), %%mm0 \n\t" + "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" + "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "packssdw %%mm1, %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "pand %%mm4, %%mm0 \n\t" "pand %%mm5, %%mm1 \n\t" - "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 - "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 + "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 + "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 "pand %%mm3, %%mm0 \n\t" - "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 - "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B + "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 + "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B "pand %%mm3, %%mm1 \n\t" - "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 - "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 - "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B - "addl $8, %%esi \n\t" + "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 + "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 + "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 B + "add $8, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1280), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 5*256; } @@ -54,29 +57,29 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it #ifdef HAVE_SSE asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "1: \n\t" - "cvtps2pi (%1, %%esi), %%mm0 \n\t" - "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" + "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" + "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" "movq %%mm0, %%mm1 \n\t" "punpcklwd %%mm2, %%mm0 \n\t" "punpckhwd %%mm2, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+512), "r" (f+256) - :"%esi", "memory" + :"%"REG_S, "memory" );*/ asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "1: \n\t" - "movq (%1, %%esi), %%mm0 \n\t" - "movq 8(%1, %%esi), %%mm1 \n\t" - "movq 1024(%1, %%esi), %%mm2 \n\t" - "movq 1032(%1, %%esi), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm0 \n\t" + "movq 8(%1, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm2\n\t" + "movq 1032(%1, %%"REG_S"), %%mm3\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" @@ -86,13 +89,13 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ "movq %%mm0, %%mm1 \n\t" "punpcklwd %%mm2, %%mm0 \n\t" "punpckhwd %%mm2, %%mm1 \n\t" - "movq %%mm0, (%0, %%esi) \n\t" - "movq %%mm1, 8(%0, %%esi) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+512), "r" (f+256) - :"%esi", "memory" + :"%"REG_S, "memory" ); return 2*256; } @@ -100,23 +103,23 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "movq %%mm7, %%mm5 \n\t" "punpckldq %%mm6, %%mm5 \n\t" "1: \n\t" - "movd (%1, %%esi), %%mm0 \n\t" - "punpckldq 2048(%1, %%esi), %%mm0\n\t" - "movd 1024(%1, %%esi), %%mm1 \n\t" - "punpckldq 4(%1, %%esi), %%mm1 \n\t" - "movd 2052(%1, %%esi), %%mm2 \n\t" + "movd (%1, %%"REG_S"), %%mm0 \n\t" + "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" + "movd 1024(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" + "movd 2052(%1, %%"REG_S"), %%mm2\n\t" "movq %%mm7, %%mm3 \n\t" - "punpckldq 1028(%1, %%esi), %%mm3\n\t" - "movd 8(%1, %%esi), %%mm4 \n\t" - "punpckldq 2056(%1, %%esi), %%mm4\n\t" - "leal (%%esi, %%esi, 4), %%edi \n\t" - "sarl $1, %%edi \n\t" + "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" + "movd 8(%1, %%"REG_S"), %%mm4 \n\t" + "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" + "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" + "sar $1, %%"REG_D" \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm5, %%mm2 \n\t" @@ -125,29 +128,28 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ "packssdw %%mm6, %%mm0 \n\t" "packssdw %%mm2, %%mm1 \n\t" "packssdw %%mm4, %%mm3 \n\t" - "movq %%mm0, (%0, %%edi) \n\t" - "movq %%mm1, 8(%0, %%edi) \n\t" - "movq %%mm3, 16(%0, %%edi) \n\t" - - "movd 1032(%1, %%esi), %%mm1 \n\t" - "punpckldq 12(%1, %%esi), %%mm1\n\t" - "movd 2060(%1, %%esi), %%mm2 \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" + "movq %%mm1, 8(%0, %%"REG_D") \n\t" + "movq %%mm3, 16(%0, %%"REG_D") \n\t" + "movd 1032(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" + "movd 2060(%1, %%"REG_S"), %%mm2\n\t" "movq %%mm7, %%mm3 \n\t" - "punpckldq 1036(%1, %%esi), %%mm3\n\t" + "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" "pxor %%mm0, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm5, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" - "movq %%mm0, 24(%0, %%edi) \n\t" - "movq %%mm2, 32(%0, %%edi) \n\t" + "movq %%mm0, 24(%0, %%"REG_D") \n\t" + "movq %%mm2, 32(%0, %%"REG_D") \n\t" - "addl $16, %%esi \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1280), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 5*256; } @@ -155,23 +157,23 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "1: \n\t" - "movq (%1, %%esi), %%mm0 \n\t" - "movq 8(%1, %%esi), %%mm1 \n\t" - "movq 1024(%1, %%esi), %%mm2 \n\t" - "movq 1032(%1, %%esi), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm0 \n\t" + "movq 8(%1, %%"REG_S"), %%mm1 \n\t" + "movq 1024(%1, %%"REG_S"), %%mm2\n\t" + "movq 1032(%1, %%"REG_S"), %%mm3\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" - "movq 2048(%1, %%esi), %%mm3 \n\t" - "movq 2056(%1, %%esi), %%mm4 \n\t" - "movq 3072(%1, %%esi), %%mm5 \n\t" - "movq 3080(%1, %%esi), %%mm6 \n\t" + "movq 2048(%1, %%"REG_S"), %%mm3\n\t" + "movq 2056(%1, %%"REG_S"), %%mm4\n\t" + "movq 3072(%1, %%"REG_S"), %%mm5\n\t" + "movq 3080(%1, %%"REG_S"), %%mm6\n\t" "psubd %%mm7, %%mm3 \n\t" "psubd %%mm7, %%mm4 \n\t" "psubd %%mm7, %%mm5 \n\t" @@ -190,15 +192,15 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ "punpckhdq %%mm3, %%mm2 \n\t" "punpckldq %%mm4, %%mm1 \n\t" "punpckhdq %%mm4, %%mm5 \n\t" - "movq %%mm0, (%0, %%esi,2) \n\t" - "movq %%mm2, 8(%0, %%esi,2) \n\t" - "movq %%mm1, 16(%0, %%esi,2) \n\t" - "movq %%mm5, 24(%0, %%esi,2) \n\t" - "addl $16, %%esi \n\t" + "movq %%mm0, (%0, %%"REG_S",2) \n\t" + "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" + "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" + "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1024), "r" (f+256) - :"%esi", "memory" + :"%"REG_S, "memory" ); return 4*256; } @@ -206,23 +208,23 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "1: \n\t" - "movd (%1, %%esi), %%mm0 \n\t" - "punpckldq 2048(%1, %%esi), %%mm0\n\t" - "movd 3072(%1, %%esi), %%mm1 \n\t" - "punpckldq 4096(%1, %%esi), %%mm1\n\t" - "movd 1024(%1, %%esi), %%mm2 \n\t" - "punpckldq 4(%1, %%esi), %%mm2 \n\t" - "movd 2052(%1, %%esi), %%mm3 \n\t" - "punpckldq 3076(%1, %%esi), %%mm3\n\t" - "movd 4100(%1, %%esi), %%mm4 \n\t" - "punpckldq 1028(%1, %%esi), %%mm4\n\t" - "movd 8(%1, %%esi), %%mm5 \n\t" - "punpckldq 2056(%1, %%esi), %%mm5\n\t" - "leal (%%esi, %%esi, 4), %%edi \n\t" - "sarl $1, %%edi \n\t" + "movd (%1, %%"REG_S"), %%mm0 \n\t" + "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" + "movd 3072(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" + "movd 1024(%1, %%"REG_S"), %%mm2\n\t" + "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" + "movd 2052(%1, %%"REG_S"), %%mm3\n\t" + "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" + "movd 4100(%1, %%"REG_S"), %%mm4\n\t" + "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" + "movd 8(%1, %%"REG_S"), %%mm5 \n\t" + "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" + "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" + "sar $1, %%"REG_D" \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" @@ -232,32 +234,32 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" "packssdw %%mm5, %%mm4 \n\t" - "movq %%mm0, (%0, %%edi) \n\t" - "movq %%mm2, 8(%0, %%edi) \n\t" - "movq %%mm4, 16(%0, %%edi) \n\t" + "movq %%mm0, (%0, %%"REG_D") \n\t" + "movq %%mm2, 8(%0, %%"REG_D") \n\t" + "movq %%mm4, 16(%0, %%"REG_D") \n\t" - "movd 3080(%1, %%esi), %%mm0 \n\t" - "punpckldq 4104(%1, %%esi), %%mm0\n\t" - "movd 1032(%1, %%esi), %%mm1 \n\t" - "punpckldq 12(%1, %%esi), %%mm1\n\t" - "movd 2060(%1, %%esi), %%mm2 \n\t" - "punpckldq 3084(%1, %%esi), %%mm2\n\t" - "movd 4108(%1, %%esi), %%mm3 \n\t" - "punpckldq 1036(%1, %%esi), %%mm3\n\t" + "movd 3080(%1, %%"REG_S"), %%mm0\n\t" + "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" + "movd 1032(%1, %%"REG_S"), %%mm1\n\t" + "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" + "movd 2060(%1, %%"REG_S"), %%mm2\n\t" + "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" + "movd 4108(%1, %%"REG_S"), %%mm3\n\t" + "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" - "movq %%mm0, 24(%0, %%edi) \n\t" - "movq %%mm2, 32(%0, %%edi) \n\t" + "movq %%mm0, 24(%0, %%"REG_D") \n\t" + "movq %%mm2, 32(%0, %%"REG_D") \n\t" - "addl $16, %%esi \n\t" + "add $16, %%"REG_S" \n\t" " jnz 1b \n\t" "emms \n\t" :: "r" (s16+1280), "r" (f+256) - :"%esi", "%edi", "memory" + :"%"REG_S, "%"REG_D, "memory" ); return 5*256; } @@ -265,14 +267,14 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ int32_t * f = (int32_t *) _f; asm volatile( - "movl $-1024, %%esi \n\t" + "mov $-1024, %%"REG_S" \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" - "movq 1024(%1, %%esi), %%mm0 \n\t" - "movq 1032(%1, %%esi), %%mm1 \n\t" - "mov