summary | refs | log | tree | commit | diff | stats
path: root/liba52/resample_mmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'liba52/resample_mmx.c')
-rw-r--r-- liba52/resample_mmx.c 320
1 files changed, 161 insertions, 159 deletions
diff --git a/liba52/resample_mmx.c b/liba52/resample_mmx.c
index 6f45d88ea7..799b2e3683 100644
--- a/liba52/resample_mmx.c
+++ b/liba52/resample_mmx.c
@@ -7,6 +7,9 @@
and it would mean (C / MMX2 / MMX / 3DNOW) versions
*/
+#include "a52_internal.h"
+
+
static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
@@ -15,36 +18,36 @@ static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF000
static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-512, %%esi \n\t"
+ "mov $-512, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"movq "MANGLE(wm1100)", %%mm3 \n\t"
"movq "MANGLE(wm0101)", %%mm4 \n\t"
"movq "MANGLE(wm1010)", %%mm5 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
- "movq (%1, %%esi, 2), %%mm0 \n\t"
- "movq 8(%1, %%esi, 2), %%mm1 \n\t"
- "leal (%%esi, %%esi, 4), %%edi \n\t"
+ "movq (%1, %%"REG_S", 2), %%mm0 \n\t"
+ "movq 8(%1, %%"REG_S", 2), %%mm1\n\t"
+ "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"pand %%mm4, %%mm0 \n\t"
"pand %%mm5, %%mm1 \n\t"
- "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0
- "movd %%mm0, 8(%0, %%edi) \n\t" // A 0
+ "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0
+ "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0
"pand %%mm3, %%mm0 \n\t"
- "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0
- "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B
+ "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0
+ "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B
"pand %%mm3, %%mm1 \n\t"
- "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0
- "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0
- "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B
- "addl $8, %%esi \n\t"
+ "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0
+ "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0
+ "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 B
+ "add $8, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1280), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 5*256;
}
@@ -54,29 +57,29 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
#ifdef HAVE_SSE
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"1: \n\t"
- "cvtps2pi (%1, %%esi), %%mm0 \n\t"
- "cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
+ "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t"
+ "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t"
"movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
- "movq %%mm0, (%0, %%esi) \n\t"
- "movq %%mm1, 8(%0, %%esi) \n\t"
- "addl $16, %%esi \n\t"
+ "movq %%mm0, (%0, %%"REG_S") \n\t"
+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
+ "add $16, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+512), "r" (f+256)
- :"%esi", "memory"
+ :"%"REG_S, "memory"
);*/
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t"
- "movq (%1, %%esi), %%mm0 \n\t"
- "movq 8(%1, %%esi), %%mm1 \n\t"
- "movq 1024(%1, %%esi), %%mm2 \n\t"
- "movq 1032(%1, %%esi), %%mm3 \n\t"
+ "movq (%1, %%"REG_S"), %%mm0 \n\t"
+ "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
+ "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
@@ -86,13 +89,13 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
"movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
- "movq %%mm0, (%0, %%esi) \n\t"
- "movq %%mm1, 8(%0, %%esi) \n\t"
- "addl $16, %%esi \n\t"
+ "movq %%mm0, (%0, %%"REG_S") \n\t"
+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
+ "add $16, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+512), "r" (f+256)
- :"%esi", "memory"
+ :"%"REG_S, "memory"
);
return 2*256;
}
@@ -100,23 +103,23 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"movq %%mm7, %%mm5 \n\t"
"punpckldq %%mm6, %%mm5 \n\t"
"1: \n\t"
- "movd (%1, %%esi), %%mm0 \n\t"
- "punpckldq 2048(%1, %%esi), %%mm0\n\t"
- "movd 1024(%1, %%esi), %%mm1 \n\t"
- "punpckldq 4(%1, %%esi), %%mm1 \n\t"
- "movd 2052(%1, %%esi), %%mm2 \n\t"
+ "movd (%1, %%"REG_S"), %%mm0 \n\t"
+ "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
+ "movd 1024(%1, %%"REG_S"), %%mm1\n\t"
+ "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t"
+ "movd 2052(%1, %%"REG_S"), %%mm2\n\t"
"movq %%mm7, %%mm3 \n\t"
- "punpckldq 1028(%1, %%esi), %%mm3\n\t"
- "movd 8(%1, %%esi), %%mm4 \n\t"
- "punpckldq 2056(%1, %%esi), %%mm4\n\t"
- "leal (%%esi, %%esi, 4), %%edi \n\t"
- "sarl $1, %%edi \n\t"
+ "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t"
+ "movd 8(%1, %%"REG_S"), %%mm4 \n\t"
+ "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t"
+ "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
+ "sar $1, %%"REG_D" \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm5, %%mm2 \n\t"
@@ -125,29 +128,28 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
"packssdw %%mm6, %%mm0 \n\t"
"packssdw %%mm2, %%mm1 \n\t"
"packssdw %%mm4, %%mm3 \n\t"
- "movq %%mm0, (%0, %%edi) \n\t"
- "movq %%mm1, 8(%0, %%edi) \n\t"
- "movq %%mm3, 16(%0, %%edi) \n\t"
-
- "movd 1032(%1, %%esi), %%mm1 \n\t"
- "punpckldq 12(%1, %%esi), %%mm1\n\t"
- "movd 2060(%1, %%esi), %%mm2 \n\t"
+ "movq %%mm0, (%0, %%"REG_D") \n\t"
+ "movq %%mm1, 8(%0, %%"REG_D") \n\t"
+ "movq %%mm3, 16(%0, %%"REG_D") \n\t"
+ "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
+ "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
+ "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
"movq %%mm7, %%mm3 \n\t"
- "punpckldq 1036(%1, %%esi), %%mm3\n\t"
+ "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
"pxor %%mm0, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm5, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
- "movq %%mm0, 24(%0, %%edi) \n\t"
- "movq %%mm2, 32(%0, %%edi) \n\t"
+ "movq %%mm0, 24(%0, %%"REG_D") \n\t"
+ "movq %%mm2, 32(%0, %%"REG_D") \n\t"
- "addl $16, %%esi \n\t"
+ "add $16, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1280), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 5*256;
}
@@ -155,23 +157,23 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t"
- "movq (%1, %%esi), %%mm0 \n\t"
- "movq 8(%1, %%esi), %%mm1 \n\t"
- "movq 1024(%1, %%esi), %%mm2 \n\t"
- "movq 1032(%1, %%esi), %%mm3 \n\t"
+ "movq (%1, %%"REG_S"), %%mm0 \n\t"
+ "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
+ "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
- "movq 2048(%1, %%esi), %%mm3 \n\t"
- "movq 2056(%1, %%esi), %%mm4 \n\t"
- "movq 3072(%1, %%esi), %%mm5 \n\t"
- "movq 3080(%1, %%esi), %%mm6 \n\t"
+ "movq 2048(%1, %%"REG_S"), %%mm3\n\t"
+ "movq 2056(%1, %%"REG_S"), %%mm4\n\t"
+ "movq 3072(%1, %%"REG_S"), %%mm5\n\t"
+ "movq 3080(%1, %%"REG_S"), %%mm6\n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
@@ -190,15 +192,15 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
"punpckhdq %%mm3, %%mm2 \n\t"
"punpckldq %%mm4, %%mm1 \n\t"
"punpckhdq %%mm4, %%mm5 \n\t"
- "movq %%mm0, (%0, %%esi,2) \n\t"
- "movq %%mm2, 8(%0, %%esi,2) \n\t"
- "movq %%mm1, 16(%0, %%esi,2) \n\t"
- "movq %%mm5, 24(%0, %%esi,2) \n\t"
- "addl $16, %%esi \n\t"
+ "movq %%mm0, (%0, %%"REG_S",2) \n\t"
+ "movq %%mm2, 8(%0, %%"REG_S",2) \n\t"
+ "movq %%mm1, 16(%0, %%"REG_S",2)\n\t"
+ "movq %%mm5, 24(%0, %%"REG_S",2)\n\t"
+ "add $16, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1024), "r" (f+256)
- :"%esi", "memory"
+ :"%"REG_S, "memory"
);
return 4*256;
}
@@ -206,23 +208,23 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t"
- "movd (%1, %%esi), %%mm0 \n\t"
- "punpckldq 2048(%1, %%esi), %%mm0\n\t"
- "movd 3072(%1, %%esi), %%mm1 \n\t"
- "punpckldq 4096(%1, %%esi), %%mm1\n\t"
- "movd 1024(%1, %%esi), %%mm2 \n\t"
- "punpckldq 4(%1, %%esi), %%mm2 \n\t"
- "movd 2052(%1, %%esi), %%mm3 \n\t"
- "punpckldq 3076(%1, %%esi), %%mm3\n\t"
- "movd 4100(%1, %%esi), %%mm4 \n\t"
- "punpckldq 1028(%1, %%esi), %%mm4\n\t"
- "movd 8(%1, %%esi), %%mm5 \n\t"
- "punpckldq 2056(%1, %%esi), %%mm5\n\t"
- "leal (%%esi, %%esi, 4), %%edi \n\t"
- "sarl $1, %%edi \n\t"
+ "movd (%1, %%"REG_S"), %%mm0 \n\t"
+ "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
+ "movd 3072(%1, %%"REG_S"), %%mm1\n\t"
+ "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t"
+ "movd 1024(%1, %%"REG_S"), %%mm2\n\t"
+ "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t"
+ "movd 2052(%1, %%"REG_S"), %%mm3\n\t"
+ "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t"
+ "movd 4100(%1, %%"REG_S"), %%mm4\n\t"
+ "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t"
+ "movd 8(%1, %%"REG_S"), %%mm5 \n\t"
+ "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t"
+ "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
+ "sar $1, %%"REG_D" \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
@@ -232,32 +234,32 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"packssdw %%mm5, %%mm4 \n\t"
- "movq %%mm0, (%0, %%edi) \n\t"
- "movq %%mm2, 8(%0, %%edi) \n\t"
- "movq %%mm4, 16(%0, %%edi) \n\t"
+ "movq %%mm0, (%0, %%"REG_D") \n\t"
+ "movq %%mm2, 8(%0, %%"REG_D") \n\t"
+ "movq %%mm4, 16(%0, %%"REG_D") \n\t"
- "movd 3080(%1, %%esi), %%mm0 \n\t"
- "punpckldq 4104(%1, %%esi), %%mm0\n\t"
- "movd 1032(%1, %%esi), %%mm1 \n\t"
- "punpckldq 12(%1, %%esi), %%mm1\n\t"
- "movd 2060(%1, %%esi), %%mm2 \n\t"
- "punpckldq 3084(%1, %%esi), %%mm2\n\t"
- "movd 4108(%1, %%esi), %%mm3 \n\t"
- "punpckldq 1036(%1, %%esi), %%mm3\n\t"
+ "movd 3080(%1, %%"REG_S"), %%mm0\n\t"
+ "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t"
+ "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
+ "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
+ "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
+ "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t"
+ "movd 4108(%1, %%"REG_S"), %%mm3\n\t"
+ "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
- "movq %%mm0, 24(%0, %%edi) \n\t"
- "movq %%mm2, 32(%0, %%edi) \n\t"
+ "movq %%mm0, 24(%0, %%"REG_D") \n\t"
+ "movq %%mm2, 32(%0, %%"REG_D") \n\t"
- "addl $16, %%esi \n\t"
+ "add $16, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1280), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 5*256;
}
@@ -265,14 +267,14 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
- "movq 1024(%1, %%esi), %%mm0 \n\t"
- "movq 1032(%1, %%esi), %%mm1 \n\t"
- "movq (%1, %%esi), %%mm2 \n\t"
- "movq 8(%1, %%esi), %%mm3 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
+ "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
+ "movq (%1, %%"REG_S"), %%mm2 \n\t"
+ "movq 8(%1, %%"REG_S"), %%mm3 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
@@ -282,22 +284,22 @@ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
"movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
- "leal (%%esi, %%esi, 2), %%edi \n\t"
- "movq %%mm6, (%0, %%edi) \n\t"
- "movd %%mm0, 8(%0, %%edi) \n\t"
+ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
+ "movq %%mm6, (%0, %%"REG_D") \n\t"
+ "movd %%mm0, 8(%0, %%"REG_D") \n\t"
"punpckhdq %%mm0, %%mm0 \n\t"
- "movq %%mm6, 12(%0, %%edi) \n\t"
- "movd %%mm0, 20(%0, %%edi) \n\t"
- "movq %%mm6, 24(%0, %%edi) \n\t"
- "movd %%mm1, 32(%0, %%edi) \n\t"
+ "movq %%mm6, 12(%0, %%"REG_D") \n\t"
+ "movd %%mm0, 20(%0, %%"REG_D") \n\t"
+ "movq %%mm6, 24(%0, %%"REG_D") \n\t"
+ "movd %%mm1, 32(%0, %%"REG_D") \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
- "movq %%mm6, 36(%0, %%edi) \n\t"
- "movd %%mm1, 44(%0, %%edi) \n\t"
- "addl $16, %%esi \n\t"
+ "movq %%mm6, 36(%0, %%"REG_D") \n\t"
+ "movd %%mm1, 44(%0, %%"REG_D") \n\t"
+ "add $16, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 6*256;
}
@@ -305,17 +307,17 @@ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
- "movq 1024(%1, %%esi), %%mm0 \n\t"
- "movq 2048(%1, %%esi), %%mm1 \n\t"
- "movq (%1, %%esi), %%mm5 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
+ "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
+ "movq (%1, %%"REG_S"), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm5 \n\t"
- "leal (%%esi, %%esi, 2), %%edi \n\t"
+ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
"pxor %%mm4, %%mm4 \n\t"
"packssdw %%mm5, %%mm0 \n\t" // FfAa
@@ -327,15 +329,15 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
"punpckldq %%mm6, %%mm0 \n\t" // 00ba
"punpckhdq %%mm1, %%mm3 \n\t" // BAf0
- "movq %%mm0, (%0, %%edi) \n\t" // 00ba
+ "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba
"punpckhdq %%mm4, %%mm0 \n\t" // F000
- "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0
- "movq %%mm0, 16(%0, %%edi) \n\t" // F000
- "addl $8, %%esi \n\t"
+ "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0
+ "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000
+ "add $8, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 6*256;
}
@@ -343,19 +345,19 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
- "movq 1024(%1, %%esi), %%mm0 \n\t"
- "movq 3072(%1, %%esi), %%mm1 \n\t"
- "movq 2048(%1, %%esi), %%mm4 \n\t"
- "movq (%1, %%esi), %%mm5 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
+ "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
+ "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
+ "movq (%1, %%"REG_S"), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
- "leal (%%esi, %%esi, 2), %%edi \n\t"
+ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
"packssdw %%mm4, %%mm0 \n\t" // EeAa
"packssdw %%mm5, %%mm1 \n\t" // FfBb
@@ -366,16 +368,16 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
"punpckldq %%mm6, %%mm0 \n\t" // 00ba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
- "movq %%mm0, (%0, %%edi) \n\t"
+ "movq %%mm0, (%0, %%"REG_D") \n\t"
"punpckhdq %%mm2, %%mm0 \n\t" // FE00
"punpckldq %%mm1, %%mm2 \n\t" // BAfe
- "movq %%mm2, 8(%0, %%edi) \n\t"
- "movq %%mm0, 16(%0, %%edi) \n\t"
- "addl $8, %%esi \n\t"
+ "movq %%mm2, 8(%0, %%"REG_D") \n\t"
+ "movq %%mm0, 16(%0, %%"REG_D") \n\t"
+ "add $8, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 6*256;
}
@@ -383,21 +385,21 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
// "pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
- "movq 1024(%1, %%esi), %%mm0 \n\t"
- "movq 2048(%1, %%esi), %%mm1 \n\t"
- "movq 3072(%1, %%esi), %%mm2 \n\t"
- "movq 4096(%1, %%esi), %%mm3 \n\t"
- "movq (%1, %%esi), %%mm5 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
+ "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
+ "movq 3072(%1, %%"REG_S"), %%mm2\n\t"
+ "movq 4096(%1, %%"REG_S"), %%mm3\n\t"
+ "movq (%1, %%"REG_S"), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm5 \n\t"
- "leal (%%esi, %%esi, 2), %%edi \n\t"
+ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
"packssdw %%mm2, %%mm0 \n\t" // CcAa
"packssdw %%mm3, %%mm1 \n\t" // DdBb
@@ -414,14 +416,14 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
"punpckldq %%mm1, %%mm4 \n\t" // BAf0
"punpckhdq %%mm3, %%mm2 \n\t" // F0DC
- "movq %%mm0, (%0, %%edi) \n\t"
- "movq %%mm4, 8(%0, %%edi) \n\t"
- "movq %%mm2, 16(%0, %%edi) \n\t"
- "addl $8, %%esi \n\t"
+ "movq %%mm0, (%0, %%"REG_D") \n\t"
+ "movq %%mm4, 8(%0, %%"REG_D") \n\t"
+ "movq %%mm2, 16(%0, %%"REG_D") \n\t"
+ "add $8, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 6*256;
}
@@ -429,23 +431,23 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
- "movl $-1024, %%esi \n\t"
+ "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
// "pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
- "movq 1024(%1, %%esi), %%mm0 \n\t"
- "movq 3072(%1, %%esi), %%mm1 \n\t"
- "movq 4096(%1, %%esi), %%mm2 \n\t"
- "movq 5120(%1, %%esi), %%mm3 \n\t"
- "movq 2048(%1, %%esi), %%mm4 \n\t"
- "movq (%1, %%esi), %%mm5 \n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
+ "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
+ "movq 4096(%1, %%"REG_S"), %%mm2\n\t"
+ "movq 5120(%1, %%"REG_S"), %%mm3\n\t"
+ "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
+ "movq (%1, %%"REG_S"), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
- "leal (%%esi, %%esi, 2), %%edi \n\t"
+ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
"packssdw %%mm2, %%mm0 \n\t" // CcAa
"packssdw %%mm3, %%mm1 \n\t" // DdBb
@@ -462,14 +464,14 @@ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
"punpckldq %%mm1, %%mm4 \n\t" // BAfe
"punpckhdq %%mm3, %%mm2 \n\t" // FEDC
- "movq %%mm0, (%0, %%edi) \n\t"
- "movq %%mm4, 8(%0, %%edi) \n\t"
- "movq %%mm2, 16(%0, %%edi) \n\t"
- "addl $8, %%esi \n\t"
+ "movq %%mm0, (%0, %%"REG_D") \n\t"
+ "movq %%mm4, 8(%0, %%"REG_D") \n\t"
+ "movq %%mm2, 16(%0, %%"REG_D") \n\t"
+ "add $8, %%"REG_S" \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
- :"%esi", "%edi", "memory"
+ :"%"REG_S, "%"REG_D, "memory"
);
return 6*256;
}