summaryrefslogtreecommitdiffstats
path: root/liba52/liba52_changes.diff
diff options
context:
space:
mode:
Diffstat (limited to 'liba52/liba52_changes.diff')
-rw-r--r--liba52/liba52_changes.diff2028
1 files changed, 760 insertions, 1268 deletions
diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff
index ceb1de2576..09eefbd617 100644
--- a/liba52/liba52_changes.diff
+++ b/liba52/liba52_changes.diff
@@ -1,71 +1,81 @@
---- include/a52.h 2005-03-22 19:58:53.000000000 +0100
-+++ a52.h 2004-03-19 01:15:49.000000000 +0100
-@@ -19,6 +25,9 @@
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
+--- liba52-0.7.4/a52.h 2006-06-12 15:04:57.000000000 +0200
++++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200
+@@ -59,4 +63,9 @@
+ int a52_block (a52_state_t * state);
+ void a52_free (a52_state_t * state);
-+#ifndef A52_H
-+#define A52_H
-+
- #ifndef LIBA52_DOUBLE
- typedef float sample_t;
- #else
-@@ -113,3 +122,10 @@
- void a52_dynrng (a52_state_t * state,
- sample_t (* call) (sample_t, void *), void * data);
- int a52_block (a52_state_t * state, sample_t * samples);
-+
+void* a52_resample_init(uint32_t mm_accel,int flags,int chans);
+extern int (* a52_resample) (float * _f, int16_t * s16);
+
+uint16_t crc16_block(uint8_t *data,uint32_t num_bytes);
+
-+#endif /* A52_H */
---- liba52/a52_internal.h 2005-03-22 19:59:35.000000000 +0100
-+++ a52_internal.h 2004-03-19 01:15:49.000000000 +0100
-@@ -41,11 +43,12 @@
+ #endif /* A52_H */
+--- liba52-0.7.4/a52_internal.h 2006-06-12 15:05:07.000000000 +0200
++++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200
+@@ -103,18 +107,34 @@
+ #define DELTA_BIT_NONE (2)
+ #define DELTA_BIT_RESERVED (3)
+
++#ifdef ARCH_X86_64
++# define REG_a "rax"
++# define REG_d "rdx"
++# define REG_S "rsi"
++# define REG_D "rdi"
++# define REG_BP "rbp"
++#else
++# define REG_a "eax"
++# define REG_d "edx"
++# define REG_S "esi"
++# define REG_D "edi"
++# define REG_BP "ebp"
++#endif
++
+ void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
+ int start, int end, int fastleak, int slowleak,
+ expbap_t * expbap);
- int downmix_init (int input, int flags, sample_t * level,
+ int a52_downmix_init (int input, int flags, sample_t * level,
sample_t clev, sample_t slev);
+void downmix_accel_init(uint32_t mm_accel);
- int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
+ int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
sample_t clev, sample_t slev);
--void downmix (sample_t * samples, int acmod, int output, sample_t bias,
-+extern void (*downmix) (sample_t * samples, int acmod, int output, sample_t bias,
+-void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
++extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
sample_t clev, sample_t slev);
--void upmix (sample_t * samples, int acmod, int output);
-+extern void (*upmix) (sample_t * samples, int acmod, int output);
-
- void imdct_init (uint32_t mm_accel);
- extern void (* imdct_256) (sample_t * data, sample_t * delay, sample_t bias);
---- liba52/bitstream.c 2005-03-22 19:59:35.000000000 +0100
-+++ bitstream.c 2004-03-19 01:15:49.000000000 +0100
-@@ -29,7 +35,12 @@
+-void a52_upmix (sample_t * samples, int acmod, int output);
++extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
+
+ void a52_imdct_init (uint32_t mm_accel);
+ void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
+-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias);
++extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
++void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias);
+--- liba52-0.7.4/bitstream.c 2006-06-12 15:05:07.000000000 +0200
++++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200
+@@ -31,6 +35,10 @@
#define BUFFER_SIZE 4096
+#ifdef ALT_BITSTREAM_READER
+int indx=0;
-+uint32_t * buffer_start;
-+#else
- static uint32_t * buffer_start;
+#endif
-
- uint32_t bits_left;
- uint32_t current_word;
-@@ -41,6 +52,9 @@
- align = (int)buf & 3;
- buffer_start = (uint32_t *) (buf - align);
- bits_left = 0;
++
+ void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
+ {
+ int align;
+@@ -38,6 +46,9 @@
+ align = (long)buf & 3;
+ state->buffer_start = (uint32_t *) (buf - align);
+ state->bits_left = 0;
+#ifdef ALT_BITSTREAM_READER
+ indx=0;
+#endif
- bitstream_get (align * 8);
+ bitstream_get (state, align * 8);
}
---- liba52/bitstream.h 2005-03-22 19:59:35.000000000 +0100
-+++ bitstream.h 2004-03-19 01:15:49.000000000 +0100
-@@ -19,6 +25,48 @@
+--- liba52-0.7.4/bitstream.h 2006-06-12 15:05:07.000000000 +0200
++++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200
+@@ -21,6 +25,48 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
@@ -114,16 +124,16 @@
/* (stolen from the kernel) */
#ifdef WORDS_BIGENDIAN
-@@ -29,7 +77,7 @@
- # if defined (__i386__)
+@@ -28,7 +74,7 @@
+
+ #else
+
+-# if 0 && defined (__i386__)
++# if defined (__i386__)
# define swab32(x) __i386_swab32(x)
-- static inline const uint32_t __i386_swab32(uint32_t x)
-+ static always_inline const uint32_t __i386_swab32(uint32_t x)
- {
- __asm__("bswap %0" : "=r" (x) : "0" (x));
- return x;
-@@ -37,25 +85,42 @@
+ static inline const uint32_t __i386_swab32(uint32_t x)
+@@ -39,19 +85,34 @@
# else
@@ -141,24 +151,17 @@
#endif
+#ifdef ALT_BITSTREAM_READER
-+extern uint32_t *buffer_start;
+extern int indx;
-+#else
- extern uint32_t bits_left;
- extern uint32_t current_word;
+#endif
-
- void bitstream_set_ptr (uint8_t * buf);
- uint32_t bitstream_get_bh(uint32_t num_bits);
- int32_t bitstream_get_bh_2(uint32_t num_bits);
-
+
- static inline uint32_t
--bitstream_get(uint32_t num_bits)
-+bitstream_get(uint32_t num_bits) // note num_bits is practically a constant due to inlineing
+ void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
+ uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
+ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
+
+ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
{
+#ifdef ALT_BITSTREAM_READER
-+ uint32_t result= swab32( unaligned32(((uint8_t *)buffer_start)+(indx>>3)) );
++ uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
+
+ result<<= (indx&0x07);
+ result>>= 32 - num_bits;
@@ -167,32 +170,28 @@
+ return result;
+#else
uint32_t result;
--
-+
- if(num_bits < bits_left) {
- result = (current_word << (32 - bits_left)) >> (32 - num_bits);
- bits_left -= num_bits;
-@@ -63,11 +128,30 @@
+
+ if (num_bits < state->bits_left) {
+@@ -61,10 +122,29 @@
}
- return bitstream_get_bh(num_bits);
+ return a52_bitstream_get_bh (state, num_bits);
+#endif
+}
+
-+static inline void bitstream_skip(int num_bits)
++static inline void bitstream_skip(a52_state_t * state, int num_bits)
+{
+#ifdef ALT_BITSTREAM_READER
+ indx+= num_bits;
+#else
-+ bitstream_get(num_bits);
++ bitstream_get(state, num_bits);
+#endif
}
- static inline int32_t
- bitstream_get_2(uint32_t num_bits)
+ static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
{
+#ifdef ALT_BITSTREAM_READER
-+ int32_t result= swab32( unaligned32(((uint8_t *)buffer_start)+(indx>>3)) );
++ int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
+
+ result<<= (indx&0x07);
+ result>>= 32 - num_bits;
@@ -202,16 +201,16 @@
+#else
int32_t result;
- if(num_bits < bits_left) {
-@@ -77,4 +161,5 @@
+ if (num_bits < state->bits_left) {
+@@ -74,4 +154,5 @@
}
- return bitstream_get_bh_2(num_bits);
+ return a52_bitstream_get_bh_2 (state, num_bits);
+#endif
}
---- liba52/downmix.c 2005-03-22 19:59:35.000000000 +0100
-+++ downmix.c 2004-04-12 18:42:14.000000000 +0200
-@@ -17,18 +23,46 @@
+--- liba52-0.7.4/downmix.c 2006-06-12 15:17:53.000000000 +0200
++++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200
+@@ -23,18 +23,47 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -220,10 +219,10 @@
*/
#include "config.h"
++#include "asmalign.h"
--#include <inttypes.h>
#include <string.h>
-+#include <inttypes.h>
+ #include <inttypes.h>
#include "a52.h"
#include "a52_internal.h"
@@ -232,9 +231,9 @@
#define CONVERT(acmod,output) (((output) << 3) + (acmod))
+
-+void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,
++void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias,
+ sample_t clev, sample_t slev)= NULL;
-+void (*upmix)(sample_t * samples, int acmod, int output)= NULL;
++void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL;
+
+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
+ sample_t clev, sample_t slev);
@@ -247,50 +246,28 @@
+
+void downmix_accel_init(uint32_t mm_accel)
+{
-+ upmix= upmix_C;
-+ downmix= downmix_C;
-+#ifdef ARCH_X86
-+ if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX;
-+ if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE;
-+ if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow;
++ a52_upmix= upmix_C;
++ a52_downmix= downmix_C;
++#if defined(ARCH_X86) || defined(ARCH_X86_64)
++ if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX;
++ if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE;
++ if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
+#endif
+}
+
- int downmix_init (int input, int flags, sample_t * level,
+ int a52_downmix_init (int input, int flags, sample_t * level,
sample_t clev, sample_t slev)
{
-@@ -61,7 +95,7 @@
- output = flags & A52_CHANNEL_MASK;
- if (output > A52_DOLBY)
- return -1;
--
-+
- output = table[output][input & 7];
-
- if ((output == A52_STEREO) &&
-@@ -145,7 +179,6 @@
- *level *= 1 / (1 + 3 * LEVEL_3DB);
- break;
- }
--
- return output;
- }
-
-@@ -440,12 +473,11 @@
- static void zero (sample_t * samples)
- {
- int i;
--
- for (i = 0; i < 256; i++)
+@@ -451,7 +480,7 @@
samples[i] = 0;
}
--void downmix (sample_t * samples, int acmod, int output, sample_t bias,
-+static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
+-void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
++void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
sample_t clev, sample_t slev)
{
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-@@ -557,7 +589,7 @@
+@@ -563,7 +592,7 @@
break;
case CONVERT (A52_3F2R, A52_2F1R):
@@ -299,7 +276,7 @@
move2to1 (samples + 768, samples + 512, bias);
break;
-@@ -581,12 +613,12 @@
+@@ -587,12 +616,12 @@
break;
case CONVERT (A52_3F1R, A52_3F2R):
@@ -309,37 +286,37 @@
}
}
--void upmix (sample_t * samples, int acmod, int output)
-+static void upmix_C (sample_t * samples, int acmod, int output)
+-void a52_upmix (sample_t * samples, int acmod, int output)
++void upmix_C (sample_t * samples, int acmod, int output)
{
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-@@ -651,3 +683,1137 @@
+@@ -657,3 +686,1137 @@
goto mix_31to21;
}
}
+
-+#ifdef ARCH_X86
++#if defined(ARCH_X86) || defined(ARCH_X86_64)
+static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
+{
+ asm volatile(
+ "movlps %2, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps (%0, %%esi), %%xmm0 \n\t"
-+ "movaps 16(%0, %%esi), %%xmm1 \n\t"
-+ "addps (%1, %%esi), %%xmm0 \n\t"
-+ "addps 16(%1, %%esi), %%xmm1 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
++ "addps (%1, %%"REG_S"), %%xmm0 \n\t"
++ "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
+ "addps %%xmm7, %%xmm0 \n\t"
+ "addps %%xmm7, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%1, %%esi) \n\t"
-+ "movaps %%xmm1, 16(%1, %%esi) \n\t"
-+ "addl $32, %%esi \n\t"
++ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
++ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
++ "add $32, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -348,19 +325,19 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps (%0, %%esi), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm1 \n\t"
-+ "addps 2048(%0, %%esi), %%xmm0 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps %%xmm7, %%xmm1 \n\t"
+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -369,20 +346,20 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps (%0, %%esi), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm1 \n\t"
-+ "addps 2048(%0, %%esi), %%xmm0 \n\t"
-+ "addps 3072(%0, %%esi), %%xmm1 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps %%xmm7, %%xmm0 \n\t"
+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -391,21 +368,21 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps (%0, %%esi), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm1 \n\t"
-+ "addps 2048(%0, %%esi), %%xmm0 \n\t"
-+ "addps 3072(%0, %%esi), %%xmm1 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps 4096(%0, %%esi), %%xmm1 \n\t"
++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -414,21 +391,21 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm0 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps %%xmm7, %%xmm0 \n\t" //common
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%esi), %%xmm2 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm2, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -437,21 +414,21 @@
+ asm volatile(
+ "movlps %2, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 1024(%1, %%esi), %%xmm0 \n\t"
++ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
+ "addps %%xmm7, %%xmm0 \n\t" //common
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps (%1, %%esi), %%xmm2 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps (%1, %%"REG_S"), %%xmm2 \n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm2, (%1, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm2, (%1, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (left+256), "r" (right+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -460,22 +437,22 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm2 \n\t"
++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
+ "addps %%xmm7, %%xmm1 \n\t"
+ "addps %%xmm7, %%xmm2 \n\t"
+ "subps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm2, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -484,22 +461,22 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm0 \n\t"
-+ "addps 3072(%0, %%esi), %%xmm0 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%esi), %%xmm2 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm2, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -508,24 +485,24 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm0 \n\t"
-+ "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround
++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
++ "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%esi), %%xmm2 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm2 \n\t"
+ "subps %%xmm3, %%xmm1 \n\t"
+ "addps %%xmm3, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm2, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -534,23 +511,23 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 2048(%0, %%esi), %%xmm0 \n\t"
-+ "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm2 \n\t"
++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
+ "addps %%xmm7, %%xmm1 \n\t"
+ "addps %%xmm7, %%xmm2 \n\t"
+ "subps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm2, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -559,22 +536,22 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm0 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps %%xmm7, %%xmm0 \n\t" // common
+ "movaps %%xmm0, %%xmm1 \n\t" // common
-+ "addps (%0, %%esi), %%xmm0 \n\t"
-+ "addps 2048(%0, %%esi), %%xmm1 \n\t"
-+ "addps 3072(%0, %%esi), %%xmm0 \n\t"
-+ "addps 4096(%0, %%esi), %%xmm1 \n\t"
-+ "movaps %%xmm0, (%0, %%esi) \n\t"
-+ "movaps %%xmm1, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "addps (%0, %%"REG_S"), %%xmm0 \n\t"
++ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
++ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -583,25 +560,25 @@
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps 1024(%0, %%esi), %%xmm0 \n\t"
-+ "movaps 3072(%0, %%esi), %%xmm2 \n\t"
++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
++ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround
-+ "movaps (%0, %%esi), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%esi), %%xmm3 \n\t"
++ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
++ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
+ "subps %%xmm2, %%xmm1 \n\t"
+ "addps %%xmm2, %%xmm3 \n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm3 \n\t"
-+ "movaps %%xmm1, (%0, %%esi) \n\t"
-+ "movaps %%xmm3, 1024(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
++ "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -610,40 +587,40 @@
+ asm volatile(
+ "movlps %2, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movaps (%0, %%esi), %%xmm0 \n\t"
-+ "movaps 16(%0, %%esi), %%xmm1 \n\t"
-+ "addps 1024(%0, %%esi), %%xmm0 \n\t"
-+ "addps 1040(%0, %%esi), %%xmm1 \n\t"
++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
++ "addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
++ "addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps %%xmm7, %%xmm0 \n\t"
+ "addps %%xmm7, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%1, %%esi) \n\t"
-+ "movaps %%xmm1, 16(%1, %%esi) \n\t"
-+ "addl $32, %%esi \n\t"
++ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
++ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
++ "add $32, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
+static void zero_MMX(sample_t * samples)
+{
+ asm volatile(
-+ "movl $-1024, %%esi \n\t"
++ "mov $-1024, %%"REG_S" \n\t"
+ "pxor %%mm0, %%mm0 \n\t"
-+ ".balign 16\n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq %%mm0, (%0, %%esi) \n\t"
-+ "movq %%mm0, 8(%0, %%esi) \n\t"
-+ "movq %%mm0, 16(%0, %%esi) \n\t"
-+ "movq %%mm0, 24(%0, %%esi) \n\t"
-+ "addl $32, %%esi \n\t"
++ "movq %%mm0, (%0, %%"REG_S") \n\t"
++ "movq %%mm0, 8(%0, %%"REG_S") \n\t"
++ "movq %%mm0, 16(%0, %%"REG_S") \n\t"
++ "movq %%mm0, 24(%0, %%"REG_S") \n\t"
++ "add $32, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ "emms"
+ :: "r" (samples+256)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -892,29 +869,29 @@
+ asm volatile(
+ "movd %2, %%mm7 \n\t"
+ "punpckldq %2, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq (%0, %%esi), %%mm0 \n\t"
-+ "movq 8(%0, %%esi), %%mm1 \n\t"
-+ "movq 16(%0, %%esi), %%mm2 \n\t"
-+ "movq 24(%0, %%esi), %%mm3 \n\t"
-+ "pfadd (%1, %%esi), %%mm0 \n\t"
-+ "pfadd 8(%1, %%esi), %%mm1 \n\t"
-+ "pfadd 16(%1, %%esi), %%mm2 \n\t"
-+ "pfadd 24(%1, %%esi), %%mm3 \n\t"
++ "movq (%0, %%"REG_S"), %%mm0 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
++ "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
++ "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
++ "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
++ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
++ "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
+ "pfadd %%mm7, %%mm0 \n\t"
+ "pfadd %%mm7, %%mm1 \n\t"
+ "pfadd %%mm7, %%mm2 \n\t"
+ "pfadd %%mm7, %%mm3 \n\t"
-+ "movq %%mm0, (%1, %%esi) \n\t"
-+ "movq %%mm1, 8(%1, %%esi) \n\t"
-+ "movq %%mm2, 16(%1, %%esi) \n\t"
-+ "movq %%mm3, 24(%1, %%esi) \n\t"
-+ "addl $32, %%esi \n\t"
++ "movq %%mm0, (%1, %%"REG_S") \n\t"
++ "movq %%mm1, 8(%1, %%"REG_S") \n\t"
++ "movq %%mm2, 16(%1, %%"REG_S") \n\t"
++ "movq %%mm3, 24(%1, %%"REG_S") \n\t"
++ "add $32, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -923,25 +900,25 @@
+ asm volatile(
+ "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq (%0, %%esi), %%mm0 \n\t"
-+ "movq 8(%0, %%esi), %%mm1 \n\t"
-+ "movq 1024(%0, %%esi), %%mm2 \n\t"
-+ "movq 1032(%0, %%esi), %%mm3 \n\t"
-+ "pfadd 2048(%0, %%esi), %%mm0 \n\t"
-+ "pfadd 2056(%0, %%esi), %%mm1 \n\t"
++ "movq (%0, %%"REG_S"), %%mm0 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
++ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
++ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
+ "pfadd %%mm7, %%mm0 \n\t"
+ "pfadd %%mm7, %%mm1 \n\t"
+ "pfadd %%mm2, %%mm0 \n\t"
+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%esi) \n\t"
-+ "movq %%mm1, 8(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movq %%mm0, (%0, %%"REG_S") \n\t"
++ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -950,27 +927,27 @@
+ asm volatile(
+ "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq (%0, %%esi), %%mm0 \n\t"
-+ "movq 8(%0, %%esi), %%mm1 \n\t"
-+ "movq 1024(%0, %%esi), %%mm2 \n\t"
-+ "movq 1032(%0, %%esi), %%mm3 \n\t"
-+ "pfadd 2048(%0, %%esi), %%mm0 \n\t"
-+ "pfadd 2056(%0, %%esi), %%mm1 \n\t"
-+ "pfadd 3072(%0, %%esi), %%mm2 \n\t"
-+ "pfadd 3080(%0, %%esi), %%mm3 \n\t"
++ "movq (%0, %%"REG_S"), %%mm0 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
++ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
++ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
++ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
+ "pfadd %%mm7, %%mm0 \n\t"
+ "pfadd %%mm7, %%mm1 \n\t"
+ "pfadd %%mm2, %%mm0 \n\t"
+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%esi) \n\t"
-+ "movq %%mm1, 8(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movq %%mm0, (%0, %%"REG_S") \n\t"
++ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -979,29 +956,29 @@
+ asm volatile(
+ "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq (%0, %%esi), %%mm0 \n\t"
-+ "movq 8(%0, %%esi), %%mm1 \n\t"
-+ "movq 1024(%0, %%esi), %%mm2 \n\t"
-+ "movq 1032(%0, %%esi), %%mm3 \n\t"
-+ "pfadd 2048(%0, %%esi), %%mm0 \n\t"
-+ "pfadd 2056(%0, %%esi), %%mm1 \n\t"
-+ "pfadd 3072(%0, %%esi), %%mm2 \n\t"
-+ "pfadd 3080(%0, %%esi), %%mm3 \n\t"
++ "movq (%0, %%"REG_S"), %%mm0 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
++ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
++ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
++ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
+ "pfadd %%mm7, %%mm0 \n\t"
+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd 4096(%0, %%esi), %%mm2 \n\t"
-+ "pfadd 4104(%0, %%esi), %%mm3 \n\t"
++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
++ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
+ "pfadd %%mm2, %%mm0 \n\t"
+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%esi) \n\t"
-+ "movq %%mm1, 8(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movq %%mm0, (%0, %%"REG_S") \n\t"
++ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -1010,29 +987,29 @@
+ asm volatile(
+ "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq 1024(%0, %%esi), %%mm0 \n\t"
-+ "movq 1032(%0, %%esi), %%mm1 \n\t"
++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
++ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
+ "pfadd %%mm7, %%mm0 \n\t" //common
+ "pfadd %%mm7, %%mm1 \n\t" //common
-+ "movq (%0, %%esi), %%mm2 \n\t"
-+ "movq 8(%0, %%esi), %%mm3 \n\t"
-+ "movq 2048(%0, %%esi), %%mm4 \n\t"
-+ "movq 2056(%0, %%esi), %%mm5 \n\t"
++ "movq (%0, %%"REG_S"), %%mm2 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
++ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
++ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
+ "pfadd %%mm0, %%mm2 \n\t"
+ "pfadd %%mm1, %%mm3 \n\t"
+ "pfadd %%mm0, %%mm4 \n\t"
+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%esi) \n\t"
-+ "movq %%mm3, 8(%0, %%esi) \n\t"
-+ "movq %%mm4, 1024(%0, %%esi) \n\t"
-+ "movq %%mm5, 1032(%0, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movq %%mm2, (%0, %%"REG_S") \n\t"
++ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
++ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
++ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -1041,29 +1018,29 @@
+ asm volatile(
+ "movd %2, %%mm7 \n\t"
+ "punpckldq %2, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq 1024(%1, %%esi), %%mm0 \n\t"
-+ "movq 1032(%1, %%esi), %%mm1 \n\t"
++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
++ "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
+ "pfadd %%mm7, %%mm0 \n\t" //common
+ "pfadd %%mm7, %%mm1 \n\t" //common
-+ "movq (%0, %%esi), %%mm2 \n\t"
-+ "movq 8(%0, %%esi), %%mm3 \n\t"
-+ "movq (%1, %%esi), %%mm4 \n\t"
-+ "movq 8(%1, %%esi), %%mm5 \n\t"
++ "movq (%0, %%"REG_S"), %%mm2 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
++ "movq (%1, %%"REG_S"), %%mm4 \n\t"
++ "movq 8(%1, %%"REG_S"), %%mm5 \n\t"
+ "pfadd %%mm0, %%mm2 \n\t"
+ "pfadd %%mm1, %%mm3 \n\t"
+ "pfadd %%mm0, %%mm4 \n\t"
+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%esi) \n\t"
-+ "movq %%mm3, 8(%0, %%esi) \n\t"
-+ "movq %%mm4, (%1, %%esi) \n\t"
-+ "movq %%mm5, 8(%1, %%esi) \n\t"
-+ "addl $16, %%esi \n\t"
++ "movq %%mm2, (%0, %%"REG_S") \n\t"
++ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
++ "movq %%mm4, (%1, %%"REG_S") \n\t"
++ "movq %%mm5, 8(%1, %%"REG_S") \n\t"
++ "add $16, %%"REG_S" \n\t"
+ " jnz 1b \n\t"
+ :: "r" (left+256), "r" (right+256), "m" (bias)
-+ : "%esi"
++ : "%"REG_S
+ );
+}
+
@@ -1072,15 +1049,15 @@
+ asm volatile(
+ "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t"
-+ "movl $-1024, %%esi \n\t"
-+ ".balign 16\n\t"
++ "mov $-1024, %%"REG_S" \n\t"
++ ASMALIGN16
+ "1: \n\t"
-+ "movq 2048(%0, %%esi), %%mm0 \n\t" // surround
-+ "movq 2056(%0, %%esi), %%mm1 \n\t" // surround
-+ "movq (%0, %%esi), %%mm2 \n\t"
-+ "movq 8(%0, %%esi), %%mm3 \n\t"
-+ "movq 1024(%0, %%esi), %%mm4 \n\t"
-+ "movq 1032(%0, %%esi), %%mm5 \n\t"
++ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
++ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
++ "movq (%0, %%"REG_S"), %%mm2 \n\t"
++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
++ "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
++ "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
+ "pfadd %%mm7, %%mm2 \n\t"
+ "pfadd %%mm7, %%mm3 \n\t"
+ "pfadd %%mm7, %%mm4 \n\t"
@@ -1089,14 +1066,14 @@
+ "pfsub %%mm1, %%mm3 \n\t"
+ "pfadd %%mm0, %%mm4 \n\t"
+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%esi) \n\t"
-+ "movq %%mm3, 8(%0, %%esi) \n\t"
-+ "movq %%mm4, 1024(%0, %%esi) \n\t"
-+ "movq %%mm5, 1032(%0, %%esi) \n\t"
-+ "addl $