summary refs log tree commit diff stats
path: root/liba52/liba52_changes.diff
diff options
context:
space:
mode:
Diffstat (limited to 'liba52/liba52_changes.diff')
-rw-r--r-- liba52/liba52_changes.diff 2473
1 files changed, 0 insertions, 2473 deletions
diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff
deleted file mode 100644
index 13e4eacbac..0000000000
--- a/liba52/liba52_changes.diff
+++ /dev/null
@@ -1,2473 +0,0 @@
---- include/a52.h 2006-06-12 15:04:57.000000000 +0200
-+++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200
-@@ -59,4 +66,9 @@
- int a52_block (a52_state_t * state);
- void a52_free (a52_state_t * state);
-
-+void* a52_resample_init(uint32_t mm_accel,int flags,int chans);
-+extern int (* a52_resample) (float * _f, int16_t * s16);
-+
-+uint16_t crc16_block(uint8_t *data,uint32_t num_bytes);
-+
- #endif /* A52_H */
---- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200
-+++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200
-@@ -103,18 +107,34 @@
- #define DELTA_BIT_NONE (2)
- #define DELTA_BIT_RESERVED (3)
-
-+#if ARCH_X86_64
-+# define REG_a "rax"
-+# define REG_d "rdx"
-+# define REG_S "rsi"
-+# define REG_D "rdi"
-+# define REG_BP "rbp"
-+#else
-+# define REG_a "eax"
-+# define REG_d "edx"
-+# define REG_S "esi"
-+# define REG_D "edi"
-+# define REG_BP "ebp"
-+#endif
-+
- void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
- int start, int end, int fastleak, int slowleak,
- expbap_t * expbap);
-
- int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev);
-+void downmix_accel_init(uint32_t mm_accel);
- int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev);
--void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
-+extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
--void a52_upmix (sample_t * samples, int acmod, int output);
-+extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
-
- void a52_imdct_init (uint32_t mm_accel);
- void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
--void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias);
-+extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
-+void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias);
---- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200
-+++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200
-@@ -31,6 +35,10 @@
-
- #define BUFFER_SIZE 4096
-
-+#ifdef ALT_BITSTREAM_READER
-+int indx=0;
-+#endif
-+
- void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
- {
- int align;
-@@ -38,6 +46,9 @@
- align = (long)buf & 3;
- state->buffer_start = (uint32_t *) (buf - align);
- state->bits_left = 0;
-+#ifdef ALT_BITSTREAM_READER
-+ indx=0;
-+#endif
- bitstream_get (state, align * 8);
- }
-
---- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200
-+++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200
-@@ -21,6 +25,42 @@
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-+/* code from ffmpeg/libavcodec */
-+#if defined(__sparc__) || defined(hpux)
-+/*
-+ * the alt bitstream reader performs unaligned memory accesses; that doesn't work
-+ * on sparc/hpux. For now, disable ALT_BITSTREAM_READER.
-+ */
-+#undef ALT_BITSTREAM_READER
-+#else
-+// alternative (faster) bitstream reader (reads up to 3 bytes past the end of the input)
-+#define ALT_BITSTREAM_READER
-+
-+/* used to avoid misaligned exceptions on some archs (alpha, ...) */
-+#if ARCH_X86 || HAVE_ARMV6
-+# define unaligned32(a) (*(uint32_t*)(a))
-+#else
-+# ifdef __GNUC__
-+static inline uint32_t unaligned32(const void *v) {
-+ struct Unaligned {
-+ uint32_t i;
-+ } __attribute__((packed));
-+
-+ return ((const struct Unaligned *) v)->i;
-+}
-+# elif defined(__DECC)
-+static inline uint32_t unaligned32(const void *v) {
-+ return *(const __unaligned uint32_t *) v;
-+}
-+# else
-+static inline uint32_t unaligned32(const void *v) {
-+ return *(const uint32_t *) v;
-+}
-+# endif
-+#endif //!ARCH_X86
-+
-+#endif
-+
- /* (stolen from the kernel) */
- #if HAVE_BIGENDIAN
-
-@@ -28,7 +68,7 @@
-
- #else
-
--# if 0 && defined (__i386__)
-+# if defined (__i386__)
-
- # define swab32(x) __i386_swab32(x)
- static inline const uint32_t __i386_swab32(uint32_t x)
-@@ -39,19 +79,34 @@
-
- # else
-
--# define swab32(x)\
--((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \
-- (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]))
--
-+# define swab32(x) __generic_swab32(x)
-+ static inline const uint32_t __generic_swab32(uint32_t x)
-+ {
-+ return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) |
-+ (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]));
-+ }
- # endif
- #endif
-
-+#ifdef ALT_BITSTREAM_READER
-+extern int indx;
-+#endif
-+
- void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
- uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
- int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
-
- static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
- {
-+#ifdef ALT_BITSTREAM_READER
-+ uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
-+
-+ result<<= (indx&0x07);
-+ result>>= 32 - num_bits;
-+ indx+= num_bits;
-+
-+ return result;
-+#else
- uint32_t result;
-
- if (num_bits < state->bits_left) {
-@@ -61,10 +116,29 @@
- }
-
- return a52_bitstream_get_bh (state, num_bits);
-+#endif
-+}
-+
-+static inline void bitstream_skip(a52_state_t * state, int num_bits)
-+{
-+#ifdef ALT_BITSTREAM_READER
-+ indx+= num_bits;
-+#else
-+ bitstream_get(state, num_bits);
-+#endif
- }
-
- static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
- {
-+#ifdef ALT_BITSTREAM_READER
-+ int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
-+
-+ result<<= (indx&0x07);
-+ result>>= 32 - num_bits;
-+ indx+= num_bits;
-+
-+ return result;
-+#else
- int32_t result;
-
- if (num_bits < state->bits_left) {
-@@ -74,4 +148,5 @@
- }
-
- return a52_bitstream_get_bh_2 (state, num_bits);
-+#endif
- }
---- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200
-+++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200
-@@ -19,18 +23,46 @@
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ *
-+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
- */
-
- #include "config.h"
-
- #include <string.h>
- #include <inttypes.h>
-
- #include "a52.h"
- #include "a52_internal.h"
-+#include "mm_accel.h"
-
- #define CONVERT(acmod,output) (((output) << 3) + (acmod))
-
-+
-+void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev)= NULL;
-+void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL;
-+
-+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev);
-+static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev);
-+static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev);
-+static void upmix_MMX (sample_t * samples, int acmod, int output);
-+static void upmix_C (sample_t * samples, int acmod, int output);
-+
-+void downmix_accel_init(uint32_t mm_accel)
-+{
-+ a52_upmix= upmix_C;
-+ a52_downmix= downmix_C;
-+#if ARCH_X86 || ARCH_X86_64
-+ if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX;
-+ if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE;
-+ if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
-+#endif
-+}
-+
- int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev)
- {
-@@ -447,7 +479,7 @@
- samples[i] = 0;
- }
-
--void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
-+void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
- {
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-@@ -559,7 +591,7 @@
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
-- mix3to2 (samples, bias);
-+ mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
- move2to1 (samples + 768, samples + 512, bias);
- break;
-
-@@ -583,12 +615,12 @@
- break;
-
- case CONVERT (A52_3F1R, A52_3F2R):
-- memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t));
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- break;
- }
- }
-
--void a52_upmix (sample_t * samples, int acmod, int output)
-+void upmix_C (sample_t * samples, int acmod, int output)
- {
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
-@@ -653,3 +685,1104 @@
- goto mix_31to21;
- }
- }
-+
-+#if ARCH_X86 || ARCH_X86_64
-+static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %2, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps (%1, %%"REG_S"), %%xmm0 \n\t"
-+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to1_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix4to1_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix5to1_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to2_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" //common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %2, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" //common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps (%1, %%"REG_S"), %%xmm2 \n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, (%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (left+256), "r" (right+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "addps %%xmm7, %%xmm2 \n\t"
-+ "subps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31to2_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "subps %%xmm3, %%xmm1 \n\t"
-+ "addps %%xmm3, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix22toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "addps %%xmm7, %%xmm2 \n\t"
-+ "subps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix32to2_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps %%xmm0, %%xmm1 \n\t" // common
-+ "addps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix32toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
-+ "subps %%xmm2, %%xmm1 \n\t"
-+ "addps %%xmm2, %%xmm3 \n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm3 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %2, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void zero_MMX(sample_t * samples)
-+{
-+ __asm__ volatile(
-+ "mov $-1024, %%"REG_S" \n\t"
-+ "pxor %%mm0, %%mm0 \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm0, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm0, 16(%0, %%"REG_S") \n\t"
-+ "movq %%mm0, 24(%0, %%"REG_S") \n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ "emms"
-+ :: "r" (samples+256)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev)
-+{
-+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-+
-+ case CONVERT (A52_CHANNEL, A52_CHANNEL2):
-+ memcpy (samples, samples + 256, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_CHANNEL, A52_MONO):
-+ case CONVERT (A52_STEREO, A52_MONO):
-+ mix_2to1_SSE:
-+ mix2to1_SSE (samples, samples + 256, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_2to1_SSE;
-+ case CONVERT (A52_3F, A52_MONO):
-+ mix_3to1_SSE:
-+ mix3to1_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_3to1_SSE;
-+ case CONVERT (A52_2F2R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_2to1_SSE;
-+ mix4to1_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_3to1_SSE;
-+ mix5to1_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_MONO, A52_DOLBY):
-+ memcpy (samples + 256, samples, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F, A52_STEREO):
-+ case CONVERT (A52_3F, A52_DOLBY):
-+ mix_3to2_SSE:
-+ mix3to2_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_STEREO):
-+ if (slev == 0)
-+ break;
-+ mix21to2_SSE (samples, samples + 256, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_DOLBY):
-+ mix21toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_STEREO):
-+ if (slev == 0)
-+ goto mix_3to2_SSE;
-+ mix31to2_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_DOLBY):
-+ mix31toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_STEREO):
-+ if (slev == 0)
-+ break;
-+ mix2to1_SSE (samples, samples + 512, bias);
-+ mix2to1_SSE (samples + 256, samples + 768, bias);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_DOLBY):
-+ mix22toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_STEREO):
-+ if (slev == 0)
-+ goto mix_3to2_SSE;
-+ mix32to2_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_DOLBY):
-+ mix32toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_3F):
-+ if (slev == 0)
-+ break;
-+ mix21to2_SSE (samples, samples + 512, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F):
-+ if (slev == 0)
-+ break;
-+ mix2to1_SSE (samples, samples + 768, bias);
-+ mix2to1_SSE (samples + 512, samples + 1024, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_2F1R):
-+ mix3to2_SSE (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_2F1R):
-+ mix2to1_SSE (samples + 512, samples + 768, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F1R):
-+ mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
-+ move2to1_SSE (samples + 768, samples + 512, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F1R):
-+ mix2to1_SSE (samples + 768, samples + 1024, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_2F2R):
-+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_2F2R):
-+ mix3to2_SSE (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F2R):
-+ mix3to2_SSE (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_3F2R):
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+ }
-+}
-+
-+static void upmix_MMX (sample_t * samples, int acmod, int output)
-+{
-+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-+
-+ case CONVERT (A52_CHANNEL, A52_CHANNEL2):
-+ memcpy (samples + 256, samples, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_MONO):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_MONO):
-+ case CONVERT (A52_2F2R, A52_MONO):
-+ zero_MMX (samples + 768);
-+ case CONVERT (A52_3F, A52_MONO):
-+ case CONVERT (A52_2F1R, A52_MONO):
-+ zero_MMX (samples + 512);
-+ case CONVERT (A52_CHANNEL, A52_MONO):
-+ case CONVERT (A52_STEREO, A52_MONO):
-+ zero_MMX (samples + 256);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_STEREO):
-+ case CONVERT (A52_3F2R, A52_DOLBY):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_STEREO):
-+ case CONVERT (A52_3F1R, A52_DOLBY):
-+ zero_MMX (samples + 768);
-+ case CONVERT (A52_3F, A52_STEREO):
-+ case CONVERT (A52_3F, A52_DOLBY):
-+ mix_3to2_MMX:
-+ memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
-+ zero_MMX (samples + 256);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_STEREO):
-+ case CONVERT (A52_2F2R, A52_DOLBY):
-+ zero_MMX (samples + 768);
-+ case CONVERT (A52_2F1R, A52_STEREO):
-+ case CONVERT (A52_2F1R, A52_DOLBY):
-+ zero_MMX (samples + 512);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_3F):
-+ case CONVERT (A52_2F2R, A52_2F1R):
-+ zero_MMX (samples + 768);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F1R):
-+ zero_MMX (samples + 1024);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F1R):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_2F1R):
-+ mix_31to21_MMX:
-+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
-+ goto mix_3to2_MMX;
-+
-+ case CONVERT (A52_3F2R, A52_2F2R):
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
-+ goto mix_31to21_MMX;
-+ }
-+}
-+
-+static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %2, %%mm7 \n\t"
-+ "punpckldq %2, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
-+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
-+ "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
-+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
-+ "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%mm3 \n\t"
-+ "movq %%mm0, (%1, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%1, %%"REG_S") \n\t"
-+ "movq %%mm2, 16(%1, %%"REG_S") \n\t"
-+ "movq %%mm3, 24(%1, %%"REG_S") \n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to1_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm2, %%mm0 \n\t"
-+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix4to1_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm2, %%mm0 \n\t"
-+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix5to1_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm2, %%mm0 \n\t"
-+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to2_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" //common
-+ "pfadd %%mm7, %%mm1 \n\t" //common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %2, %%mm7 \n\t"
-+ "punpckldq %2, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" //common
-+ "pfadd %%mm7, %%mm1 \n\t" //common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq (%1, %%"REG_S"), %%mm4 \n\t"
-+ "movq 8(%1, %%"REG_S"), %%mm5 \n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, (%1, %%"REG_S") \n\t"
-+ "movq %%mm5, 8(%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (left+256), "r" (right+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
-+ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%mm3 \n\t"
-+ "pfadd %%mm7, %%mm4 \n\t"
-+ "pfadd %%mm7, %%mm5 \n\t"
-+ "pfsub %%mm0, %%mm2 \n\t"
-+ "pfsub %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31to2_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" // common
-+ "pfadd %%mm7, %%mm1 \n\t" // common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" // common
-+ "pfadd %%mm7, %%mm1 \n\t" // common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
-+ "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
-+ "pfsub %%mm0, %%mm2 \n\t"
-+ "pfsub %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix22toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
-+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%