diff options
Diffstat (limited to 'liba52/liba52_changes.diff')
-rw-r--r-- | liba52/liba52_changes.diff | 2473 |
1 files changed, 0 insertions, 2473 deletions
diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff deleted file mode 100644 index 13e4eacbac..0000000000 --- a/liba52/liba52_changes.diff +++ /dev/null @@ -1,2473 +0,0 @@ ---- include/a52.h 2006-06-12 15:04:57.000000000 +0200 -+++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200 -@@ -59,4 +66,9 @@ - int a52_block (a52_state_t * state); - void a52_free (a52_state_t * state); - -+void* a52_resample_init(uint32_t mm_accel,int flags,int chans); -+extern int (* a52_resample) (float * _f, int16_t * s16); -+ -+uint16_t crc16_block(uint8_t *data,uint32_t num_bytes); -+ - #endif /* A52_H */ ---- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200 -+++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200 -@@ -103,18 +107,34 @@ - #define DELTA_BIT_NONE (2) - #define DELTA_BIT_RESERVED (3) - -+#if ARCH_X86_64 -+# define REG_a "rax" -+# define REG_d "rdx" -+# define REG_S "rsi" -+# define REG_D "rdi" -+# define REG_BP "rbp" -+#else -+# define REG_a "eax" -+# define REG_d "edx" -+# define REG_S "esi" -+# define REG_D "edi" -+# define REG_BP "ebp" -+#endif -+ - void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, - int start, int end, int fastleak, int slowleak, - expbap_t * expbap); - - int a52_downmix_init (int input, int flags, sample_t * level, - sample_t clev, sample_t slev); -+void downmix_accel_init(uint32_t mm_accel); - int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, - sample_t clev, sample_t slev); --void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, -+extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, - sample_t clev, sample_t slev); --void a52_upmix (sample_t * samples, int acmod, int output); -+extern void (*a52_upmix) (sample_t * samples, int acmod, int output); - - void a52_imdct_init (uint32_t mm_accel); - void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); --void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); -+extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias); -+void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias); ---- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 -+++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 -@@ -31,6 +35,10 @@ - - #define BUFFER_SIZE 4096 - -+#ifdef ALT_BITSTREAM_READER -+int indx=0; -+#endif -+ - void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf) - { - int align; -@@ -38,6 +46,9 @@ - align = (long)buf & 3; - state->buffer_start = (uint32_t *) (buf - align); - state->bits_left = 0; -+#ifdef ALT_BITSTREAM_READER -+ indx=0; -+#endif - bitstream_get (state, align * 8); - } - ---- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 -+++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 -@@ -21,6 +25,42 @@ - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -+/* code from ffmpeg/libavcodec */ -+#if defined(__sparc__) || defined(hpux) -+/* -+ * the alt bitstream reader performs unaligned memory accesses; that doesn't work -+ * on sparc/hpux. For now, disable ALT_BITSTREAM_READER. -+ */ -+#undef ALT_BITSTREAM_READER -+#else -+// alternative (faster) bitstram reader (reades upto 3 bytes over the end of the input) -+#define ALT_BITSTREAM_READER -+ -+/* used to avoid misaligned exceptions on some archs (alpha, ...) */ -+#if ARCH_X86 || HAVE_ARMV6 -+# define unaligned32(a) (*(uint32_t*)(a)) -+#else -+# ifdef __GNUC__ -+static inline uint32_t unaligned32(const void *v) { -+ struct Unaligned { -+ uint32_t i; -+ } __attribute__((packed)); -+ -+ return ((const struct Unaligned *) v)->i; -+} -+# elif defined(__DECC) -+static inline uint32_t unaligned32(const void *v) { -+ return *(const __unaligned uint32_t *) v; -+} -+# else -+static inline uint32_t unaligned32(const void *v) { -+ return *(const uint32_t *) v; -+} -+# endif -+#endif //!ARCH_X86 -+ -+#endif -+ - /* (stolen from the kernel) */ - #if HAVE_BIGENDIAN - -@@ -28,7 +68,7 @@ - - #else - --# if 0 && defined (__i386__) -+# if defined (__i386__) - - # define swab32(x) __i386_swab32(x) - static inline const uint32_t __i386_swab32(uint32_t x) -@@ -39,19 +79,34 @@ - - # else - --# define swab32(x)\ --((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ -- (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) -- -+# define swab32(x) __generic_swab32(x) -+ static inline const uint32_t __generic_swab32(uint32_t x) -+ { -+ return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | -+ (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])); -+ } - # endif - #endif - -+#ifdef ALT_BITSTREAM_READER -+extern int indx; -+#endif -+ - void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); - uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); - int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); - - static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) - { -+#ifdef ALT_BITSTREAM_READER -+ uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) ); -+ -+ result<<= (indx&0x07); -+ result>>= 32 - num_bits; -+ indx+= num_bits; -+ -+ return result; -+#else - uint32_t result; - - if (num_bits < state->bits_left) { -@@ -61,10 +116,29 @@ - } - - return a52_bitstream_get_bh (state, num_bits); -+#endif -+} -+ -+static inline void bitstream_skip(a52_state_t * state, int num_bits) -+{ -+#ifdef ALT_BITSTREAM_READER -+ indx+= num_bits; -+#else -+ bitstream_get(state, num_bits); -+#endif - } - - static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) - { -+#ifdef ALT_BITSTREAM_READER -+ int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) ); -+ -+ result<<= (indx&0x07); -+ result>>= 32 - num_bits; -+ indx+= num_bits; -+ -+ return result; -+#else - int32_t result; - - if (num_bits < state->bits_left) { -@@ -74,4 +148,5 @@ - } - - return a52_bitstream_get_bh_2 (state, num_bits); -+#endif - } ---- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200 -+++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200 -@@ -19,18 +23,46 @@ - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) - */ - - #include "config.h" - - #include <string.h> - #include <inttypes.h> - - #include "a52.h" - #include "a52_internal.h" -+#include "mm_accel.h" - - #define CONVERT(acmod,output) (((output) << 3) + (acmod)) - -+ -+void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, -+ sample_t clev, sample_t slev)= NULL; -+void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL; -+ -+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, -+ sample_t clev, sample_t slev); -+static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, -+ sample_t clev, sample_t slev); -+static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, -+ sample_t clev, sample_t slev); -+static void upmix_MMX (sample_t * samples, int acmod, int output); -+static void upmix_C (sample_t * samples, int acmod, int output); -+ -+void downmix_accel_init(uint32_t mm_accel) -+{ -+ a52_upmix= upmix_C; -+ a52_downmix= downmix_C; -+#if ARCH_X86 || ARCH_X86_64 -+ if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; -+ if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; -+ if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; -+#endif -+} -+ - int a52_downmix_init (int input, int flags, sample_t * level, - sample_t clev, sample_t slev) - { -@@ -447,7 +479,7 @@ - samples[i] = 0; - } - --void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, -+void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, - sample_t clev, sample_t slev) - { - switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { -@@ -559,7 +591,7 @@ - break; - - case CONVERT (A52_3F2R, A52_2F1R): -- mix3to2 (samples, bias); -+ mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) - move2to1 (samples + 768, samples + 512, bias); - break; - -@@ -583,12 +615,12 @@ - break; - - case CONVERT (A52_3F1R, A52_3F2R): -- memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); -+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); - break; - } - } - --void a52_upmix (sample_t * samples, int acmod, int output) -+void upmix_C (sample_t * samples, int acmod, int output) - { - switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { - -@@ -653,3 +685,1104 @@ - goto mix_31to21; - } - } -+ -+#if ARCH_X86 || ARCH_X86_64 -+static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %2, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps (%1, %%"REG_S"), %%xmm0 \n\t" -+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" -+ "addps %%xmm7, %%xmm1 \n\t" -+ "movaps %%xmm0, (%1, %%"REG_S") \n\t" -+ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" -+ "add $32, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (src+256), "r" (dest+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix3to1_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps %%xmm7, %%xmm1 \n\t" -+ "addps %%xmm1, %%xmm0 \n\t" -+ "movaps %%xmm0, (%0, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix4to1_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" -+ "addps %%xmm1, %%xmm0 \n\t" -+ "movaps %%xmm0, (%0, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix5to1_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps %%xmm1, %%xmm0 \n\t" -+ "movaps %%xmm0, (%0, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix3to2_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm2 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %2, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps (%1, %%"REG_S"), %%xmm2 \n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm2 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm2, (%1, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (left+256), "r" (right+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix21toS_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" -+ "addps %%xmm7, %%xmm1 \n\t" -+ "addps %%xmm7, %%xmm2 \n\t" -+ "subps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm2 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix31to2_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm2 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix31toS_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround -+ "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm2 \n\t" -+ "subps %%xmm3, %%xmm1 \n\t" -+ "addps %%xmm3, %%xmm2 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix22toS_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" -+ "addps %%xmm7, %%xmm1 \n\t" -+ "addps %%xmm7, %%xmm2 \n\t" -+ "subps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm2 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix32to2_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps %%xmm0, %%xmm1 \n\t" // common -+ "addps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" -+ "movaps %%xmm0, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix32toS_SSE (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %1, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" // common -+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" -+ "subps %%xmm2, %%xmm1 \n\t" -+ "addps %%xmm2, %%xmm3 \n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm3 \n\t" -+ "movaps %%xmm1, (%0, %%"REG_S") \n\t" -+ "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) -+{ -+ __asm__ volatile( -+ "movlps %2, %%xmm7 \n\t" -+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps %%xmm7, %%xmm0 \n\t" -+ "addps %%xmm7, %%xmm1 \n\t" -+ "movaps %%xmm0, (%1, %%"REG_S") \n\t" -+ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" -+ "add $32, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (src+256), "r" (dest+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void zero_MMX(sample_t * samples) -+{ -+ __asm__ volatile( -+ "mov $-1024, %%"REG_S" \n\t" -+ "pxor %%mm0, %%mm0 \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq %%mm0, (%0, %%"REG_S") \n\t" -+ "movq %%mm0, 8(%0, %%"REG_S") \n\t" -+ "movq %%mm0, 16(%0, %%"REG_S") \n\t" -+ "movq %%mm0, 24(%0, %%"REG_S") \n\t" -+ "add $32, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ "emms" -+ :: "r" (samples+256) -+ : "%"REG_S -+ ); -+} -+ -+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, -+ sample_t clev, sample_t slev) -+{ -+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { -+ -+ case CONVERT (A52_CHANNEL, A52_CHANNEL2): -+ memcpy (samples, samples + 256, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_CHANNEL, A52_MONO): -+ case CONVERT (A52_STEREO, A52_MONO): -+ mix_2to1_SSE: -+ mix2to1_SSE (samples, samples + 256, bias); -+ break; -+ -+ case CONVERT (A52_2F1R, A52_MONO): -+ if (slev == 0) -+ goto mix_2to1_SSE; -+ case CONVERT (A52_3F, A52_MONO): -+ mix_3to1_SSE: -+ mix3to1_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_MONO): -+ if (slev == 0) -+ goto mix_3to1_SSE; -+ case CONVERT (A52_2F2R, A52_MONO): -+ if (slev == 0) -+ goto mix_2to1_SSE; -+ mix4to1_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_MONO): -+ if (slev == 0) -+ goto mix_3to1_SSE; -+ mix5to1_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_MONO, A52_DOLBY): -+ memcpy (samples + 256, samples, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_3F, A52_STEREO): -+ case CONVERT (A52_3F, A52_DOLBY): -+ mix_3to2_SSE: -+ mix3to2_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_2F1R, A52_STEREO): -+ if (slev == 0) -+ break; -+ mix21to2_SSE (samples, samples + 256, bias); -+ break; -+ -+ case CONVERT (A52_2F1R, A52_DOLBY): -+ mix21toS_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_STEREO): -+ if (slev == 0) -+ goto mix_3to2_SSE; -+ mix31to2_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_DOLBY): -+ mix31toS_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_2F2R, A52_STEREO): -+ if (slev == 0) -+ break; -+ mix2to1_SSE (samples, samples + 512, bias); -+ mix2to1_SSE (samples + 256, samples + 768, bias); -+ break; -+ -+ case CONVERT (A52_2F2R, A52_DOLBY): -+ mix22toS_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_STEREO): -+ if (slev == 0) -+ goto mix_3to2_SSE; -+ mix32to2_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_DOLBY): -+ mix32toS_SSE (samples, bias); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_3F): -+ if (slev == 0) -+ break; -+ mix21to2_SSE (samples, samples + 512, bias); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_3F): -+ if (slev == 0) -+ break; -+ mix2to1_SSE (samples, samples + 768, bias); -+ mix2to1_SSE (samples + 512, samples + 1024, bias); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_2F1R): -+ mix3to2_SSE (samples, bias); -+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_2F2R, A52_2F1R): -+ mix2to1_SSE (samples + 512, samples + 768, bias); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_2F1R): -+ mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) -+ move2to1_SSE (samples + 768, samples + 512, bias); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_3F1R): -+ mix2to1_SSE (samples + 768, samples + 1024, bias); -+ break; -+ -+ case CONVERT (A52_2F1R, A52_2F2R): -+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_2F2R): -+ mix3to2_SSE (samples, bias); -+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_2F2R): -+ mix3to2_SSE (samples, bias); -+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); -+ memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_3F1R, A52_3F2R): -+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); -+ break; -+ } -+} -+ -+static void upmix_MMX (sample_t * samples, int acmod, int output) -+{ -+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { -+ -+ case CONVERT (A52_CHANNEL, A52_CHANNEL2): -+ memcpy (samples + 256, samples, 256 * sizeof (sample_t)); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_MONO): -+ zero_MMX (samples + 1024); -+ case CONVERT (A52_3F1R, A52_MONO): -+ case CONVERT (A52_2F2R, A52_MONO): -+ zero_MMX (samples + 768); -+ case CONVERT (A52_3F, A52_MONO): -+ case CONVERT (A52_2F1R, A52_MONO): -+ zero_MMX (samples + 512); -+ case CONVERT (A52_CHANNEL, A52_MONO): -+ case CONVERT (A52_STEREO, A52_MONO): -+ zero_MMX (samples + 256); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_STEREO): -+ case CONVERT (A52_3F2R, A52_DOLBY): -+ zero_MMX (samples + 1024); -+ case CONVERT (A52_3F1R, A52_STEREO): -+ case CONVERT (A52_3F1R, A52_DOLBY): -+ zero_MMX (samples + 768); -+ case CONVERT (A52_3F, A52_STEREO): -+ case CONVERT (A52_3F, A52_DOLBY): -+ mix_3to2_MMX: -+ memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); -+ zero_MMX (samples + 256); -+ break; -+ -+ case CONVERT (A52_2F2R, A52_STEREO): -+ case CONVERT (A52_2F2R, A52_DOLBY): -+ zero_MMX (samples + 768); -+ case CONVERT (A52_2F1R, A52_STEREO): -+ case CONVERT (A52_2F1R, A52_DOLBY): -+ zero_MMX (samples + 512); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_3F): -+ zero_MMX (samples + 1024); -+ case CONVERT (A52_3F1R, A52_3F): -+ case CONVERT (A52_2F2R, A52_2F1R): -+ zero_MMX (samples + 768); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_3F1R): -+ zero_MMX (samples + 1024); -+ break; -+ -+ case CONVERT (A52_3F2R, A52_2F1R): -+ zero_MMX (samples + 1024); -+ case CONVERT (A52_3F1R, A52_2F1R): -+ mix_31to21_MMX: -+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); -+ goto mix_3to2_MMX; -+ -+ case CONVERT (A52_3F2R, A52_2F2R): -+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); -+ goto mix_31to21_MMX; -+ } -+} -+ -+static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %2, %%mm7 \n\t" -+ "punpckldq %2, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 24(%0, %%"REG_S"), %%mm3 \n\t" -+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" -+ "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" -+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" -+ "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" -+ "pfadd %%mm7, %%mm0 \n\t" -+ "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd %%mm7, %%mm2 \n\t" -+ "pfadd %%mm7, %%mm3 \n\t" -+ "movq %%mm0, (%1, %%"REG_S") \n\t" -+ "movq %%mm1, 8(%1, %%"REG_S") \n\t" -+ "movq %%mm2, 16(%1, %%"REG_S") \n\t" -+ "movq %%mm3, 24(%1, %%"REG_S") \n\t" -+ "add $32, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (src+256), "r" (dest+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix3to1_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" -+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" -+ "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd %%mm2, %%mm0 \n\t" -+ "pfadd %%mm3, %%mm1 \n\t" -+ "movq %%mm0, (%0, %%"REG_S") \n\t" -+ "movq %%mm1, 8(%0, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix4to1_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" -+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" -+ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" -+ "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd %%mm2, %%mm0 \n\t" -+ "pfadd %%mm3, %%mm1 \n\t" -+ "movq %%mm0, (%0, %%"REG_S") \n\t" -+ "movq %%mm1, 8(%0, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix5to1_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" -+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" -+ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" -+ "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" -+ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd %%mm2, %%mm0 \n\t" -+ "pfadd %%mm3, %%mm1 \n\t" -+ "movq %%mm0, (%0, %%"REG_S") \n\t" -+ "movq %%mm1, 8(%0, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix3to2_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" //common -+ "pfadd %%mm7, %%mm1 \n\t" //common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t" -+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t" -+ "pfadd %%mm0, %%mm2 \n\t" -+ "pfadd %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm4 \n\t" -+ "pfadd %%mm1, %%mm5 \n\t" -+ "movq %%mm2, (%0, %%"REG_S") \n\t" -+ "movq %%mm3, 8(%0, %%"REG_S") \n\t" -+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" -+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %2, %%mm7 \n\t" -+ "punpckldq %2, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" -+ "movq 1032(%1, %%"REG_S"), %%mm1\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" //common -+ "pfadd %%mm7, %%mm1 \n\t" //common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq (%1, %%"REG_S"), %%mm4 \n\t" -+ "movq 8(%1, %%"REG_S"), %%mm5 \n\t" -+ "pfadd %%mm0, %%mm2 \n\t" -+ "pfadd %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm4 \n\t" -+ "pfadd %%mm1, %%mm5 \n\t" -+ "movq %%mm2, (%0, %%"REG_S") \n\t" -+ "movq %%mm3, 8(%0, %%"REG_S") \n\t" -+ "movq %%mm4, (%1, %%"REG_S") \n\t" -+ "movq %%mm5, 8(%1, %%"REG_S") \n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (left+256), "r" (right+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix21toS_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround -+ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm4\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm5\n\t" -+ "pfadd %%mm7, %%mm2 \n\t" -+ "pfadd %%mm7, %%mm3 \n\t" -+ "pfadd %%mm7, %%mm4 \n\t" -+ "pfadd %%mm7, %%mm5 \n\t" -+ "pfsub %%mm0, %%mm2 \n\t" -+ "pfsub %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm4 \n\t" -+ "pfadd %%mm1, %%mm5 \n\t" -+ "movq %%mm2, (%0, %%"REG_S") \n\t" -+ "movq %%mm3, 8(%0, %%"REG_S") \n\t" -+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" -+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix31to2_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" -+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" // common -+ "pfadd %%mm7, %%mm1 \n\t" // common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t" -+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t" -+ "pfadd %%mm0, %%mm2 \n\t" -+ "pfadd %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm4 \n\t" -+ "pfadd %%mm1, %%mm5 \n\t" -+ "movq %%mm2, (%0, %%"REG_S") \n\t" -+ "movq %%mm3, 8(%0, %%"REG_S") \n\t" -+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" -+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix31toS_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd %%mm7, %%mm0 \n\t" // common -+ "pfadd %%mm7, %%mm1 \n\t" // common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t" -+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t" -+ "pfadd %%mm0, %%mm2 \n\t" -+ "pfadd %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm4 \n\t" -+ "pfadd %%mm1, %%mm5 \n\t" -+ "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround -+ "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "pfsub %%mm0, %%mm2 \n\t" -+ "pfsub %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm4 \n\t" -+ "pfadd %%mm1, %%mm5 \n\t" -+ "movq %%mm2, (%0, %%"REG_S") \n\t" -+ "movq %%mm3, 8(%0, %%"REG_S") \n\t" -+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" -+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" -+ "add $16, %%"REG_S" \n\t" -+ " jnz 1b \n\t" -+ :: "r" (samples+256), "m" (bias) -+ : "%"REG_S -+ ); -+} -+ -+static void mix22toS_3dnow (sample_t * samples, sample_t bias) -+{ -+ __asm__ volatile( -+ "movd %1, %%mm7 \n\t" -+ "punpckldq %1, %%mm7 \n\t" -+ "mov $-1024, %%"REG_S" \n\t" -+ ASMALIGN(4) -+ "1: \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" -+ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround -+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" -+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm4\n\t" -+ "movq 1032(%0, %%"REG_S"), %%mm5\n\t" -+ "pfadd %%mm7, %%mm2 \n\t" -+ "pfadd %%mm7, %% |