From 6e695dc64f0310745a32e2399a955fbf1368cd1a Mon Sep 17 00:00:00 2001 From: bircoph Date: Wed, 13 May 2009 15:22:13 +0000 Subject: Remove all kind of trailing whitespaces from all MPlayer's files. This affects all kind of spaces (' ',^I,^M,^L,...): actually [:space:] regex character set. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@29306 b3059339-0415-0410-9bf9-f77b7e298cf2 --- liba52/liba52_changes.diff | 464 ++++++++++++++++++++++----------------------- 1 file changed, 232 insertions(+), 232 deletions(-) (limited to 'liba52/liba52_changes.diff') diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff index f00bb444d2..1b3574f043 100644 --- a/liba52/liba52_changes.diff +++ b/liba52/liba52_changes.diff @@ -3,7 +3,7 @@ @@ -59,4 +66,9 @@ int a52_block (a52_state_t * state); void a52_free (a52_state_t * state); - + +void* a52_resample_init(uint32_t mm_accel,int flags,int chans); +extern int (* a52_resample) (float * _f, int16_t * s16); + @@ -15,7 +15,7 @@ @@ -103,18 +107,34 @@ #define DELTA_BIT_NONE (2) #define DELTA_BIT_RESERVED (3) - + +#if ARCH_X86_64 +# define REG_a "rax" +# define REG_d "rdx" @@ -33,7 +33,7 @@ void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, int start, int end, int fastleak, int slowleak, expbap_t * expbap); - + int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev); +void downmix_accel_init(uint32_t mm_accel); @@ -44,7 +44,7 @@ sample_t clev, sample_t slev); -void a52_upmix (sample_t * samples, int acmod, int output); +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); - + void a52_imdct_init (uint32_t mm_accel); void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); @@ -53,9 +53,9 @@ --- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 +++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 @@ -31,6 +35,10 @@ - + #define BUFFER_SIZE 4096 - + +#ifdef ALT_BITSTREAM_READER +int indx=0; +#endif @@ -72,13 +72,13 @@ +#endif bitstream_get (state, align * 8); } - + --- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 +++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 @@ -21,6 +25,42 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + +/* code from ffmpeg/libavcodec */ +#if defined(__sparc__) || defined(hpux) +/* @@ -117,20 +117,20 @@ + /* (stolen from the kernel) */ #ifdef WORDS_BIGENDIAN - + @@ -28,7 +68,7 @@ - + #else - + -# if 0 && defined (__i386__) +# if defined (__i386__) - + # define swab32(x) __i386_swab32(x) static inline const uint32_t __i386_swab32(uint32_t x) @@ -39,19 +79,34 @@ - + # else - + -# define swab32(x)\ -((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ - (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) @@ -143,7 +143,7 @@ + } # endif #endif - + +#ifdef ALT_BITSTREAM_READER +extern int indx; +#endif @@ -151,7 +151,7 @@ void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); - + static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) { +#ifdef ALT_BITSTREAM_READER @@ -160,15 +160,15 @@ + result<<= (indx&0x07); + result>>= 32 - num_bits; + indx+= num_bits; -+ ++ + return result; +#else uint32_t result; - + if (num_bits < state->bits_left) { @@ -61,10 +116,29 @@ } - + return a52_bitstream_get_bh (state, num_bits); +#endif +} @@ -181,7 +181,7 @@ 
+ bitstream_get(state, num_bits); +#endif } - + static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) { +#ifdef ALT_BITSTREAM_READER @@ -190,15 +190,15 @@ + result<<= (indx&0x07); + result>>= 32 - num_bits; + indx+= num_bits; -+ ++ + return result; +#else int32_t result; - + if (num_bits < state->bits_left) { @@ -74,4 +148,5 @@ } - + return a52_bitstream_get_bh_2 (state, num_bits); +#endif } @@ -211,18 +211,18 @@ + * + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) */ - + #include "config.h" - + #include #include - + #include "a52.h" #include "a52_internal.h" +#include "mm_accel.h" - + #define CONVERT(acmod,output) (((output) << 3) + (acmod)) - + + +void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev)= NULL; @@ -247,14 +247,14 @@ + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; +#endif +} -+ ++ int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -447,7 +479,7 @@ samples[i] = 0; } - + -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev) @@ -262,28 +262,28 @@ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -559,7 +591,7 @@ break; - + case CONVERT (A52_3F2R, A52_2F1R): - mix3to2 (samples, bias); + mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) move2to1 (samples + 768, samples + 512, bias); break; - + @@ -583,12 +615,12 @@ break; - + case CONVERT (A52_3F1R, A52_3F2R): - memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); break; } } - + -void a52_upmix (sample_t * samples, int acmod, int output) +void upmix_C (sample_t * samples, int acmod, int output) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { - + @@ -653,3 +685,1104 @@ goto mix_31to21; } @@ -298,10 +298,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps (%1, %%"REG_S"), %%xmm0 \n\t" -+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%1, %%"REG_S"), %%xmm0 \n\t" ++ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm7, %%xmm1 \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" @@ -321,9 +321,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -342,10 +342,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -364,12 +364,12 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), 
%%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" @@ -387,9 +387,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -410,9 +410,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%1, %%"REG_S"), %%xmm2 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -434,7 +434,7 @@ + ASMALIGN(4) + "1: \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" @@ -457,10 +457,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -481,10 +481,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround + "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -507,9 +507,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" @@ -532,13 +532,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common + "movaps %%xmm0, %%xmm1 \n\t" // common -+ "addps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "movaps 
%%xmm1, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" @@ -556,16 +556,16 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common -+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" -+ "subps %%xmm2, %%xmm1 \n\t" -+ "addps %%xmm2, %%xmm3 \n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm3 \n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" ++ "subps %%xmm2, %%xmm1 \n\t" ++ "addps %%xmm2, %%xmm3 \n\t" ++ "addps %%xmm0, %%xmm1 \n\t" ++ "addps %%xmm0, %%xmm3 \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" @@ -583,8 +583,8 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" @@ -832,13 +832,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" -+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" ++ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" -+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" ++ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -863,11 +863,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -890,13 +890,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -919,17 +919,17 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), 
%%mm2\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm2, %%mm0 \n\t" + "pfadd %%mm3, %%mm1 \n\t" @@ -950,11 +950,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" //common + "pfadd %%mm7, %%mm1 \n\t" //common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -981,11 +981,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1032(%1, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" //common + "pfadd %%mm7, %%mm1 \n\t" //common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm4 \n\t" + "movq 8(%1, %%"REG_S"), %%mm5 \n\t" @@ -1014,7 +1014,7 @@ + "1: \n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1045,13 +1045,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1078,11 +1078,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1115,11 +1115,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1150,19 +1150,19 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common + "movq %%mm0, %%mm2 \n\t" // common + "movq %%mm1, %%mm3 \n\t" // common -+ "pfadd (%0, %%"REG_S"), 
%%mm0 \n\t" ++ "pfadd (%0, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" @@ -1184,25 +1184,25 @@ + "1: \n\t" + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "movq 3072(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 3072(%0, %%"REG_S"), %%mm4\n\t" + "movq 3080(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common -+ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround ++ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround + "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm6\n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm6\n\t" + "movq 2056(%0, %%"REG_S"), %%mm7\n\t" -+ "pfsub %%mm4, %%mm2 \n\t" ++ "pfsub %%mm4, %%mm2 \n\t" + "pfsub %%mm5, %%mm3 \n\t" -+ "pfadd %%mm4, %%mm6 \n\t" ++ "pfadd %%mm4, %%mm6 \n\t" + "pfadd %%mm5, %%mm7 \n\t" -+ "pfadd %%mm0, %%mm2 \n\t" ++ "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm6 \n\t" ++ "pfadd %%mm0, %%mm6 \n\t" + "pfadd %%mm1, %%mm7 \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" @@ -1223,9 +1223,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" + "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" @@ -1401,7 +1401,7 @@ + * michael did port them from libac3 (untested, perhaps totally broken) + * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) */ - + #include "config.h" @@ -39,12 +48,50 @@ #include "a52.h" @@ -1415,12 +1415,12 @@ +#undef HAVE_AMD3DNOWEXT +#define HAVE_AMD3DNOWEXT 0 +#endif - + typedef struct complex_s { sample_t real; sample_t imag; } complex_t; - + +static const int pm128[128] attribute_used __attribute__((aligned(16))) = +{ + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, @@ -1431,24 +1431,24 @@ + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -+}; ++}; + +static uint8_t attribute_used bit_reverse_512[] = { -+ 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, -+ 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, -+ 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, -+ 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, -+ 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, -+ 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, -+ 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, -+ 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, -+ 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, -+ 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, -+ 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, -+ 
0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, -+ 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, -+ 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, -+ 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, ++ 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, ++ 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, ++ 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, ++ 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, ++ 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, ++ 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, ++ 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, ++ 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, ++ 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, ++ 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, ++ 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, ++ 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, ++ 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, ++ 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, ++ 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, + 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; + static uint8_t fftorder[] = { @@ -1457,7 +1457,7 @@ @@ -56,6 +103,40 @@ 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 }; - + +static complex_t __attribute__((aligned(16))) buf[128]; + +/* Twiddle factor LUT */ @@ -1475,8 +1475,8 @@ +static sample_t __attribute__((aligned(16))) xsin1[128]; + +#if ARCH_X86 || ARCH_X86_64 -+// NOTE: SSE needs 16byte alignment or it will segfault -+// ++// NOTE: SSE needs 16byte alignment or it will segfault ++// +static float __attribute__((aligned(16))) sseSinCos1c[256]; +static float __attribute__((aligned(16))) sseSinCos1d[256]; +static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; @@ -1498,7 +1498,7 @@ @@ -241,7 +322,7 @@ ifft_pass (buf, roots128 - 32, 32); } - + -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) { @@ -1507,7 +1507,7 @@ @@ -285,6 +366,702 @@ } } - + +#if HAVE_ALTIVEC + +#ifdef HAVE_ALTIVEC_H @@ -1559,17 +1559,17 @@ + sample_t *data_ptr; + sample_t *delay_ptr; + sample_t *window_ptr; -+ ++ + /* 512 IMDCT with source and dest data in 'data' */ -+ ++ + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ + for( i=0; i < 128; i++) { -+ /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ ++ /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + int j= bit_reverse_512[i]; + buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); + buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); + } -+ ++ + /* 1. iteration */ + for(i = 0; i < 128; i += 2) { +#if 0 @@ -1582,7 +1582,7 @@ + buf[i+1].real = tmp_a_r - tmp_b_r; + buf[i+1].imag = tmp_a_i - tmp_b_i; +#else -+ vector float temp, bufv; ++ vector float temp, bufv; + + bufv = vec_ld(i << 3, (float*)buf); + temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); @@ -1590,7 +1590,7 @@ + vec_st(bufv, i << 3, (float*)buf); +#endif + } -+ ++ + /* 2. 
iteration */ + // Note w[1]={{1,0}, {0,-1}} + for(i = 0; i < 128; i += 4) { @@ -1614,7 +1614,7 @@ + buf[i+3].imag = tmp_a_i + tmp_b_i; +#else + vector float buf01, buf23, temp1, temp2; -+ ++ + buf01 = vec_ld((i + 0) << 3, (float*)buf); + buf23 = vec_ld((i + 2) << 3, (float*)buf); + buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); @@ -1682,14 +1682,14 @@ + buf45 = vec_ld((i + 4) << 3, (float*)buf); + buf67 = vec_ld((i + 6) << 3, (float*)buf); + buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); -+ ++ + vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); + vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); + vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); + vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); +#endif + } -+ ++ + /* 4-7. iterations */ + for (m=3; m < 7; m++) { + two_m = (1 << m); @@ -1742,10 +1742,10 @@ + vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); + // then butterfly with buf[p] and buf[p+1] + vecp = vec_ld(p << 3, (float*)buf); -+ ++ + temp1 = vec_add(vecp, vecq); + temp2 = vec_sub(vecp, vecq); -+ ++ + vec_st(temp1, p << 3, (float*)buf); + vec_st(temp2, q << 3, (float*)buf); +#endif @@ -1802,7 +1802,7 @@ + tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); + temp2 = vec_madd(temp1133, tempCS01, vczero); + bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); -+ ++ + vec_st(bufv_0, (i + 0) << 3, (float*)buf); + + /* idem with bufv_2 and high-order cosv/sinv */ @@ -1816,36 +1816,36 @@ + bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); + + vec_st(bufv_2, (i + 2) << 3, (float*)buf); -+ ++ +#endif + } -+ ++ + data_ptr = data; + delay_ptr = delay; + window_ptr = a52_imdct_window; + + /* Window and convert to real valued signal */ -+ for(i=0; i< 64; i++) { -+ *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; -+ *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; ++ for(i=0; i< 64; i++) { ++ *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; ++ *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; + } -+ -+ for(i=0; i< 64; i++) { -+ *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; -+ *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; ++ ++ for(i=0; i< 64; i++) { ++ *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; ++ *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; + } -+ ++ + /* The trailing edge of the window goes into the delay line */ + delay_ptr = delay; + -+ for(i=0; i< 64; i++) { -+ *delay_ptr++ = -buf[64+i].real * *--window_ptr; -+ *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; ++ for(i=0; i< 64; i++) { ++ *delay_ptr++ = -buf[64+i].real * *--window_ptr; ++ *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + } -+ ++ + for(i=0; i<64; i++) { -+ *delay_ptr++ = buf[i].imag * *--window_ptr; -+ *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; ++ *delay_ptr++ = buf[i].imag * *--window_ptr; ++ *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + } +} +#endif @@ -1858,8 +1858,8 @@ +#define HAVE_AMD3DNOW 1 +#include "srfftp_3dnow.h" + -+const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; -+const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; ++const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; ++const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; +const complex_t HSQRT2_3DNOW __attribute__ 
((aligned (8))) = { 0.707106781188, 0.707106781188 }; + +#undef HAVE_AMD3DNOWEXT @@ -1888,9 +1888,9 @@ + sample_t *data_ptr; + sample_t *delay_ptr; + sample_t *window_ptr; -+ ++ + /* 512 IMDCT with source and dest data in 'data' */ -+ /* see the c version (dct_do_512()), its allmost identical, just in C */ ++ /* see the c version (dct_do_512()), its allmost identical, just in C */ + + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ + /* Bit reversed shuffling */ @@ -1951,7 +1951,7 @@ + } + } +*/ -+ ++ + /* 1. iteration */ + // Note w[0][0]={1,0} + __asm__ volatile( @@ -1973,7 +1973,7 @@ + :: "g" (buf), "r" (buf + 128) + : "%"REG_S + ); -+ ++ + /* 2. iteration */ + // Note w[1]={{1,0}, {0,-1}} + __asm__ volatile( @@ -2005,8 +2005,8 @@ + Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) +*/ + __asm__ volatile( -+ "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" -+ "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" ++ "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" ++ "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" + "xorps %%xmm5, %%xmm5 \n\t" + "xorps %%xmm2, %%xmm2 \n\t" + "mov %0, %%"REG_S" \n\t" @@ -2032,10 +2032,10 @@ + "addps %%xmm1, %%xmm3 \n\t" + "subps %%xmm4, %%xmm0 \n\t" + "subps %%xmm5, %%xmm1 \n\t" -+ "movaps %%xmm2, (%%"REG_S") \n\t" -+ "movaps %%xmm3, 16(%%"REG_S") \n\t" -+ "movaps %%xmm0, 32(%%"REG_S") \n\t" -+ "movaps %%xmm1, 48(%%"REG_S") \n\t" ++ "movaps %%xmm2, (%%"REG_S") \n\t" ++ "movaps %%xmm3, 16(%%"REG_S") \n\t" ++ "movaps %%xmm0, 32(%%"REG_S") \n\t" ++ "movaps %%xmm1, 48(%%"REG_S") \n\t" + "add $64, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" + " jb 1b \n\t" @@ -2069,7 +2069,7 @@ + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" + "add $16, %%"REG_D" \n\t" -+ "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 ++ "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 + "jb 2b \n\t" + "add %2, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" @@ -2096,9 +2096,9 @@ + " jnz 1b \n\t" + :: "r" (buf+128) + : "%"REG_S -+ ); ++ ); ++ + -+ + data_ptr = data; + delay_ptr = delay; + window_ptr = a52_imdct_window; @@ -2122,7 +2122,7 @@ + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" -+ "cmp $512, %%"REG_S" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) + : "%"REG_S, "%"REG_D @@ -2130,7 +2130,7 @@ + data_ptr+=128; + delay_ptr+=128; +// window_ptr+=128; -+ ++ + __asm__ volatile( + "mov $1024, %%"REG_D" \n\t" // 512 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -2149,7 +2149,7 @@ + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" -+ "cmp $512, %%"REG_S" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) + : "%"REG_S, "%"REG_D @@ -2167,21 +2167,21 @@ + "1: \n\t" + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C -+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C -+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A ++ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C ++ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? 
A + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A + "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" -+ "cmp $512, %%"REG_S" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf+64), "r" (delay_ptr) + : "%"REG_S, "%"REG_D + ); + delay_ptr+=128; +// window_ptr-=128; -+ ++ + __asm__ volatile( + "mov $1024, %%"REG_D" \n\t" // 1024 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -2189,14 +2189,14 @@ + "1: \n\t" + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? -+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? -+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? ++ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? ++ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A + "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" + "sub $16, %%"REG_D" \n\t" -+ "cmp $512, %%"REG_S" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf), "r" (delay_ptr) + : "%"REG_S, "%"REG_D @@ -2208,13 +2208,13 @@ { int i, k; @@ -364,7 +1141,7 @@ - + void a52_imdct_init (uint32_t mm_accel) { - int i, k; + int i, j, k; double sum; - + /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ @@ -416,6 +1193,99 @@ post2[i].real = cos ((M_PI / 128) * (i + 0.5)); @@ -2236,12 +2236,12 @@ + sseSinCos1c[2*i+0]= xcos1[i]; + sseSinCos1c[2*i+1]= -xcos1[i]; + sseSinCos1d[2*i+0]= xsin1[i]; -+ sseSinCos1d[2*i+1]= xsin1[i]; ++ sseSinCos1d[2*i+1]= xsin1[i]; + } + for (i = 1; i < 7; i++) { + j = 1 << i; + for (k = 0; k < j; k+=2) { -+ ++ + sseW[i][4*k + 0] = w[i][k+0].real; + sseW[i][4*k + 1] = w[i][k+0].real; + sseW[i][4*k + 2] = w[i][k+1].real; @@ -2250,15 +2250,15 @@ + sseW[i][4*k + 4] = -w[i][k+0].imag; + sseW[i][4*k + 5] = w[i][k+0].imag; + sseW[i][4*k + 6] = -w[i][k+1].imag; -+ sseW[i][4*k + 7] = w[i][k+1].imag; -+ ++ sseW[i][4*k + 7] = w[i][k+1].imag; ++ + //we multiply more or less uninitalized numbers so we need to use exactly 0.0 + if(k==0) + { +// sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0; + sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0; + } -+ ++ + if(2*k == j) + { + sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0; @@ -2270,9 +2270,9 @@ + for(i=0; i<128; i++) + { + sseWindow[2*i+0]= -a52_imdct_window[2*i+0]; -+ sseWindow[2*i+1]= a52_imdct_window[2*i+1]; ++ sseWindow[2*i+1]= a52_imdct_window[2*i+1]; + } -+ ++ + for(i=0; i<64; i++) + { + sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1]; @@ -2313,7 +2313,7 @@ + } + else +#endif - + #ifdef LIBA52_DJBFFT if (mm_accel & MM_ACCEL_DJBFFT) { @@ -426,7 +1296,5 @@ @@ -2336,26 +2336,26 @@ + +/* PPC accelerations */ +#define MM_ACCEL_PPC_ALTIVEC 0x00010000 - + uint32_t mm_accel (void); - + --- liba52/parse.c 2006-12-05 08:08:01.000000000 +0100 +++ liba52/parse.c 2006-12-05 08:08:44.000000000 +0100 @@ -24,6 +28,7 @@ #include "config.h" - + #include +#include #include #include - + @@ -31,13 +36,12 @@ #include "a52_internal.h" #include "bitstream.h" #include "tables.h" +#include "mm_accel.h" +#include "libavutil/avutil.h" - + -#ifdef HAVE_MEMALIGN +#if HAVE_MEMALIGN /* some systems have memalign() but no declaration for it */ @@ -2364,13 +2364,13 @@ -/* assume malloc alignment is sufficient */ -#define memalign(align,size) malloc (size) #endif - + typedef struct { @@ -60,7 +64,16 @@ if (state == NULL) return NULL; - -+#if defined(__MINGW32__) && defined(HAVE_SSE) + ++#if 
defined(__MINGW32__) && defined(HAVE_SSE) + state->samples = av_malloc(256 * 12 * sizeof (sample_t)); +#else state->samples = memalign (16, 256 * 12 * sizeof (sample_t)); @@ -2378,30 +2378,30 @@ + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){ + mm_accel &=~MM_ACCEL_X86_SSE; + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); -+ } -+ ++ } ++ if (state->samples == NULL) { free (state); return NULL; @@ -74,6 +87,7 @@ state->lfsr_state = 1; - + a52_imdct_init (mm_accel); + downmix_accel_init(mm_accel); - + return state; } @@ -141,7 +155,7 @@ state->acmod = acmod = buf[6] >> 5; - + a52_bitstream_set_ptr (state, buf + 6); - bitstream_get (state, 3); /* skip acmod we already parsed */ + bitstream_skip (state, 3); /* skip acmod we already parsed */ - + if ((acmod == 2) && (bitstream_get (state, 2) == 2)) /* dsurmod */ acmod = A52_DOLBY; @@ -172,28 +186,28 @@ - + chaninfo = !acmod; do { - bitstream_get (state, 5); /* dialnorm */ @@ -2416,27 +2416,27 @@ - bitstream_get (state, 7); /* mixlevel + roomtyp */ + bitstream_skip (state, 7); /* mixlevel + roomtyp */ } while (chaninfo--); - + - bitstream_get (state, 2); /* copyrightb + origbs */ + bitstream_skip (state, 2); /* copyrightb + origbs */ - + if (bitstream_get (state, 1)) /* timecod1e */ - bitstream_get (state, 14); /* timecod1 */ + bitstream_skip (state, 14); /* timecod1 */ if (bitstream_get (state, 1)) /* timecod2e */ - bitstream_get (state, 14); /* timecod2 */ + bitstream_skip (state, 14); /* timecod2 */ - + if (bitstream_get (state, 1)) { /* addbsie */ int addbsil; - + addbsil = bitstream_get (state, 6); do { - bitstream_get (state, 8); /* addbsi */ + bitstream_skip (state, 8); /* addbsi */ } while (addbsil--); } - + @@ -680,7 +694,7 @@ state->fbw_expbap[i].exp[0], state->fbw_expbap[i].exp + 1)) @@ -2453,10 +2453,10 @@ - bitstream_get (state, 8); + bitstream_skip (state, 8); } - + samples = state->samples; @@ -896,6 +910,10 @@ - + void a52_free (a52_state_t * state) { - free (state->samples); -- cgit v1.2.3
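The commit message above describes the cleanup as stripping every trailing [:space:] character (plain space, ^I tab, ^M carriage return, ^L form feed, and so on) from each line of the affected files. The actual MPlayer cleanup was done with the project's own tooling, not with the script below; this is only a minimal, illustrative Python sketch of that kind of operation, and the script name used in the invocation is hypothetical.

    import re
    import sys

    # Match a run of trailing whitespace at the end of each line:
    # space, tab, carriage return, form feed, vertical tab.
    # (Python's \s would also match the newline itself, so an explicit
    # character class is used instead.)
    TRAILING_WS = re.compile(r'[ \t\r\f\v]+$', re.MULTILINE)

    def strip_trailing_whitespace(path):
        """Rewrite `path` in place with trailing whitespace removed from every line."""
        with open(path, 'r', encoding='utf-8', errors='surrogateescape') as f:
            text = f.read()
        cleaned = TRAILING_WS.sub('', text)
        if cleaned != text:
            with open(path, 'w', encoding='utf-8', errors='surrogateescape') as f:
                f.write(cleaned)

    if __name__ == '__main__':
        for filename in sys.argv[1:]:
            strip_trailing_whitespace(filename)

Run against the file touched by this commit (assuming the script were saved as strip_ws.py), it would produce the same class of change shown in the diff above:

    python strip_ws.py liba52/liba52_changes.diff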