From 6e695dc64f0310745a32e2399a955fbf1368cd1a Mon Sep 17 00:00:00 2001 From: bircoph Date: Wed, 13 May 2009 15:22:13 +0000 Subject: Remove all kind of trailing whitespaces from all MPlayer's files. This affects all kind of spaces (' ',^I,^M,^L,...): actually [:space:] regex character set. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@29306 b3059339-0415-0410-9bf9-f77b7e298cf2 --- LICENSE | 10 +- liba52/bitstream.c | 2 +- liba52/bitstream.h | 8 +- liba52/crc.c | 12 +- liba52/downmix.c | 178 +++++++-------- liba52/imdct.c | 160 +++++++------- liba52/imdct_3dnow.h | 84 ++++---- liba52/liba52_changes.diff | 464 ++++++++++++++++++++-------------------- liba52/parse.c | 16 +- liba52/srfftp.h | 20 +- liba52/srfftp_3dnow.h | 6 +- liba52/test.c | 8 +- libfaad2/analysis.h | 8 +- libfaad2/bits.c | 8 +- libfaad2/cfft.h | 8 +- libfaad2/codebook/hcb.h | 8 +- libfaad2/codebook/hcb_1.h | 8 +- libfaad2/codebook/hcb_10.h | 8 +- libfaad2/codebook/hcb_11.h | 8 +- libfaad2/codebook/hcb_2.h | 8 +- libfaad2/codebook/hcb_3.h | 8 +- libfaad2/codebook/hcb_4.h | 8 +- libfaad2/codebook/hcb_5.h | 8 +- libfaad2/codebook/hcb_6.h | 8 +- libfaad2/codebook/hcb_7.h | 8 +- libfaad2/codebook/hcb_8.h | 8 +- libfaad2/codebook/hcb_9.h | 8 +- libfaad2/codebook/hcb_sf.h | 8 +- libfaad2/decoder.c | 4 +- libfaad2/decoder.h | 8 +- libfaad2/drc.c | 8 +- libfaad2/drc.h | 8 +- libfaad2/drm_dec.c | 240 ++++++++++----------- libfaad2/drm_dec.h | 26 +-- libfaad2/error.c | 8 +- libfaad2/error.h | 8 +- libfaad2/filtbank.c | 8 +- libfaad2/filtbank.h | 8 +- libfaad2/fixed.h | 8 +- libfaad2/hcr.c | 160 +++++++------- libfaad2/huffman.c | 20 +- libfaad2/huffman.h | 8 +- libfaad2/ic_predict.c | 10 +- libfaad2/ic_predict.h | 8 +- libfaad2/iq_table.h | 8 +- libfaad2/is.c | 8 +- libfaad2/is.h | 8 +- libfaad2/kbd_win.h | 8 +- libfaad2/local_changes.diff | 76 +++---- libfaad2/lt_predict.c | 8 +- libfaad2/lt_predict.h | 8 +- libfaad2/mdct.c | 10 +- libfaad2/mdct.h | 8 +- libfaad2/mdct_tab.h | 8 +- libfaad2/mp4.c | 8 +- libfaad2/mp4.h | 8 +- libfaad2/ms.c | 10 +- libfaad2/ms.h | 8 +- libfaad2/output.h | 8 +- libfaad2/pns.c | 8 +- libfaad2/pns.h | 8 +- libfaad2/ps_dec.c | 2 +- libfaad2/ps_dec.h | 8 +- libfaad2/pulse.c | 8 +- libfaad2/pulse.h | 8 +- libfaad2/rvlc.c | 10 +- libfaad2/rvlc.h | 8 +- libfaad2/sbr_dct.h | 8 +- libfaad2/sbr_dec.c | 10 +- libfaad2/sbr_e_nf.c | 8 +- libfaad2/sbr_e_nf.h | 8 +- libfaad2/sbr_fbt.c | 10 +- libfaad2/sbr_fbt.h | 8 +- libfaad2/sbr_hfadj.c | 52 ++--- libfaad2/sbr_hfadj.h | 8 +- libfaad2/sbr_hfgen.c | 10 +- libfaad2/sbr_hfgen.h | 8 +- libfaad2/sbr_huff.c | 8 +- libfaad2/sbr_huff.h | 8 +- libfaad2/sbr_noise.h | 8 +- libfaad2/sbr_qmf.h | 8 +- libfaad2/sbr_syntax.c | 8 +- libfaad2/sbr_syntax.h | 8 +- libfaad2/sbr_tf_grid.c | 8 +- libfaad2/sbr_tf_grid.h | 8 +- libfaad2/sine_win.h | 8 +- libfaad2/specrec.c | 6 +- libfaad2/specrec.h | 8 +- libfaad2/ssr.c | 8 +- libfaad2/ssr.h | 8 +- libfaad2/ssr_fb.c | 8 +- libfaad2/ssr_fb.h | 8 +- libfaad2/ssr_ipqf.c | 10 +- libfaad2/ssr_ipqf.h | 8 +- libfaad2/ssr_win.h | 8 +- libfaad2/syntax.c | 14 +- libfaad2/syntax.h | 8 +- libfaad2/tns.c | 8 +- libfaad2/tns.h | 10 +- libmpeg2/alpha_asm.h | 2 +- libmpeg2/cpu_accel.c | 4 +- libmpeg2/header.c | 6 +- libmpeg2/idct_alpha.c | 2 +- libmpeg2/libmpeg2_changes.diff | 108 +++++----- libmpeg2/slice.c | 2 +- tremor/asm_arm.h | 26 +-- tremor/backends.h | 6 +- tremor/bitwise.c | 66 +++--- tremor/block.c | 56 ++--- tremor/codebook.c | 34 +-- tremor/codebook.h | 14 +- tremor/codec_internal.h | 2 +- tremor/floor0.c | 62 +++--- tremor/floor1.c | 40 ++-- tremor/framing.c | 104 ++++----- tremor/info.c | 22 +- tremor/ivorbiscodec.h | 22 +- tremor/mapping0.c | 34 +-- tremor/mdct.c | 8 +- tremor/mdct_lookup.h | 6 +- tremor/misc.h | 10 +- tremor/ogg.h | 2 +- tremor/os.h | 2 +- tremor/os_types.h | 2 +- tremor/res012.c | 20 +- tremor/sharedbook.c | 42 ++-- tremor/synthesis.c | 8 +- tremor/tremor.diff | 40 ++-- tremor/window.c | 4 +- vidix/dhahelperwin/common.ver | 414 +++++++++++++++++------------------ vidix/dhahelperwin/dhahelper.rc | 34 +-- vidix/dhahelperwin/ntverp.h | 312 +++++++++++++-------------- 132 files changed, 1859 insertions(+), 1859 deletions(-) diff --git a/LICENSE b/LICENSE index 983f982a1f..3841ae92c6 100644 --- a/LICENSE +++ b/LICENSE @@ -55,7 +55,7 @@ patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. - + GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION @@ -110,7 +110,7 @@ above, provided that you also meet all of these conditions: License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) - + These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in @@ -168,7 +168,7 @@ access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. - + 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is @@ -225,7 +225,7 @@ impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. - + 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License @@ -278,7 +278,7 @@ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS - + How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest diff --git a/liba52/bitstream.c b/liba52/bitstream.c index a46ccced6b..7307527194 100644 --- a/liba52/bitstream.c +++ b/liba52/bitstream.c @@ -99,7 +99,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits) if (num_bits != 0) result = (result << num_bits) | (state->current_word >> (32 - num_bits)); - + state->bits_left = 32 - num_bits; return result; diff --git a/liba52/bitstream.h b/liba52/bitstream.h index 8500212c7e..e894f16781 100644 --- a/liba52/bitstream.h +++ b/liba52/bitstream.h @@ -104,11 +104,11 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) result<<= (indx&0x07); result>>= 32 - num_bits; indx+= num_bits; - + return result; #else uint32_t result; - + if (num_bits < state->bits_left) { result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; @@ -136,11 +136,11 @@ static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) result<<= (indx&0x07); result>>= 32 - num_bits; indx+= num_bits; - + return result; #else int32_t result; - + if (num_bits < state->bits_left) { result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; diff --git a/liba52/crc.c b/liba52/crc.c index d19a4a2e11..aa0a19c005 100644 --- a/liba52/crc.c +++ b/liba52/crc.c @@ -1,23 +1,23 @@ -/* +/* * crc.c * * Copyright (C) Aaron Holtzman - May 1999 * * This file is part of ac3dec, a free Dolby AC-3 stream decoder. - * + * * ac3dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. - * + * * ac3dec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * */ @@ -25,7 +25,7 @@ #include #include -static const uint16_t crc_lut[256] = +static const uint16_t crc_lut[256] = { 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011, 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022, diff --git a/liba52/downmix.c b/liba52/downmix.c index c44317fd6f..212c87d921 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -62,7 +62,7 @@ void downmix_accel_init(uint32_t mm_accel) if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; #endif } - + int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -695,10 +695,10 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" - "addps (%1, %%"REG_S"), %%xmm0 \n\t" - "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%1, %%"REG_S"), %%xmm0 \n\t" + "addps 16(%1, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm7, %%xmm1 \n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" @@ -718,9 +718,9 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -739,10 +739,10 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -761,12 +761,12 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" - "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" @@ -784,9 +784,9 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -807,9 +807,9 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps (%1, %%"REG_S"), %%xmm2 \n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -831,7 +831,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias) ASMALIGN(4) "1: \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" @@ -854,10 +854,10 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -878,10 +878,10 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -904,9 +904,9 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" @@ -929,13 +929,13 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common "movaps %%xmm0, %%xmm1 \n\t" // common - "addps (%0, %%"REG_S"), %%xmm0 \n\t" - "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" - "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%0, %%"REG_S"), %%xmm0 \n\t" + "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" "add $16, %%"REG_S" \n\t" @@ -953,16 +953,16 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" - "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" - "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" - "subps %%xmm2, %%xmm1 \n\t" - "addps %%xmm2, %%xmm3 \n\t" - "addps %%xmm0, %%xmm1 \n\t" - "addps %%xmm0, %%xmm3 \n\t" + "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" + "subps %%xmm2, %%xmm1 \n\t" + "addps %%xmm2, %%xmm3 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm3 \n\t" "movaps %%xmm1, (%0, %%"REG_S") \n\t" "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" "add $16, %%"REG_S" \n\t" @@ -980,8 +980,8 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" @@ -1229,13 +1229,13 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" "movq 24(%0, %%"REG_S"), %%mm3 \n\t" - "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd (%1, %%"REG_S"), %%mm0 \n\t" "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" - "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1260,11 +1260,11 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1287,13 +1287,13 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1316,17 +1316,17 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" - "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm2, %%mm0 \n\t" "pfadd %%mm3, %%mm1 \n\t" @@ -1347,11 +1347,11 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1378,11 +1378,11 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" "movq 1032(%1, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq (%1, %%"REG_S"), %%mm4 \n\t" "movq 8(%1, %%"REG_S"), %%mm5 \n\t" @@ -1411,7 +1411,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias) "1: \n\t" "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 1024(%0, %%"REG_S"), %%mm4\n\t" "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1442,13 +1442,13 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1475,11 +1475,11 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1512,11 +1512,11 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 2048(%0, %%"REG_S"), %%mm0\n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" "movq 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 1024(%0, %%"REG_S"), %%mm4\n\t" "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1547,19 +1547,19 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common "movq %%mm0, %%mm2 \n\t" // common "movq %%mm1, %%mm3 \n\t" // common - "pfadd (%0, %%"REG_S"), %%mm0 \n\t" + "pfadd (%0, %%"REG_S"), %%mm0 \n\t" "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "movq %%mm0, (%0, %%"REG_S") \n\t" "movq %%mm1, 8(%0, %%"REG_S") \n\t" @@ -1581,25 +1581,25 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias) "1: \n\t" "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" - "movq 3072(%0, %%"REG_S"), %%mm4\n\t" + "movq 3072(%0, %%"REG_S"), %%mm4\n\t" "movq 3080(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround + "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" - "movq 2048(%0, %%"REG_S"), %%mm6\n\t" + "movq 2048(%0, %%"REG_S"), %%mm6\n\t" "movq 2056(%0, %%"REG_S"), %%mm7\n\t" - "pfsub %%mm4, %%mm2 \n\t" + "pfsub %%mm4, %%mm2 \n\t" "pfsub %%mm5, %%mm3 \n\t" - "pfadd %%mm4, %%mm6 \n\t" + "pfadd %%mm4, %%mm6 \n\t" "pfadd %%mm5, %%mm7 \n\t" - "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" - "pfadd %%mm0, %%mm6 \n\t" + "pfadd %%mm0, %%mm6 \n\t" "pfadd %%mm1, %%mm7 \n\t" "movq %%mm2, (%0, %%"REG_S") \n\t" "movq %%mm3, 8(%0, %%"REG_S") \n\t" @@ -1620,9 +1620,9 @@ static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" "movq 24(%0, %%"REG_S"), %%mm3 \n\t" "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" diff --git a/liba52/imdct.c b/liba52/imdct.c index b813345537..089fa0acae 100644 --- a/liba52/imdct.c +++ b/liba52/imdct.c @@ -72,24 +72,24 @@ static const int pm128[128] attribute_used __attribute__((aligned(16))) = 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -}; +}; static uint8_t attribute_used bit_reverse_512[] = { - 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, - 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, - 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, - 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, - 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, - 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, - 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, - 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, - 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, - 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, - 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, - 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, - 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, - 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, - 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, + 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, + 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, + 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, + 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, + 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, + 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, + 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, + 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, + 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, + 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, + 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, + 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, + 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, + 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, + 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; static uint8_t fftorder[] = { @@ -120,8 +120,8 @@ static sample_t __attribute__((aligned(16))) xcos1[128]; static sample_t __attribute__((aligned(16))) xsin1[128]; #if ARCH_X86 || ARCH_X86_64 -// NOTE: SSE needs 16byte alignment or it will segfault -// +// NOTE: SSE needs 16byte alignment or it will segfault +// static float __attribute__((aligned(16))) sseSinCos1c[256]; static float __attribute__((aligned(16))) sseSinCos1d[256]; static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; @@ -328,7 +328,7 @@ void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; const sample_t * window = a52_imdct_window; complex_t buf[128]; - + for (i = 0; i < 128; i++) { k = fftorder[i]; t_r = pre1[i].real; @@ -417,17 +417,17 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ for( i=0; i < 128; i++) { - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ int j= bit_reverse_512[i]; buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); } - + /* 1. iteration */ for(i = 0; i < 128; i += 2) { #if 0 @@ -440,7 +440,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf[i+1].real = tmp_a_r - tmp_b_r; buf[i+1].imag = tmp_a_i - tmp_b_i; #else - vector float temp, bufv; + vector float temp, bufv; bufv = vec_ld(i << 3, (float*)buf); temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); @@ -448,7 +448,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) vec_st(bufv, i << 3, (float*)buf); #endif } - + /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} for(i = 0; i < 128; i += 4) { @@ -472,7 +472,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf[i+3].imag = tmp_a_i + tmp_b_i; #else vector float buf01, buf23, temp1, temp2; - + buf01 = vec_ld((i + 0) << 3, (float*)buf); buf23 = vec_ld((i + 2) << 3, (float*)buf); buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); @@ -540,14 +540,14 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf45 = vec_ld((i + 4) << 3, (float*)buf); buf67 = vec_ld((i + 6) << 3, (float*)buf); buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); - + vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); #endif } - + /* 4-7. iterations */ for (m=3; m < 7; m++) { two_m = (1 << m); @@ -600,10 +600,10 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); // then butterfly with buf[p] and buf[p+1] vecp = vec_ld(p << 3, (float*)buf); - + temp1 = vec_add(vecp, vecq); temp2 = vec_sub(vecp, vecq); - + vec_st(temp1, p << 3, (float*)buf); vec_st(temp2, q << 3, (float*)buf); #endif @@ -660,7 +660,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); temp2 = vec_madd(temp1133, tempCS01, vczero); bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); - + vec_st(bufv_0, (i + 0) << 3, (float*)buf); /* idem with bufv_2 and high-order cosv/sinv */ @@ -674,36 +674,36 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); vec_st(bufv_2, (i + 2) << 3, (float*)buf); - + #endif } - + data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; } - - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; } - + /* The trailing edge of the window goes into the delay line */ delay_ptr = delay; - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf[64+i].real * *--window_ptr; - *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].real * *--window_ptr; + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; } - + for(i=0; i<64; i++) { - *delay_ptr++ = buf[i].imag * *--window_ptr; - *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + *delay_ptr++ = buf[i].imag * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; } } #endif @@ -716,8 +716,8 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) #define HAVE_AMD3DNOW 1 #include "srfftp_3dnow.h" -const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; -const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; +const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; #undef HAVE_AMD3DNOWEXT @@ -746,9 +746,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - /* see the c version (dct_do_512()), its allmost identical, just in C */ + /* see the c version (dct_do_512()), its allmost identical, just in C */ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ /* Bit reversed shuffling */ @@ -809,7 +809,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) } } */ - + /* 1. iteration */ // Note w[0][0]={1,0} __asm__ volatile( @@ -831,7 +831,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) :: "g" (buf), "r" (buf + 128) : "%"REG_S ); - + /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} __asm__ volatile( @@ -863,8 +863,8 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) */ __asm__ volatile( - "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" - "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" + "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" "xorps %%xmm5, %%xmm5 \n\t" "xorps %%xmm2, %%xmm2 \n\t" "mov %0, %%"REG_S" \n\t" @@ -890,10 +890,10 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "addps %%xmm1, %%xmm3 \n\t" "subps %%xmm4, %%xmm0 \n\t" "subps %%xmm5, %%xmm1 \n\t" - "movaps %%xmm2, (%%"REG_S") \n\t" - "movaps %%xmm3, 16(%%"REG_S") \n\t" - "movaps %%xmm0, 32(%%"REG_S") \n\t" - "movaps %%xmm1, 48(%%"REG_S") \n\t" + "movaps %%xmm2, (%%"REG_S") \n\t" + "movaps %%xmm3, 16(%%"REG_S") \n\t" + "movaps %%xmm0, 32(%%"REG_S") \n\t" + "movaps %%xmm1, 48(%%"REG_S") \n\t" "add $64, %%"REG_S" \n\t" "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" @@ -927,7 +927,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" "add $16, %%"REG_D" \n\t" - "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 "jb 2b \n\t" "add %2, %%"REG_S" \n\t" "cmp %1, %%"REG_S" \n\t" @@ -954,9 +954,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) " jnz 1b \n\t" :: "r" (buf+128) : "%"REG_S - ); + ); + - data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; @@ -980,7 +980,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) : "%"REG_S, "%"REG_D @@ -988,7 +988,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) data_ptr+=128; delay_ptr+=128; // window_ptr+=128; - + __asm__ volatile( "mov $1024, %%"REG_D" \n\t" // 512 "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -1007,7 +1007,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) : "%"REG_S, "%"REG_D @@ -1025,21 +1025,21 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C - "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C - "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (delay_ptr) : "%"REG_S, "%"REG_D ); delay_ptr+=128; // window_ptr-=128; - + __asm__ volatile( "mov $1024, %%"REG_D" \n\t" // 1024 "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -1047,14 +1047,14 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? - "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? - "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (delay_ptr) : "%"REG_S, "%"REG_D @@ -1088,7 +1088,7 @@ void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) /* Post IFFT complex multiply */ /* Window and convert to real valued signal */ for (i = 0; i < 32; i++) { - /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ + /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ t_r = post2[i].real; t_i = post2[i].imag; @@ -1209,12 +1209,12 @@ void a52_imdct_init (uint32_t mm_accel) sseSinCos1c[2*i+0]= xcos1[i]; sseSinCos1c[2*i+1]= -xcos1[i]; sseSinCos1d[2*i+0]= xsin1[i]; - sseSinCos1d[2*i+1]= xsin1[i]; + sseSinCos1d[2*i+1]= xsin1[i]; } for (i = 1; i < 7; i++) { j = 1 << i; for (k = 0; k < j; k+=2) { - + sseW[i][4*k + 0] = w[i][k+0].real; sseW[i][4*k + 1] = w[i][k+0].real; sseW[i][4*k + 2] = w[i][k+1].real; @@ -1223,15 +1223,15 @@ void a52_imdct_init (uint32_t mm_accel) sseW[i][4*k + 4] = -w[i][k+0].imag; sseW[i][4*k + 5] = w[i][k+0].imag; sseW[i][4*k + 6] = -w[i][k+1].imag; - sseW[i][4*k + 7] = w[i][k+1].imag; - + sseW[i][4*k + 7] = w[i][k+1].imag; + //we multiply more or less uninitalized numbers so we need to use exactly 0.0 if(k==0) { // sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0; sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0; } - + if(2*k == j) { sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0; @@ -1243,9 +1243,9 @@ void a52_imdct_init (uint32_t mm_accel) for(i=0; i<128; i++) { sseWindow[2*i+0]= -a52_imdct_window[2*i+0]; - sseWindow[2*i+1]= a52_imdct_window[2*i+1]; + sseWindow[2*i+1]= a52_imdct_window[2*i+1]; } - + for(i=0; i<64; i++) { sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1]; diff --git a/liba52/imdct_3dnow.h b/liba52/imdct_3dnow.h index 1c13f06870..e8a91d11a4 100644 --- a/liba52/imdct_3dnow.h +++ b/liba52/imdct_3dnow.h @@ -43,7 +43,7 @@ static void FFT_4_3DNOW(complex_t *x) { /* delta_p = 1 here */ - /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} + /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} */ __asm__ volatile( "movq 24(%1), %%mm3\n\t" @@ -86,10 +86,10 @@ static void FFT_4_3DNOW(complex_t *x) static void FFT_8_3DNOW(complex_t *x) { /* delta_p = diag{1, sqrt(i)} here */ - /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} + /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} */ complex_t wT1, wB1, wB2; - + __asm__ volatile( "movq 8(%2), %%mm0\n\t" "movq 24(%2), %%mm1\n\t" @@ -111,9 +111,9 @@ static void FFT_8_3DNOW(complex_t *x) :"memory"); fft_4_3dnow(&x[0]); - + /* x[0] x[4] x[2] x[6] */ - + __asm__ volatile( "movq 40(%1), %%mm0\n\t" "movq %%mm0, %%mm3\n\t" @@ -151,7 +151,7 @@ static void FFT_8_3DNOW(complex_t *x) :"=r"(x) :"0"(x), "r"(&wT1), "r"(&wB1) :"memory"); - + /* x[1] x[5] */ __asm__ volatile ( "movq %6, %%mm6\n\t" @@ -180,7 +180,7 @@ static void FFT_8_3DNOW(complex_t *x) "pxor %%mm6, %%mm1\n\t" "pfacc %%mm1, %%mm0\n\t" "pfmul %4, %%mm0\n\t" - + "movq 40(%3), %%mm5\n\t" #if HAVE_AMD3DNOWEXT "pswapd %%mm5, %%mm5\n\t" @@ -189,7 +189,7 @@ static void FFT_8_3DNOW(complex_t *x) "punpckhdq %%mm1, %%mm5\n\t" #endif "movq %%mm5, %0\n\t" - + "movq 8(%3), %%mm1\n\t" "movq %%mm1, %%mm2\n\t" "pfsub %%mm0, %%mm1\n\t" @@ -197,7 +197,7 @@ static void FFT_8_3DNOW(complex_t *x) "movq %%mm1, 40(%3)\n\t" "movq %%mm2, 8(%3)\n\t" :"=m"(wB2) - :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW), + :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW), "m"(x_plus_minus_3dnow), "m"(x_minus_plus_3dnow) :"memory"); @@ -212,7 +212,7 @@ static void FFT_8_3DNOW(complex_t *x) "punpckldq %%mm1, %%mm2\n\t" "punpckhdq %%mm2, %%mm1\n\t" #endif - "pxor %%mm6, %%mm1\n\t" + "pxor %%mm6, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movq %2, %%mm2\n\t" "movq 56(%4), %%mm3\n\t" @@ -253,10 +253,10 @@ static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB, x3k = x2k + 2 * k; x4k = x3k + 2 * k; wB = wTB + 2 * k; - + TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]); TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]); - + --k; for(;;) { TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]); @@ -271,7 +271,7 @@ static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB, wTB += 2; wB += 2; } - + } void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB) @@ -291,13 +291,13 @@ void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB) /* transform x[3], x[11], x[7], x[15] */ TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]); -} +} static void FFT_128P_3DNOW(complex_t *a) { FFT_8_3DNOW(&a[0]); FFT_4_3DNOW(&a[8]); FFT_4_3DNOW(&a[12]); FFT_ASMB16_3DNOW(&a[0], &a[8]); - + FFT_8_3DNOW(&a[16]), FFT_8_3DNOW(&a[24]); FFT_ASMB_3DNOW(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); @@ -314,7 +314,7 @@ static void FFT_128P_3DNOW(complex_t *a) FFT_ASMB16_3DNOW(&a[64], &a[72]); FFT_8_3DNOW(&a[80]); FFT_8_3DNOW(&a[88]); - + /* FFT_32(&a[64]); */ FFT_ASMB_3DNOW(4, &a[64], &a[80],&delta32[0], &delta32_3[0]); @@ -325,7 +325,7 @@ static void FFT_128P_3DNOW(complex_t *a) FFT_8_3DNOW(&a[112]), FFT_8_3DNOW(&a[120]); /* FFT_32(&a[96]); */ FFT_ASMB_3DNOW(4, &a[96], &a[112], &delta32[0], &delta32_3[0]); - + /* FFT_128(&a[0]); */ FFT_ASMB_3DNOW(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); } @@ -353,9 +353,9 @@ imdct_do_512_3dnow sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ #if 1 __asm__ volatile ( @@ -396,7 +396,7 @@ imdct_do_512_3dnow #else __asm__ volatile ("femms":::"memory"); for( i=0; i < 128; i++) { - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ int j= pm128[i]; buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); @@ -432,9 +432,9 @@ imdct_do_512_3dnow FFT_128P_3DNOW (&buf[0]); // __asm__ volatile ("femms \n\t":::"memory"); - + /* Post IFFT complex multiply plus IFFT complex conjugate*/ -#if 1 +#if 1 __asm__ volatile ( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" @@ -448,9 +448,9 @@ imdct_do_512_3dnow #if !HAVE_AMD3DNOWEXT "punpckldq %%mm1, %%mm2\n\t" "punpckhdq %%mm2, %%mm1\n\t" -#else +#else "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */ -#endif +#endif "movd %3, %%mm3\n\t" /* ac3_xsin[i] */ "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */ "pfmul %%mm3, %%mm0\n\t" @@ -472,7 +472,7 @@ imdct_do_512_3dnow /* ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]); ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);*/ } -#else +#else __asm__ volatile ("femms":::"memory"); for( i=0; i < 128; i++) { /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ @@ -482,7 +482,7 @@ imdct_do_512_3dnow buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]); } #endif - + data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; @@ -519,16 +519,16 @@ imdct_do_512_3dnow delay_ptr += 2; } window_ptr += 128; -#else +#else __asm__ volatile ("femms":::"memory"); - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; } - - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; } #endif @@ -566,16 +566,16 @@ imdct_do_512_3dnow delay_ptr += 2; } __asm__ volatile ("femms":::"memory"); -#else +#else __asm__ volatile ("femms":::"memory"); - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf[64+i].real * *--window_ptr; - *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].real * *--window_ptr; + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; } - + for(i=0; i<64; i++) { - *delay_ptr++ = buf[i].imag * *--window_ptr; - *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + *delay_ptr++ = buf[i].imag * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; } -#endif +#endif } diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff index f00bb444d2..1b3574f043 100644 --- a/liba52/liba52_changes.diff +++ b/liba52/liba52_changes.diff @@ -3,7 +3,7 @@ @@ -59,4 +66,9 @@ int a52_block (a52_state_t * state); void a52_free (a52_state_t * state); - + +void* a52_resample_init(uint32_t mm_accel,int flags,int chans); +extern int (* a52_resample) (float * _f, int16_t * s16); + @@ -15,7 +15,7 @@ @@ -103,18 +107,34 @@ #define DELTA_BIT_NONE (2) #define DELTA_BIT_RESERVED (3) - + +#if ARCH_X86_64 +# define REG_a "rax" +# define REG_d "rdx" @@ -33,7 +33,7 @@ void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, int start, int end, int fastleak, int slowleak, expbap_t * expbap); - + int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev); +void downmix_accel_init(uint32_t mm_accel); @@ -44,7 +44,7 @@ sample_t clev, sample_t slev); -void a52_upmix (sample_t * samples, int acmod, int output); +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); - + void a52_imdct_init (uint32_t mm_accel); void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); @@ -53,9 +53,9 @@ --- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 +++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 @@ -31,6 +35,10 @@ - + #define BUFFER_SIZE 4096 - + +#ifdef ALT_BITSTREAM_READER +int indx=0; +#endif @@ -72,13 +72,13 @@ +#endif bitstream_get (state, align * 8); } - + --- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 +++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 @@ -21,6 +25,42 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + +/* code from ffmpeg/libavcodec */ +#if defined(__sparc__) || defined(hpux) +/* @@ -117,20 +117,20 @@ + /* (stolen from the kernel) */ #ifdef WORDS_BIGENDIAN - + @@ -28,7 +68,7 @@ - + #else - + -# if 0 && defined (__i386__) +# if defined (__i386__) - + # define swab32(x) __i386_swab32(x) static inline const uint32_t __i386_swab32(uint32_t x) @@ -39,19 +79,34 @@ - + # else - + -# define swab32(x)\ -((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ - (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) @@ -143,7 +143,7 @@ + } # endif #endif - + +#ifdef ALT_BITSTREAM_READER +extern int indx; +#endif @@ -151,7 +151,7 @@ void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); - + static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) { +#ifdef ALT_BITSTREAM_READER @@ -160,15 +160,15 @@ + result<<= (indx&0x07); + result>>= 32 - num_bits; + indx+= num_bits; -+ ++ + return result; +#else uint32_t result; - + if (num_bits < state->bits_left) { @@ -61,10 +116,29 @@ } - + return a52_bitstream_get_bh (state, num_bits); +#endif +} @@ -181,7 +181,7 @@ + bitstream_get(state, num_bits); +#endif } - + static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) { +#ifdef ALT_BITSTREAM_READER @@ -190,15 +190,15 @@ + result<<= (indx&0x07); + result>>= 32 - num_bits; + indx+= num_bits; -+ ++ + return result; +#else int32_t result; - + if (num_bits < state->bits_left) { @@ -74,4 +148,5 @@ } - + return a52_bitstream_get_bh_2 (state, num_bits); +#endif } @@ -211,18 +211,18 @@ + * + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) */ - + #include "config.h" - + #include #include - + #include "a52.h" #include "a52_internal.h" +#include "mm_accel.h" - + #define CONVERT(acmod,output) (((output) << 3) + (acmod)) - + + +void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev)= NULL; @@ -247,14 +247,14 @@ + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; +#endif +} -+ ++ int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -447,7 +479,7 @@ samples[i] = 0; } - + -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev) @@ -262,28 +262,28 @@ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -559,7 +591,7 @@ break; - + case CONVERT (A52_3F2R, A52_2F1R): - mix3to2 (samples, bias); + mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) move2to1 (samples + 768, samples + 512, bias); break; - + @@ -583,12 +615,12 @@ break; - + case CONVERT (A52_3F1R, A52_3F2R): - memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); break; } } - + -void a52_upmix (sample_t * samples, int acmod, int output) +void upmix_C (sample_t * samples, int acmod, int output) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { - + @@ -653,3 +685,1104 @@ goto mix_31to21; } @@ -298,10 +298,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps (%1, %%"REG_S"), %%xmm0 \n\t" -+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%1, %%"REG_S"), %%xmm0 \n\t" ++ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm7, %%xmm1 \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" @@ -321,9 +321,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -342,10 +342,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -364,12 +364,12 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" @@ -387,9 +387,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -410,9 +410,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%1, %%"REG_S"), %%xmm2 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -434,7 +434,7 @@ + ASMALIGN(4) + "1: \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" @@ -457,10 +457,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -481,10 +481,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround + "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -507,9 +507,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" @@ -532,13 +532,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common + "movaps %%xmm0, %%xmm1 \n\t" // common -+ "addps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" @@ -556,16 +556,16 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common -+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" -+ "subps %%xmm2, %%xmm1 \n\t" -+ "addps %%xmm2, %%xmm3 \n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm3 \n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" ++ "subps %%xmm2, %%xmm1 \n\t" ++ "addps %%xmm2, %%xmm3 \n\t" ++ "addps %%xmm0, %%xmm1 \n\t" ++ "addps %%xmm0, %%xmm3 \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" @@ -583,8 +583,8 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" @@ -832,13 +832,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" -+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" ++ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" -+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" ++ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -863,11 +863,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -890,13 +890,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -919,17 +919,17 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm2, %%mm0 \n\t" + "pfadd %%m