diff options
author | bircoph <bircoph@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2009-05-13 15:22:13 +0000 |
---|---|---|
committer | bircoph <bircoph@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2009-05-13 15:22:13 +0000 |
commit | 6e695dc64f0310745a32e2399a955fbf1368cd1a (patch) | |
tree | b2b839f1541449a8a441ae918473ac64c0ae5293 /liba52 | |
parent | 6e9cbdc10448203e7c8b2de41447442fcc9f7bae (diff) | |
download | mpv-6e695dc64f0310745a32e2399a955fbf1368cd1a.tar.bz2 mpv-6e695dc64f0310745a32e2399a955fbf1368cd1a.tar.xz |
Remove all kinds of trailing whitespace from all of MPlayer's files.
This affects every kind of whitespace character (' ', ^I, ^M, ^L, ...), i.e.
everything matched by the [:space:] regex character class.
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@29306 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'liba52')
-rw-r--r-- | liba52/bitstream.c | 2 | ||||
-rw-r--r-- | liba52/bitstream.h | 8 | ||||
-rw-r--r-- | liba52/crc.c | 12 | ||||
-rw-r--r-- | liba52/downmix.c | 178 | ||||
-rw-r--r-- | liba52/imdct.c | 160 | ||||
-rw-r--r-- | liba52/imdct_3dnow.h | 84 | ||||
-rw-r--r-- | liba52/liba52_changes.diff | 464 | ||||
-rw-r--r-- | liba52/parse.c | 16 | ||||
-rw-r--r-- | liba52/srfftp.h | 20 | ||||
-rw-r--r-- | liba52/srfftp_3dnow.h | 6 | ||||
-rw-r--r-- | liba52/test.c | 8 |
11 files changed, 479 insertions, 479 deletions
diff --git a/liba52/bitstream.c b/liba52/bitstream.c index a46ccced6b..7307527194 100644 --- a/liba52/bitstream.c +++ b/liba52/bitstream.c @@ -99,7 +99,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits) if (num_bits != 0) result = (result << num_bits) | (state->current_word >> (32 - num_bits)); - + state->bits_left = 32 - num_bits; return result; diff --git a/liba52/bitstream.h b/liba52/bitstream.h index 8500212c7e..e894f16781 100644 --- a/liba52/bitstream.h +++ b/liba52/bitstream.h @@ -104,11 +104,11 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) result<<= (indx&0x07); result>>= 32 - num_bits; indx+= num_bits; - + return result; #else uint32_t result; - + if (num_bits < state->bits_left) { result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; @@ -136,11 +136,11 @@ static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) result<<= (indx&0x07); result>>= 32 - num_bits; indx+= num_bits; - + return result; #else int32_t result; - + if (num_bits < state->bits_left) { result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; diff --git a/liba52/crc.c b/liba52/crc.c index d19a4a2e11..aa0a19c005 100644 --- a/liba52/crc.c +++ b/liba52/crc.c @@ -1,23 +1,23 @@ -/* +/* * crc.c * * Copyright (C) Aaron Holtzman - May 1999 * * This file is part of ac3dec, a free Dolby AC-3 stream decoder. - * + * * ac3dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. - * + * * ac3dec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * + * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * */ @@ -25,7 +25,7 @@ #include <stdio.h> #include <inttypes.h> -static const uint16_t crc_lut[256] = +static const uint16_t crc_lut[256] = { 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011, 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022, diff --git a/liba52/downmix.c b/liba52/downmix.c index c44317fd6f..212c87d921 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -62,7 +62,7 @@ void downmix_accel_init(uint32_t mm_accel) if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; #endif } - + int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -695,10 +695,10 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" - "addps (%1, %%"REG_S"), %%xmm0 \n\t" - "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%1, %%"REG_S"), %%xmm0 \n\t" + "addps 16(%1, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm7, %%xmm1 \n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" @@ -718,9 +718,9 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -739,10 +739,10 @@ static void mix4to1_SSE (sample_t * 
samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -761,12 +761,12 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" - "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" @@ -784,9 +784,9 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -807,9 +807,9 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps (%1, %%"REG_S"), %%xmm2 \n\t" "addps 
%%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -831,7 +831,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias) ASMALIGN(4) "1: \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" @@ -854,10 +854,10 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -878,10 +878,10 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -904,9 +904,9 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" @@ -929,13 +929,13 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, 
%%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common "movaps %%xmm0, %%xmm1 \n\t" // common - "addps (%0, %%"REG_S"), %%xmm0 \n\t" - "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" - "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%0, %%"REG_S"), %%xmm0 \n\t" + "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" "add $16, %%"REG_S" \n\t" @@ -953,16 +953,16 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" - "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" - "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" - "subps %%xmm2, %%xmm1 \n\t" - "addps %%xmm2, %%xmm3 \n\t" - "addps %%xmm0, %%xmm1 \n\t" - "addps %%xmm0, %%xmm3 \n\t" + "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" + "subps %%xmm2, %%xmm1 \n\t" + "addps %%xmm2, %%xmm3 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm3 \n\t" "movaps %%xmm1, (%0, %%"REG_S") \n\t" "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" "add $16, %%"REG_S" \n\t" @@ -980,8 +980,8 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" @@ -1229,13 +1229,13 @@ static void 
mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" "movq 24(%0, %%"REG_S"), %%mm3 \n\t" - "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd (%1, %%"REG_S"), %%mm0 \n\t" "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" - "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1260,11 +1260,11 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1287,13 +1287,13 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1316,17 +1316,17 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + 
"movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" - "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm2, %%mm0 \n\t" "pfadd %%mm3, %%mm1 \n\t" @@ -1347,11 +1347,11 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1378,11 +1378,11 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" "movq 1032(%1, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq (%1, %%"REG_S"), %%mm4 \n\t" "movq 8(%1, %%"REG_S"), %%mm5 \n\t" @@ -1411,7 +1411,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias) "1: \n\t" "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 
1024(%0, %%"REG_S"), %%mm4\n\t" "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1442,13 +1442,13 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1475,11 +1475,11 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1512,11 +1512,11 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 2048(%0, %%"REG_S"), %%mm0\n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" "movq 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 1024(%0, %%"REG_S"), %%mm4\n\t" "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1547,19 +1547,19 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, 
%%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common "movq %%mm0, %%mm2 \n\t" // common "movq %%mm1, %%mm3 \n\t" // common - "pfadd (%0, %%"REG_S"), %%mm0 \n\t" + "pfadd (%0, %%"REG_S"), %%mm0 \n\t" "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "movq %%mm0, (%0, %%"REG_S") \n\t" "movq %%mm1, 8(%0, %%"REG_S") \n\t" @@ -1581,25 +1581,25 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias) "1: \n\t" "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" - "movq 3072(%0, %%"REG_S"), %%mm4\n\t" + "movq 3072(%0, %%"REG_S"), %%mm4\n\t" "movq 3080(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround + "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" - "movq 2048(%0, %%"REG_S"), %%mm6\n\t" + "movq 2048(%0, %%"REG_S"), %%mm6\n\t" "movq 2056(%0, %%"REG_S"), %%mm7\n\t" - "pfsub %%mm4, %%mm2 \n\t" + "pfsub %%mm4, %%mm2 \n\t" "pfsub %%mm5, %%mm3 \n\t" - "pfadd %%mm4, %%mm6 \n\t" + "pfadd %%mm4, %%mm6 \n\t" "pfadd %%mm5, %%mm7 \n\t" - "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" - "pfadd %%mm0, %%mm6 \n\t" + "pfadd %%mm0, %%mm6 \n\t" "pfadd %%mm1, %%mm7 \n\t" "movq %%mm2, (%0, %%"REG_S") \n\t" "movq %%mm3, 8(%0, %%"REG_S") \n\t" @@ -1620,9 +1620,9 @@ static void move2to1_3dnow 
(sample_t * src, sample_t * dest, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" "movq 24(%0, %%"REG_S"), %%mm3 \n\t" "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" diff --git a/liba52/imdct.c b/liba52/imdct.c index b813345537..089fa0acae 100644 --- a/liba52/imdct.c +++ b/liba52/imdct.c @@ -72,24 +72,24 @@ static const int pm128[128] attribute_used __attribute__((aligned(16))) = 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -}; +}; static uint8_t attribute_used bit_reverse_512[] = { - 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, - 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, - 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, - 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, - 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, - 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, - 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, - 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, - 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, - 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, - 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, - 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, - 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, - 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, - 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, + 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, + 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, + 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, + 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, + 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, + 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, + 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, + 0x0e, 0x4e, 0x2e, 
0x6e, 0x1e, 0x5e, 0x3e, 0x7e, + 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, + 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, + 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, + 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, + 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, + 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, + 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; static uint8_t fftorder[] = { @@ -120,8 +120,8 @@ static sample_t __attribute__((aligned(16))) xcos1[128]; static sample_t __attribute__((aligned(16))) xsin1[128]; #if ARCH_X86 || ARCH_X86_64 -// NOTE: SSE needs 16byte alignment or it will segfault -// +// NOTE: SSE needs 16byte alignment or it will segfault +// static float __attribute__((aligned(16))) sseSinCos1c[256]; static float __attribute__((aligned(16))) sseSinCos1d[256]; static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; @@ -328,7 +328,7 @@ void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; const sample_t * window = a52_imdct_window; complex_t buf[128]; - + for (i = 0; i < 128; i++) { k = fftorder[i]; t_r = pre1[i].real; @@ -417,17 +417,17 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ for( i=0; i < 128; i++) { - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ int j= bit_reverse_512[i]; buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); } - + /* 1. 
iteration */ for(i = 0; i < 128; i += 2) { #if 0 @@ -440,7 +440,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf[i+1].real = tmp_a_r - tmp_b_r; buf[i+1].imag = tmp_a_i - tmp_b_i; #else - vector float temp, bufv; + vector float temp, bufv; bufv = vec_ld(i << 3, (float*)buf); temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); @@ -448,7 +448,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) vec_st(bufv, i << 3, (float*)buf); #endif } - + /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} for(i = 0; i < 128; i += 4) { @@ -472,7 +472,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf[i+3].imag = tmp_a_i + tmp_b_i; #else vector float buf01, buf23, temp1, temp2; - + buf01 = vec_ld((i + 0) << 3, (float*)buf); buf23 = vec_ld((i + 2) << 3, (float*)buf); buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); @@ -540,14 +540,14 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf45 = vec_ld((i + 4) << 3, (float*)buf); buf67 = vec_ld((i + 6) << 3, (float*)buf); buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); - + vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); #endif } - + /* 4-7. 
iterations */ for (m=3; m < 7; m++) { two_m = (1 << m); @@ -600,10 +600,10 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); // then butterfly with buf[p] and buf[p+1] vecp = vec_ld(p << 3, (float*)buf); - + temp1 = vec_add(vecp, vecq); temp2 = vec_sub(vecp, vecq); - + vec_st(temp1, p << 3, (float*)buf); vec_st(temp2, q << 3, (float*)buf); #endif @@ -660,7 +660,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); temp2 = vec_madd(temp1133, tempCS01, vczero); bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); - + vec_st(bufv_0, (i + 0) << 3, (float*)buf); /* idem with bufv_2 and high-order cosv/sinv */ @@ -674,36 +674,36 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); vec_st(bufv_2, (i + 2) << 3, (float*)buf); - + #endif } - + data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; } - - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; } - + /* The trailing edge of the window goes into the delay line */ delay_ptr = delay; - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf[64+i].real * *--window_ptr; - *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = 
-buf[64+i].real * *--window_ptr; + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; } - + for(i=0; i<64; i++) { - *delay_ptr++ = buf[i].imag * *--window_ptr; - *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + *delay_ptr++ = buf[i].imag * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; } } #endif @@ -716,8 +716,8 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) #define HAVE_AMD3DNOW 1 #include "srfftp_3dnow.h" -const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; -const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; +const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; #undef HAVE_AMD3DNOWEXT @@ -746,9 +746,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - /* see the c version (dct_do_512()), its allmost identical, just in C */ + /* see the c version (dct_do_512()), its allmost identical, just in C */ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ /* Bit reversed shuffling */ @@ -809,7 +809,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) } } */ - + /* 1. iteration */ // Note w[0][0]={1,0} __asm__ volatile( @@ -831,7 +831,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) :: "g" (buf), "r" (buf + 128) : "%"REG_S ); - + /* 2. 
iteration */ // Note w[1]={{1,0}, {0,-1}} __asm__ volatile( @@ -863,8 +863,8 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) */ __asm__ volatile( - "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" - "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" + "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" "xorps %%xmm5, %%xmm5 \n\t" "xorps %%xmm2, %%xmm2 \n\t" "mov %0, %%"REG_S" \n\t" @@ -890,10 +890,10 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "addps %%xmm1, %%xmm3 \n\t" "subps %%xmm4, %%xmm0 \n\t" "subps %%xmm5, %%xmm1 \n\t" - "movaps %%xmm2, (%%"REG_S") \n\t" - "movaps %%xmm3, 16(%%"REG_S") \n\t" - "movaps %%xmm0, 32(%%"REG_S") \n\t" - "movaps %%xmm1, 48(%%"REG_S") \n\t" + "movaps %%xmm2, (%%"REG_S") \n\t" + "movaps %%xmm3, 16(%%"REG_S") \n\t" + "movaps %%xmm0, 32(%%"REG_S") \n\t" + "movaps %%xmm1, 48(%%"REG_S") \n\t" "add $64, %%"REG_S" \n\t" "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" @@ -927,7 +927,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" "add $16, %%"REG_D" \n\t" - "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 "jb 2b \n\t" "add %2, %%"REG_S" \n\t" "cmp %1, %%"REG_S" \n\t" @@ -954,9 +954,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) " jnz 1b \n\t" :: "r" (buf+128) : "%"REG_S - ); + ); + - data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; @@ -980,7 +980,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) : "%"REG_S, "%"REG_D @@ -988,7 +988,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) 
data_ptr+=128; delay_ptr+=128; // window_ptr+=128; - + __asm__ volatile( "mov $1024, %%"REG_D" \n\t" // 512 "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -1007,7 +1007,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) : "%"REG_S, "%"REG_D @@ -1025,21 +1025,21 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C - "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C - "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (delay_ptr) : "%"REG_S, "%"REG_D ); delay_ptr+=128; // window_ptr-=128; - + __asm__ volatile( "mov $1024, %%"REG_D" \n\t" // 1024 "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -1047,14 +1047,14 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "1: \n\t" |