summaryrefslogtreecommitdiffstats
path: root/liba52
diff options
context:
space:
mode:
Diffstat (limited to 'liba52')
-rw-r--r--liba52/bitstream.c2
-rw-r--r--liba52/bitstream.h8
-rw-r--r--liba52/crc.c12
-rw-r--r--liba52/downmix.c178
-rw-r--r--liba52/imdct.c160
-rw-r--r--liba52/imdct_3dnow.h84
-rw-r--r--liba52/liba52_changes.diff464
-rw-r--r--liba52/parse.c16
-rw-r--r--liba52/srfftp.h20
-rw-r--r--liba52/srfftp_3dnow.h6
-rw-r--r--liba52/test.c8
11 files changed, 479 insertions, 479 deletions
diff --git a/liba52/bitstream.c b/liba52/bitstream.c
index a46ccced6b..7307527194 100644
--- a/liba52/bitstream.c
+++ b/liba52/bitstream.c
@@ -99,7 +99,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits)
if (num_bits != 0)
result = (result << num_bits) | (state->current_word >> (32 - num_bits));
-
+
state->bits_left = 32 - num_bits;
return result;
diff --git a/liba52/bitstream.h b/liba52/bitstream.h
index 8500212c7e..e894f16781 100644
--- a/liba52/bitstream.h
+++ b/liba52/bitstream.h
@@ -104,11 +104,11 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
result<<= (indx&0x07);
result>>= 32 - num_bits;
indx+= num_bits;
-
+
return result;
#else
uint32_t result;
-
+
if (num_bits < state->bits_left) {
result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits);
state->bits_left -= num_bits;
@@ -136,11 +136,11 @@ static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
result<<= (indx&0x07);
result>>= 32 - num_bits;
indx+= num_bits;
-
+
return result;
#else
int32_t result;
-
+
if (num_bits < state->bits_left) {
result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits);
state->bits_left -= num_bits;
diff --git a/liba52/crc.c b/liba52/crc.c
index d19a4a2e11..aa0a19c005 100644
--- a/liba52/crc.c
+++ b/liba52/crc.c
@@ -1,23 +1,23 @@
-/*
+/*
* crc.c
*
* Copyright (C) Aaron Holtzman - May 1999
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
- *
+ *
* ac3dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
- *
+ *
* ac3dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
@@ -25,7 +25,7 @@
#include <stdio.h>
#include <inttypes.h>
-static const uint16_t crc_lut[256] =
+static const uint16_t crc_lut[256] =
{
0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
diff --git a/liba52/downmix.c b/liba52/downmix.c
index c44317fd6f..212c87d921 100644
--- a/liba52/downmix.c
+++ b/liba52/downmix.c
@@ -62,7 +62,7 @@ void downmix_accel_init(uint32_t mm_accel)
if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
#endif
}
-
+
int a52_downmix_init (int input, int flags, sample_t * level,
sample_t clev, sample_t slev)
{
@@ -695,10 +695,10 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
- "addps (%1, %%"REG_S"), %%xmm0 \n\t"
- "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps (%1, %%"REG_S"), %%xmm0 \n\t"
+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
"addps %%xmm7, %%xmm0 \n\t"
"addps %%xmm7, %%xmm1 \n\t"
"movaps %%xmm0, (%1, %%"REG_S") \n\t"
@@ -718,9 +718,9 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm1 \n\t"
"addps %%xmm1, %%xmm0 \n\t"
"movaps %%xmm0, (%0, %%"REG_S") \n\t"
@@ -739,10 +739,10 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
"addps %%xmm7, %%xmm0 \n\t"
"addps %%xmm1, %%xmm0 \n\t"
"movaps %%xmm0, (%0, %%"REG_S") \n\t"
@@ -761,12 +761,12 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
"addps %%xmm7, %%xmm0 \n\t"
- "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
"addps %%xmm1, %%xmm0 \n\t"
"movaps %%xmm0, (%0, %%"REG_S") \n\t"
"add $16, %%"REG_S" \n\t"
@@ -784,9 +784,9 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" //common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
"movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
"addps %%xmm0, %%xmm1 \n\t"
"addps %%xmm0, %%xmm2 \n\t"
@@ -807,9 +807,9 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" //common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
"movaps (%1, %%"REG_S"), %%xmm2 \n\t"
"addps %%xmm0, %%xmm1 \n\t"
"addps %%xmm0, %%xmm2 \n\t"
@@ -831,7 +831,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias)
ASMALIGN(4)
"1: \n\t"
"movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
"addps %%xmm7, %%xmm1 \n\t"
"addps %%xmm7, %%xmm2 \n\t"
@@ -854,10 +854,10 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" // common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
"movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
"addps %%xmm0, %%xmm1 \n\t"
"addps %%xmm0, %%xmm2 \n\t"
@@ -878,10 +878,10 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
"addps %%xmm7, %%xmm0 \n\t" // common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
"movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
"addps %%xmm0, %%xmm1 \n\t"
"addps %%xmm0, %%xmm2 \n\t"
@@ -904,9 +904,9 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
"addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
"addps %%xmm7, %%xmm1 \n\t"
"addps %%xmm7, %%xmm2 \n\t"
@@ -929,13 +929,13 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" // common
"movaps %%xmm0, %%xmm1 \n\t" // common
- "addps (%0, %%"REG_S"), %%xmm0 \n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps (%0, %%"REG_S"), %%xmm0 \n\t"
+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
"movaps %%xmm0, (%0, %%"REG_S") \n\t"
"movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
"add $16, %%"REG_S" \n\t"
@@ -953,16 +953,16 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
"addps %%xmm7, %%xmm0 \n\t" // common
- "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
- "subps %%xmm2, %%xmm1 \n\t"
- "addps %%xmm2, %%xmm3 \n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm3 \n\t"
+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
+ "subps %%xmm2, %%xmm1 \n\t"
+ "addps %%xmm2, %%xmm3 \n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm3 \n\t"
"movaps %%xmm1, (%0, %%"REG_S") \n\t"
"movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
"add $16, %%"REG_S" \n\t"
@@ -980,8 +980,8 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
"addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
"addps %%xmm7, %%xmm0 \n\t"
@@ -1229,13 +1229,13 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
"movq 24(%0, %%"REG_S"), %%mm3 \n\t"
- "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
"pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
- "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
"pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
"pfadd %%mm7, %%mm0 \n\t"
"pfadd %%mm7, %%mm1 \n\t"
@@ -1260,11 +1260,11 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
"movq 1032(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
"pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
"pfadd %%mm7, %%mm0 \n\t"
"pfadd %%mm7, %%mm1 \n\t"
@@ -1287,13 +1287,13 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
"movq 1032(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
"pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
"pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
"pfadd %%mm7, %%mm0 \n\t"
"pfadd %%mm7, %%mm1 \n\t"
@@ -1316,17 +1316,17 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
"movq 1032(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
"pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
"pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
"pfadd %%mm7, %%mm0 \n\t"
"pfadd %%mm7, %%mm1 \n\t"
- "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
"pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
"pfadd %%mm2, %%mm0 \n\t"
"pfadd %%mm3, %%mm1 \n\t"
@@ -1347,11 +1347,11 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
"pfadd %%mm7, %%mm0 \n\t" //common
"pfadd %%mm7, %%mm1 \n\t" //common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
"movq 2048(%0, %%"REG_S"), %%mm4\n\t"
"movq 2056(%0, %%"REG_S"), %%mm5\n\t"
@@ -1378,11 +1378,11 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
"movq 1032(%1, %%"REG_S"), %%mm1\n\t"
"pfadd %%mm7, %%mm0 \n\t" //common
"pfadd %%mm7, %%mm1 \n\t" //common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
"movq (%1, %%"REG_S"), %%mm4 \n\t"
"movq 8(%1, %%"REG_S"), %%mm5 \n\t"
@@ -1411,7 +1411,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias)
"1: \n\t"
"movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
"movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
"movq 1024(%0, %%"REG_S"), %%mm4\n\t"
"movq 1032(%0, %%"REG_S"), %%mm5\n\t"
@@ -1442,13 +1442,13 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
"pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
"pfadd %%mm7, %%mm0 \n\t" // common
"pfadd %%mm7, %%mm1 \n\t" // common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
"movq 2048(%0, %%"REG_S"), %%mm4\n\t"
"movq 2056(%0, %%"REG_S"), %%mm5\n\t"
@@ -1475,11 +1475,11 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
"pfadd %%mm7, %%mm0 \n\t" // common
"pfadd %%mm7, %%mm1 \n\t" // common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
"movq 2048(%0, %%"REG_S"), %%mm4\n\t"
"movq 2056(%0, %%"REG_S"), %%mm5\n\t"
@@ -1512,11 +1512,11 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
"movq 2056(%0, %%"REG_S"), %%mm1\n\t"
"pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
"pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
"movq 1024(%0, %%"REG_S"), %%mm4\n\t"
"movq 1032(%0, %%"REG_S"), %%mm5\n\t"
@@ -1547,19 +1547,19 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
"pfadd %%mm7, %%mm0 \n\t" // common
"pfadd %%mm7, %%mm1 \n\t" // common
"movq %%mm0, %%mm2 \n\t" // common
"movq %%mm1, %%mm3 \n\t" // common
- "pfadd (%0, %%"REG_S"), %%mm0 \n\t"
+ "pfadd (%0, %%"REG_S"), %%mm0 \n\t"
"pfadd 8(%0, %%"REG_S"), %%mm1 \n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t"
+ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t"
"pfadd 2056(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
"pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
"pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
"movq %%mm0, (%0, %%"REG_S") \n\t"
"movq %%mm1, 8(%0, %%"REG_S") \n\t"
@@ -1581,25 +1581,25 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias)
"1: \n\t"
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "movq 3072(%0, %%"REG_S"), %%mm4\n\t"
+ "movq 3072(%0, %%"REG_S"), %%mm4\n\t"
"movq 3080(%0, %%"REG_S"), %%mm5\n\t"
"pfadd %%mm7, %%mm0 \n\t" // common
"pfadd %%mm7, %%mm1 \n\t" // common
- "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround
+ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround
"pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
"movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm6\n\t"
+ "movq 2048(%0, %%"REG_S"), %%mm6\n\t"
"movq 2056(%0, %%"REG_S"), %%mm7\n\t"
- "pfsub %%mm4, %%mm2 \n\t"
+ "pfsub %%mm4, %%mm2 \n\t"
"pfsub %%mm5, %%mm3 \n\t"
- "pfadd %%mm4, %%mm6 \n\t"
+ "pfadd %%mm4, %%mm6 \n\t"
"pfadd %%mm5, %%mm7 \n\t"
- "pfadd %%mm0, %%mm2 \n\t"
+ "pfadd %%mm0, %%mm2 \n\t"
"pfadd %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm6 \n\t"
+ "pfadd %%mm0, %%mm6 \n\t"
"pfadd %%mm1, %%mm7 \n\t"
"movq %%mm2, (%0, %%"REG_S") \n\t"
"movq %%mm3, 8(%0, %%"REG_S") \n\t"
@@ -1620,9 +1620,9 @@ static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
"mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4)
"1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
"movq 24(%0, %%"REG_S"), %%mm3 \n\t"
"pfadd 1024(%0, %%"REG_S"), %%mm0\n\t"
"pfadd 1032(%0, %%"REG_S"), %%mm1\n\t"
diff --git a/liba52/imdct.c b/liba52/imdct.c
index b813345537..089fa0acae 100644
--- a/liba52/imdct.c
+++ b/liba52/imdct.c
@@ -72,24 +72,24 @@ static const int pm128[128] attribute_used __attribute__((aligned(16))) =
5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
-};
+};
static uint8_t attribute_used bit_reverse_512[] = {
- 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
- 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
- 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
- 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
- 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
- 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
- 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
- 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
- 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
- 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
- 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
- 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
- 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
- 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
- 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
+ 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
+ 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
+ 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
+ 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
+ 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
+ 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
+ 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
+ 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
+ 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
+ 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
+ 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
+ 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
+ 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
+ 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
+ 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
static uint8_t fftorder[] = {
@@ -120,8 +120,8 @@ static sample_t __attribute__((aligned(16))) xcos1[128];
static sample_t __attribute__((aligned(16))) xsin1[128];
#if ARCH_X86 || ARCH_X86_64
-// NOTE: SSE needs 16byte alignment or it will segfault
-//
+// NOTE: SSE needs 16byte alignment or it will segfault
+//
static float __attribute__((aligned(16))) sseSinCos1c[256];
static float __attribute__((aligned(16))) sseSinCos1d[256];
static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
@@ -328,7 +328,7 @@ void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
const sample_t * window = a52_imdct_window;
complex_t buf[128];
-
+
for (i = 0; i < 128; i++) {
k = fftorder[i];
t_r = pre1[i].real;
@@ -417,17 +417,17 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
sample_t *data_ptr;
sample_t *delay_ptr;
sample_t *window_ptr;
-
+
/* 512 IMDCT with source and dest data in 'data' */
-
+
/* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/
for( i=0; i < 128; i++) {
- /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
+ /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
int j= bit_reverse_512[i];
buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
}
-
+
/* 1. iteration */
for(i = 0; i < 128; i += 2) {
#if 0
@@ -440,7 +440,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
buf[i+1].real = tmp_a_r - tmp_b_r;
buf[i+1].imag = tmp_a_i - tmp_b_i;
#else
- vector float temp, bufv;
+ vector float temp, bufv;
bufv = vec_ld(i << 3, (float*)buf);
temp = vec_perm(bufv, bufv, vcprm(2,3,0,1));
@@ -448,7 +448,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
vec_st(bufv, i << 3, (float*)buf);
#endif
}
-
+
/* 2. iteration */
// Note w[1]={{1,0}, {0,-1}}
for(i = 0; i < 128; i += 4) {
@@ -472,7 +472,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
buf[i+3].imag = tmp_a_i + tmp_b_i;
#else
vector float buf01, buf23, temp1, temp2;
-
+
buf01 = vec_ld((i + 0) << 3, (float*)buf);
buf23 = vec_ld((i + 2) << 3, (float*)buf);
buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2));
@@ -540,14 +540,14 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
buf45 = vec_ld((i + 4) << 3, (float*)buf);
buf67 = vec_ld((i + 6) << 3, (float*)buf);
buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3));
-
+
vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf);
vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf);
vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf);
vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf);
#endif
}
-
+
/* 4-7. iterations */
for (m=3; m < 7; m++) {
two_m = (1 << m);
@@ -600,10 +600,10 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
vecq = vec_madd(temp4, vcii(n,p,n,p), temp3);
// then butterfly with buf[p] and buf[p+1]
vecp = vec_ld(p << 3, (float*)buf);
-
+
temp1 = vec_add(vecp, vecq);
temp2 = vec_sub(vecp, vecq);
-
+
vec_st(temp1, p << 3, (float*)buf);
vec_st(temp2, q << 3, (float*)buf);
#endif
@@ -660,7 +660,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1));
temp2 = vec_madd(temp1133, tempCS01, vczero);
bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1);
-
+
vec_st(bufv_0, (i + 0) << 3, (float*)buf);
/* idem with bufv_2 and high-order cosv/sinv */
@@ -674,36 +674,36 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1);
vec_st(bufv_2, (i + 2) << 3, (float*)buf);
-
+
#endif
}
-
+
data_ptr = data;
delay_ptr = delay;
window_ptr = a52_imdct_window;
/* Window and convert to real valued signal */
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
- *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
+ *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
}
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
- *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
+
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
+ *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
}
-
+
/* The trailing edge of the window goes into the delay line */
delay_ptr = delay;
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -buf[64+i].real * *--window_ptr;
- *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = -buf[64+i].real * *--window_ptr;
+ *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
}
-
+
for(i=0; i<64; i++) {
- *delay_ptr++ = buf[i].imag * *--window_ptr;
- *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
+ *delay_ptr++ = buf[i].imag * *--window_ptr;
+ *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
}
}
#endif
@@ -716,8 +716,8 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
#define HAVE_AMD3DNOW 1
#include "srfftp_3dnow.h"
-const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }};
-const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }};
+const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }};
+const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }};
const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
#undef HAVE_AMD3DNOWEXT
@@ -746,9 +746,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
sample_t *data_ptr;
sample_t *delay_ptr;
sample_t *window_ptr;
-
+
/* 512 IMDCT with source and dest data in 'data' */
- /* see the c version (dct_do_512()), its allmost identical, just in C */
+ /* see the c version (dct_do_512()), its allmost identical, just in C */
/* Pre IFFT complex multiply plus IFFT cmplx conjugate */
/* Bit reversed shuffling */
@@ -809,7 +809,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
}
}
*/
-
+
/* 1. iteration */
// Note w[0][0]={1,0}
__asm__ volatile(
@@ -831,7 +831,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
:: "g" (buf), "r" (buf + 128)
: "%"REG_S
);
-
+
/* 2. iteration */
// Note w[1]={{1,0}, {0,-1}}
__asm__ volatile(
@@ -863,8 +863,8 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
*/
__asm__ volatile(
- "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
- "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
+ "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
+ "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
"xorps %%xmm5, %%xmm5 \n\t"
"xorps %%xmm2, %%xmm2 \n\t"
"mov %0, %%"REG_S" \n\t"
@@ -890,10 +890,10 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"addps %%xmm1, %%xmm3 \n\t"
"subps %%xmm4, %%xmm0 \n\t"
"subps %%xmm5, %%xmm1 \n\t"
- "movaps %%xmm2, (%%"REG_S") \n\t"
- "movaps %%xmm3, 16(%%"REG_S") \n\t"
- "movaps %%xmm0, 32(%%"REG_S") \n\t"
- "movaps %%xmm1, 48(%%"REG_S") \n\t"
+ "movaps %%xmm2, (%%"REG_S") \n\t"
+ "movaps %%xmm3, 16(%%"REG_S") \n\t"
+ "movaps %%xmm0, 32(%%"REG_S") \n\t"
+ "movaps %%xmm1, 48(%%"REG_S") \n\t"
"add $64, %%"REG_S" \n\t"
"cmp %1, %%"REG_S" \n\t"
" jb 1b \n\t"
@@ -927,7 +927,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t"
"movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t"
"add $16, %%"REG_D" \n\t"
- "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
+ "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
"jb 2b \n\t"
"add %2, %%"REG_S" \n\t"
"cmp %1, %%"REG_S" \n\t"
@@ -954,9 +954,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
" jnz 1b \n\t"
:: "r" (buf+128)
: "%"REG_S
- );
+ );
+
-
data_ptr = data;
delay_ptr = delay;
window_ptr = a52_imdct_window;
@@ -980,7 +980,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"movaps %%xmm0, (%1, %%"REG_S") \n\t"
"add $16, %%"REG_S" \n\t"
"sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
+ "cmp $512, %%"REG_S" \n\t"
" jb 1b \n\t"
:: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
: "%"REG_S, "%"REG_D
@@ -988,7 +988,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
data_ptr+=128;
delay_ptr+=128;
// window_ptr+=128;
-
+
__asm__ volatile(
"mov $1024, %%"REG_D" \n\t" // 512
"xor %%"REG_S", %%"REG_S" \n\t" // 0
@@ -1007,7 +1007,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"movaps %%xmm0, (%1, %%"REG_S") \n\t"
"add $16, %%"REG_S" \n\t"
"sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
+ "cmp $512, %%"REG_S" \n\t"
" jb 1b \n\t"
:: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
: "%"REG_S, "%"REG_D
@@ -1025,21 +1025,21 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
"movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
"shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
"mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
"movaps %%xmm0, (%1, %%"REG_S") \n\t"
"add $16, %%"REG_S" \n\t"
"sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
+ "cmp $512, %%"REG_S" \n\t"
" jb 1b \n\t"
:: "r" (buf+64), "r" (delay_ptr)
: "%"REG_S, "%"REG_D
);
delay_ptr+=128;
// window_ptr-=128;
-
+
__asm__ volatile(
"mov $1024, %%"REG_D" \n\t" // 1024
"xor %%"REG_S", %%"REG_S" \n\t" // 0
@@ -1047,14 +1047,14 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
"movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
"shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
"mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
"movaps %%xmm0, (%1, %%"REG_S") \n\t"
"add $16, %%"REG_S" \n\t"
"sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
+ "cmp $512, %%"REG_S" \n\t"
" jb 1b \n\t"
:: "r" (buf), "r" (delay_ptr)
: "%"REG_S, "%"REG_D
@@ -1088,7 +1088,7 @@ void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
/* Post IFFT complex multiply */
/* Window and convert to real valued signal */
for (i = 0; i < 32; i++) {
- /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
+ /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
t_r = post2[i].real;
t_i = post2[i].imag;
@@ -1209,12 +1209,12 @@ void a52_imdct_init (uint32_t mm_accel)
sseSinCos1c[2*i+0]= xcos1[i];
sseSinCos1c[2*i+1]= -xcos1[i];
sseSinCos1d[2*i+0]= xsin1[i];
- sseSinCos1d[2*i+1]= xsin1[i];
+ sseSinCos1d[2*i+1]= xsin1[i];
}
for (i = 1; i < 7; i++) {
j = 1 << i;
for (k = 0; k < j; k+=2) {
-
+
sseW[i][4*k + 0] = w[i][k+0].real;
sseW[i][4*k + 1] = w[i][k+0].real;
sseW[i][4*k + 2] = w[i][k+1].real;
@@ -1223,15 +1223,15 @@ void a52_imdct_init (uint32_t mm_accel)
sseW[i][4*k + 4] = -w[i][k+0].imag;