summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- liba52/a52_internal.h 8
-rw-r--r-- liba52/bit_allocate.c 2
-rw-r--r-- liba52/bitstream.c 6
-rw-r--r-- liba52/bitstream.h 4
-rw-r--r-- liba52/downmix.c 12
-rw-r--r-- liba52/imdct.c 70
-rw-r--r-- liba52/liba52_changes.diff 34
-rw-r--r-- liba52/parse.c 32
8 files changed, 85 insertions, 83 deletions
diff --git a/liba52/a52_internal.h b/liba52/a52_internal.h
index d420803699..3627a2d23a 100644
--- a/liba52/a52_internal.h
+++ b/liba52/a52_internal.h
@@ -122,16 +122,16 @@ struct a52_state_s {
#endif
void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
- int start, int end, int fastleak, int slowleak,
+ int start, int end, int fastleak, int slowleak,
expbap_t * expbap);
int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev);
+ sample_t clev, sample_t slev);
void downmix_accel_init(uint32_t mm_accel);
int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev);
+ sample_t clev, sample_t slev);
extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
+ sample_t clev, sample_t slev);
extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
void a52_imdct_init (uint32_t mm_accel);
diff --git a/liba52/bit_allocate.c b/liba52/bit_allocate.c
index a5f3b77024..0567b22852 100644
--- a/liba52/bit_allocate.c
+++ b/liba52/bit_allocate.c
@@ -122,7 +122,7 @@ do { \
} while (0)
void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
- int start, int end, int fastleak, int slowleak,
+ int start, int end, int fastleak, int slowleak,
expbap_t * expbap)
{
static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410};
diff --git a/liba52/bitstream.c b/liba52/bitstream.c
index 6c275109d1..0b2d8fa48f 100644
--- a/liba52/bitstream.c
+++ b/liba52/bitstream.c
@@ -79,9 +79,9 @@ uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits)
bitstream_fill_current (state);
- if(num_bits != 0)
+ if (num_bits != 0)
result = (result << num_bits) | (state->current_word >> (32 - num_bits));
-
+
state->bits_left = 32 - num_bits;
return result;
@@ -97,7 +97,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits)
bitstream_fill_current(state);
- if(num_bits != 0)
+ if (num_bits != 0)
result = (result << num_bits) | (state->current_word >> (32 - num_bits));
state->bits_left = 32 - num_bits;
diff --git a/liba52/bitstream.h b/liba52/bitstream.h
index 8576f8b282..6764b12a78 100644
--- a/liba52/bitstream.h
+++ b/liba52/bitstream.h
@@ -66,7 +66,7 @@ static inline uint32_t unaligned32(const void *v) {
#endif //!ARCH_X86
#endif
-
+
/* (stolen from the kernel) */
#ifdef WORDS_BIGENDIAN
@@ -114,7 +114,7 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
return result;
#else
uint32_t result;
-
+
if (num_bits < state->bits_left) {
result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits);
state->bits_left -= num_bits;
diff --git a/liba52/downmix.c b/liba52/downmix.c
index 5c61cee475..368773e879 100644
--- a/liba52/downmix.c
+++ b/liba52/downmix.c
@@ -63,9 +63,9 @@ void downmix_accel_init(uint32_t mm_accel)
if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
#endif
}
-
+
int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev)
+ sample_t clev, sample_t slev)
{
static uint8_t table[11][8] = {
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
@@ -96,7 +96,7 @@ int a52_downmix_init (int input, int flags, sample_t * level,
output = flags & A52_CHANNEL_MASK;
if (output > A52_DOLBY)
return -1;
-
+
output = table[output][input & 7];
if ((output == A52_STEREO) &&
@@ -180,11 +180,12 @@ int a52_downmix_init (int input, int flags, sample_t * level,
*level *= 1 / (1 + 3 * LEVEL_3DB);
break;
}
+
return output;
}
int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev)
+ sample_t clev, sample_t slev)
{
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
@@ -474,12 +475,13 @@ static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
static void zero (sample_t * samples)
{
int i;
+
for (i = 0; i < 256; i++)
samples[i] = 0;
}
void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
+ sample_t clev, sample_t slev)
{
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
diff --git a/liba52/imdct.c b/liba52/imdct.c
index 4909fc5ce9..8a29ee3248 100644
--- a/liba52/imdct.c
+++ b/liba52/imdct.c
@@ -292,7 +292,7 @@ static void ifft16 (complex_t * buf)
ifft4 (buf + 8);
ifft4 (buf + 12);
ifft_pass (buf, roots16 - 4, 4);
- }
+}
static void ifft32 (complex_t * buf)
{
@@ -300,7 +300,7 @@ static void ifft32 (complex_t * buf)
ifft8 (buf + 16);
ifft8 (buf + 24);
ifft_pass (buf, roots32 - 8, 8);
- }
+}
static void ifft64_c (complex_t * buf)
{
@@ -308,8 +308,8 @@ static void ifft64_c (complex_t * buf)
ifft16 (buf + 32);
ifft16 (buf + 48);
ifft_pass (buf, roots64 - 16, 16);
- }
-
+}
+
static void ifft128_c (complex_t * buf)
{
ifft32 (buf);
@@ -320,33 +320,33 @@ static void ifft128_c (complex_t * buf)
ifft32 (buf + 64);
ifft32 (buf + 96);
ifft_pass (buf, roots128 - 32, 32);
- }
-
+}
+
void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
{
int i, k;
sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
const sample_t * window = a52_imdct_window;
complex_t buf[128];
-
- for( i=0; i < 128; i++) {
+
+ for (i = 0; i < 128; i++) {
k = fftorder[i];
t_r = pre1[i].real;
t_i = pre1[i].imag;
-
+
buf[i].real = t_i * data[255-k] + t_r * data[k];
buf[i].imag = t_r * data[255-k] - t_i * data[k];
}
-
+
ifft128 (buf);
/* Post IFFT complex multiply plus IFFT complex conjugate*/
/* Window and convert to real valued signal */
- for(i=0; i< 64; i++) {
+ for (i = 0; i < 64; i++) {
/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
t_r = post1[i].real;
t_i = post1[i].imag;
-
+
a_r = t_r * buf[i].real + t_i * buf[i].imag;
a_i = t_i * buf[i].real - t_r * buf[i].imag;
b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag;
@@ -1076,7 +1076,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
{
- int i,k;
+ int i, k;
sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
const sample_t * window = a52_imdct_window;
complex_t buf1[64], buf2[64];
@@ -1137,8 +1137,8 @@ void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias;
data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias;
delay[126-2*i] = d_i;
- }
}
+}
static double besselI0 (double x)
{
@@ -1149,8 +1149,8 @@ static double besselI0 (double x)
bessel = bessel * x / (i * i) + 1;
while (--i);
return bessel;
- }
-
+}
+
void a52_imdct_init (uint32_t mm_accel)
{
int i, j, k;
@@ -1178,44 +1178,44 @@ void a52_imdct_init (uint32_t mm_accel)
for (i = 0; i < 31; i++)
roots128[i] = cos ((M_PI / 64) * (i + 1));
- for(i=0; i< 64; i++) {
+ for (i = 0; i < 64; i++) {
k = fftorder[i] / 2 + 64;
pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
}
-
+
for (i = 64; i < 128; i++) {
k = fftorder[i] / 2 + 64;
pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
}
- for(i=0; i< 64; i++) {
+ for (i = 0; i < 64; i++) {
post1[i].real = cos ((M_PI / 256) * (i + 0.5));
post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
}
- for(i=0; i< 64; i++) {
+ for (i = 0; i < 64; i++) {
k = fftorder[i] / 4;
pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
-}
+ }
for (i = 0; i < 32; i++) {
post2[i].real = cos ((M_PI / 128) * (i + 0.5));
post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
}
- for (i = 0; i < 128; i++) {
- xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
- xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
- }
- for (i = 0; i < 7; i++) {
- j = 1 << i;
- for (k = 0; k < j; k++) {
- w[i][k].real = cos (-M_PI * k / j);
- w[i][k].imag = sin (-M_PI * k / j);
- }
+ for (i = 0; i < 128; i++) {
+ xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
+ xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
+ }
+ for (i = 0; i < 7; i++) {
+ j = 1 << i;
+ for (k = 0; k < j; k++) {
+ w[i][k].real = cos (-M_PI * k / j);
+ w[i][k].imag = sin (-M_PI * k / j);
}
+ }
#if defined(ARCH_X86) || defined(ARCH_X86_64)
for (i = 0; i < 128; i++) {
sseSinCos1c[2*i+0]= xcos1[i];
@@ -1275,7 +1275,7 @@ void a52_imdct_init (uint32_t mm_accel)
{
fprintf (stderr, "Using SSE optimized IMDCT transform\n");
a52_imdct_512 = imdct_do_512_sse;
- }
+ }
else
if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
{
@@ -1296,7 +1296,7 @@ void a52_imdct_init (uint32_t mm_accel)
fprintf(stderr, "Using AltiVec optimized IMDCT transform\n");
a52_imdct_512 = imdct_do_512_altivec;
}
- else
+ else
#endif
#ifdef LIBA52_DJBFFT
@@ -1306,7 +1306,7 @@ void a52_imdct_init (uint32_t mm_accel)
ifft64 = (void (*) (complex_t *)) fftc4_un64;
} else
#endif
-{
+ {
fprintf (stderr, "No accelerated IMDCT transform found\n");
-}
+ }
}
diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff
index 09eefbd617..10cbc94b08 100644
--- a/liba52/liba52_changes.diff
+++ b/liba52/liba52_changes.diff
@@ -35,13 +35,13 @@
expbap_t * expbap);
int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev);
+ sample_t clev, sample_t slev);
+void downmix_accel_init(uint32_t mm_accel);
int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev);
+ sample_t clev, sample_t slev);
-void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
+extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
+ sample_t clev, sample_t slev);
-void a52_upmix (sample_t * samples, int acmod, int output);
+extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
@@ -120,7 +120,7 @@
+#endif //!ARCH_X86
+
+#endif
-+
++
/* (stolen from the kernel) */
#ifdef WORDS_BIGENDIAN
@@ -254,9 +254,9 @@
+ if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
+#endif
+}
-+
++
int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev)
+ sample_t clev, sample_t slev)
{
@@ -451,7 +480,7 @@
samples[i] = 0;
@@ -264,7 +264,7 @@
-void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
+void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
+ sample_t clev, sample_t slev)
{
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
@@ -563,7 +592,7 @@
@@ -1533,14 +1533,14 @@
+static float __attribute__((aligned(16))) *sseW[7]=
+ {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
+static float __attribute__((aligned(16))) sseWindow[512];
-+#endif
-+
++#endif
++
/* Root values for IFFT */
static sample_t roots16[3];
static sample_t roots32[7];
@@ -245,7 +322,7 @@
ifft_pass (buf, roots128 - 32, 32);
- }
+ }
-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
+void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
@@ -1973,7 +1973,7 @@
+ "add $16, %%"REG_S" \n\t"
+ "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap
+ "sub $16, %%"REG_D" \n\t"
-+ " jnc 1b \n\t"
++ "jnc 1b \n\t"
+ "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g*
+ :: "b" (data), "c" (buf)
+ : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d
@@ -2125,7 +2125,7 @@
+ "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t"
+ "add $16, %%"REG_D" \n\t"
+ "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
-+ " jb 2b \n\t"
++ "jb 2b \n\t"
+ "add %2, %%"REG_S" \n\t"
+ "cmp %1, %%"REG_S" \n\t"
+ " jb 1b \n\t"
@@ -2265,7 +2265,7 @@
@@ -368,7 +1153,7 @@
void a52_imdct_init (uint32_t mm_accel)
- {
+ {
- int i, k;
+ int i, j, k;
double sum;
@@ -2274,7 +2274,7 @@
@@ -420,6 +1205,99 @@
post2[i].real = cos ((M_PI / 128) * (i + 0.5));
post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
- }
+ }
+ for (i = 0; i < 128; i++) {
+ xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
+ xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
@@ -2345,7 +2345,7 @@
+ {
+ fprintf (stderr, "Using SSE optimized IMDCT transform\n");
+ a52_imdct_512 = imdct_do_512_sse;
-+ }
++ }
+ else
+ if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
+ {
@@ -2366,7 +2366,7 @@
+ fprintf(stderr, "Using AltiVec optimized IMDCT transform\n");
+ a52_imdct_512 = imdct_do_512_altivec;
+ }
-+ else
++ else
+#endif
#ifdef LIBA52_DJBFFT
@@ -2437,7 +2437,7 @@
+ mm_accel &=~MM_ACCEL_X86_SSE;
+ fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n");
+ }
-+
++
if (state->samples == NULL) {
free (state);
return NULL;
diff --git a/liba52/parse.c b/liba52/parse.c
index 0791123366..31dfe4652e 100644
--- a/liba52/parse.c
+++ b/liba52/parse.c
@@ -78,10 +78,10 @@ a52_state_t * a52_init (uint32_t mm_accel)
mm_accel &=~MM_ACCEL_X86_SSE;
fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n");
}
-
+
if (state->samples == NULL) {
free (state);
- return NULL;
+ return NULL;
}
for (i = 0; i < 256 * 12; i++)
@@ -90,10 +90,10 @@ a52_state_t * a52_init (uint32_t mm_accel)
state->downmixed = 1;
state->lfsr_state = 1;
-
+
a52_imdct_init (mm_accel);
downmix_accel_init(mm_accel);
-
+
return state;
}
@@ -174,7 +174,7 @@ int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
state->lfeon = bitstream_get (state, 1);
state->output = a52_downmix_init (acmod, *flags, level,
- state->clev, state->slev);
+ state->clev, state->slev);
if (state->output < 0)
return 1;
if (state->lfeon && (*flags & A52_LFE))
@@ -580,7 +580,7 @@ int a52_block (a52_state_t * state)
state->chincpl = 0;
if (bitstream_get (state, 1)) { /* cplinu */
static uint8_t bndtab[16] = {31, 35, 37, 39, 41, 42, 43, 44,
- 45, 45, 46, 46, 47, 47, 48, 48};
+ 45, 45, 46, 46, 47, 47, 48, 48};
int cplbegf;
int cplendf;
int ncplsubnd;
@@ -608,7 +608,7 @@ int a52_block (a52_state_t * state)
if (bitstream_get (state, 1)) {
state->cplbndstrc |= 1 << i;
state->ncplbnd--;
- }
+ }
}
}
@@ -755,7 +755,7 @@ int a52_block (a52_state_t * state)
} else {
if (state->chincpl && (do_bit_alloc & 64)) /* cplinu */
a52_bit_allocate (state, &state->cplba, state->cplstrtbnd,
- state->cplstrtmant, state->cplendmant,
+ state->cplstrtmant, state->cplendmant,
state->cplfleak << 8, state->cplsleak << 8,
&state->cpl_expbap);
for (i = 0; i < nfchans; i++)
@@ -782,7 +782,7 @@ int a52_block (a52_state_t * state)
samples += 256; /* shift for LFE channel */
chanbias = a52_downmix_coeff (coeff, state->acmod, state->output,
- state->dynrng, state->clev, state->slev);
+ state->dynrng, state->clev, state->slev);
quantizer.q1_ptr = quantizer.q2_ptr = quantizer.q4_ptr = -1;
done_cpl = 0;
@@ -874,10 +874,10 @@ int a52_block (a52_state_t * state)
if (coeff[i]) {
if (blksw[i])
a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
- bias);
+ bias);
else
a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
- bias);
+ bias);
} else {
int j;
@@ -887,27 +887,27 @@ int a52_block (a52_state_t * state)
}
a52_downmix (samples, state->acmod, state->output, state->bias,
- state->clev, state->slev);
+ state->clev, state->slev);
} else {
nfchans = nfchans_tbl[state->output & A52_CHANNEL_MASK];
a52_downmix (samples, state->acmod, state->output, 0,
- state->clev, state->slev);
+ state->clev, state->slev);
if (!state->downmixed) {
state->downmixed = 1;
a52_downmix (samples + 1536, state->acmod, state->output, 0,
- state->clev, state->slev);
+ state->clev, state->slev);
}
if (blksw[0])
for (i = 0; i < nfchans; i++)
a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
- state->bias);
+ state->bias);
else
for (i = 0; i < nfchans; i++)
a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
- state->bias);
+ state->bias);
}
return 0;