diff options
-rw-r--r-- | liba52/a52_internal.h | 8 | ||||
-rw-r--r-- | liba52/bit_allocate.c | 2 | ||||
-rw-r--r-- | liba52/bitstream.c | 6 | ||||
-rw-r--r-- | liba52/bitstream.h | 4 | ||||
-rw-r--r-- | liba52/downmix.c | 12 | ||||
-rw-r--r-- | liba52/imdct.c | 70 | ||||
-rw-r--r-- | liba52/liba52_changes.diff | 34 | ||||
-rw-r--r-- | liba52/parse.c | 32 |
8 files changed, 85 insertions, 83 deletions
diff --git a/liba52/a52_internal.h b/liba52/a52_internal.h index d420803699..3627a2d23a 100644 --- a/liba52/a52_internal.h +++ b/liba52/a52_internal.h @@ -122,16 +122,16 @@ struct a52_state_s { #endif void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, - int start, int end, int fastleak, int slowleak, + int start, int end, int fastleak, int slowleak, expbap_t * expbap); int a52_downmix_init (int input, int flags, sample_t * level, - sample_t clev, sample_t slev); + sample_t clev, sample_t slev); void downmix_accel_init(uint32_t mm_accel); int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, - sample_t clev, sample_t slev); + sample_t clev, sample_t slev); extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, - sample_t clev, sample_t slev); + sample_t clev, sample_t slev); extern void (*a52_upmix) (sample_t * samples, int acmod, int output); void a52_imdct_init (uint32_t mm_accel); diff --git a/liba52/bit_allocate.c b/liba52/bit_allocate.c index a5f3b77024..0567b22852 100644 --- a/liba52/bit_allocate.c +++ b/liba52/bit_allocate.c @@ -122,7 +122,7 @@ do { \ } while (0) void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, - int start, int end, int fastleak, int slowleak, + int start, int end, int fastleak, int slowleak, expbap_t * expbap) { static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410}; diff --git a/liba52/bitstream.c b/liba52/bitstream.c index 6c275109d1..0b2d8fa48f 100644 --- a/liba52/bitstream.c +++ b/liba52/bitstream.c @@ -79,9 +79,9 @@ uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits) bitstream_fill_current (state); - if(num_bits != 0) + if (num_bits != 0) result = (result << num_bits) | (state->current_word >> (32 - num_bits)); - + state->bits_left = 32 - num_bits; return result; @@ -97,7 +97,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits) bitstream_fill_current(state); - if(num_bits != 0) + if (num_bits != 0) result = (result << num_bits) | (state->current_word >> (32 - num_bits)); state->bits_left = 32 - num_bits; diff --git a/liba52/bitstream.h b/liba52/bitstream.h index 8576f8b282..6764b12a78 100644 --- a/liba52/bitstream.h +++ b/liba52/bitstream.h @@ -66,7 +66,7 @@ static inline uint32_t unaligned32(const void *v) { #endif //!ARCH_X86 #endif - + /* (stolen from the kernel) */ #ifdef WORDS_BIGENDIAN @@ -114,7 +114,7 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) return result; #else uint32_t result; - + if (num_bits < state->bits_left) { result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; diff --git a/liba52/downmix.c b/liba52/downmix.c index 5c61cee475..368773e879 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -63,9 +63,9 @@ void downmix_accel_init(uint32_t mm_accel) if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; #endif } - + int a52_downmix_init (int input, int flags, sample_t * level, - sample_t clev, sample_t slev) + sample_t clev, sample_t slev) { static uint8_t table[11][8] = { {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, @@ -96,7 +96,7 @@ int a52_downmix_init (int input, int flags, sample_t * level, output = flags & A52_CHANNEL_MASK; if (output > A52_DOLBY) return -1; - + output = table[output][input & 7]; if ((output == A52_STEREO) && @@ -180,11 +180,12 @@ int a52_downmix_init (int input, int flags, sample_t * level, *level *= 1 / (1 + 3 * LEVEL_3DB); break; } + return output; } int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, - sample_t clev, sample_t slev) + sample_t clev, sample_t slev) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -474,12 +475,13 @@ static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) static void zero (sample_t * samples) { int i; + for (i = 0; i < 256; i++) samples[i] = 0; } void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, - sample_t clev, sample_t slev) + sample_t clev, sample_t slev) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { diff --git a/liba52/imdct.c b/liba52/imdct.c index 4909fc5ce9..8a29ee3248 100644 --- a/liba52/imdct.c +++ b/liba52/imdct.c @@ -292,7 +292,7 @@ static void ifft16 (complex_t * buf) ifft4 (buf + 8); ifft4 (buf + 12); ifft_pass (buf, roots16 - 4, 4); - } +} static void ifft32 (complex_t * buf) { @@ -300,7 +300,7 @@ static void ifft32 (complex_t * buf) ifft8 (buf + 16); ifft8 (buf + 24); ifft_pass (buf, roots32 - 8, 8); - } +} static void ifft64_c (complex_t * buf) { @@ -308,8 +308,8 @@ static void ifft64_c (complex_t * buf) ifft16 (buf + 32); ifft16 (buf + 48); ifft_pass (buf, roots64 - 16, 16); - } - +} + static void ifft128_c (complex_t * buf) { ifft32 (buf); @@ -320,33 +320,33 @@ static void ifft128_c (complex_t * buf) ifft32 (buf + 64); ifft32 (buf + 96); ifft_pass (buf, roots128 - 32, 32); - } - +} + void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) { int i, k; sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; const sample_t * window = a52_imdct_window; complex_t buf[128]; - - for( i=0; i < 128; i++) { + + for (i = 0; i < 128; i++) { k = fftorder[i]; t_r = pre1[i].real; t_i = pre1[i].imag; - + buf[i].real = t_i * data[255-k] + t_r * data[k]; buf[i].imag = t_r * data[255-k] - t_i * data[k]; } - + ifft128 (buf); /* Post IFFT complex multiply plus IFFT complex conjugate*/ /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) { + for (i = 0; i < 64; i++) { /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ t_r = post1[i].real; t_i = post1[i].imag; - + a_r = t_r * buf[i].real + t_i * buf[i].imag; a_i = t_i * buf[i].real - t_r * buf[i].imag; b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag; @@ -1076,7 +1076,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) { - int i,k; + int i, k; sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; const sample_t * window = a52_imdct_window; complex_t buf1[64], buf2[64]; @@ -1137,8 +1137,8 @@ void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias; data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias; delay[126-2*i] = d_i; - } } +} static double besselI0 (double x) { @@ -1149,8 +1149,8 @@ static double besselI0 (double x) bessel = bessel * x / (i * i) + 1; while (--i); return bessel; - } - +} + void a52_imdct_init (uint32_t mm_accel) { int i, j, k; @@ -1178,44 +1178,44 @@ void a52_imdct_init (uint32_t mm_accel) for (i = 0; i < 31; i++) roots128[i] = cos ((M_PI / 64) * (i + 1)); - for(i=0; i< 64; i++) { + for (i = 0; i < 64; i++) { k = fftorder[i] / 2 + 64; pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); } - + for (i = 64; i < 128; i++) { k = fftorder[i] / 2 + 64; pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); } - for(i=0; i< 64; i++) { + for (i = 0; i < 64; i++) { post1[i].real = cos ((M_PI / 256) * (i + 0.5)); post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); } - for(i=0; i< 64; i++) { + for (i = 0; i < 64; i++) { k = fftorder[i] / 4; pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); -} + } for (i = 0; i < 32; i++) { post2[i].real = cos ((M_PI / 128) * (i + 0.5)); post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); } - for (i = 0; i < 128; i++) { - xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); - xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); - } - for (i = 0; i < 7; i++) { - j = 1 << i; - for (k = 0; k < j; k++) { - w[i][k].real = cos (-M_PI * k / j); - w[i][k].imag = sin (-M_PI * k / j); - } + for (i = 0; i < 128; i++) { + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); + } + for (i = 0; i < 7; i++) { + j = 1 << i; + for (k = 0; k < j; k++) { + w[i][k].real = cos (-M_PI * k / j); + w[i][k].imag = sin (-M_PI * k / j); } + } #if defined(ARCH_X86) || defined(ARCH_X86_64) for (i = 0; i < 128; i++) { sseSinCos1c[2*i+0]= xcos1[i]; @@ -1275,7 +1275,7 @@ void a52_imdct_init (uint32_t mm_accel) { fprintf (stderr, "Using SSE optimized IMDCT transform\n"); a52_imdct_512 = imdct_do_512_sse; - } + } else if(mm_accel & MM_ACCEL_X86_3DNOWEXT) { @@ -1296,7 +1296,7 @@ void a52_imdct_init (uint32_t mm_accel) fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); a52_imdct_512 = imdct_do_512_altivec; } - else + else #endif #ifdef LIBA52_DJBFFT @@ -1306,7 +1306,7 @@ void a52_imdct_init (uint32_t mm_accel) ifft64 = (void (*) (complex_t *)) fftc4_un64; } else #endif -{ + { fprintf (stderr, "No accelerated IMDCT transform found\n"); -} + } } diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff index 09eefbd617..10cbc94b08 100644 --- a/liba52/liba52_changes.diff +++ b/liba52/liba52_changes.diff @@ -35,13 +35,13 @@ expbap_t * expbap); int a52_downmix_init (int input, int flags, sample_t * level, - sample_t clev, sample_t slev); + sample_t clev, sample_t slev); +void downmix_accel_init(uint32_t mm_accel); int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, - sample_t clev, sample_t slev); + sample_t clev, sample_t slev); -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, - sample_t clev, sample_t slev); + sample_t clev, sample_t slev); -void a52_upmix (sample_t * samples, int acmod, int output); +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); @@ -120,7 +120,7 @@ +#endif //!ARCH_X86 + +#endif -+ ++ /* (stolen from the kernel) */ #ifdef WORDS_BIGENDIAN @@ -254,9 +254,9 @@ + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; +#endif +} -+ ++ int a52_downmix_init (int input, int flags, sample_t * level, - sample_t clev, sample_t slev) + sample_t clev, sample_t slev) { @@ -451,7 +480,7 @@ samples[i] = 0; @@ -264,7 +264,7 @@ -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, - sample_t clev, sample_t slev) + sample_t clev, sample_t slev) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -563,7 +592,7 @@ @@ -1533,14 +1533,14 @@ +static float __attribute__((aligned(16))) *sseW[7]= + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; +static float __attribute__((aligned(16))) sseWindow[512]; -+#endif -+ ++#endif ++ /* Root values for IFFT */ static sample_t roots16[3]; static sample_t roots32[7]; @@ -245,7 +322,7 @@ ifft_pass (buf, roots128 - 32, 32); - } + } -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) @@ -1973,7 +1973,7 @@ + "add $16, %%"REG_S" \n\t" + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap + "sub $16, %%"REG_D" \n\t" -+ " jnc 1b \n\t" ++ "jnc 1b \n\t" + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* + :: "b" (data), "c" (buf) + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d @@ -2125,7 +2125,7 @@ + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" + "add $16, %%"REG_D" \n\t" + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 -+ " jb 2b \n\t" ++ "jb 2b \n\t" + "add %2, %%"REG_S" \n\t" + "cmp %1, %%"REG_S" \n\t" + " jb 1b \n\t" @@ -2265,7 +2265,7 @@ @@ -368,7 +1153,7 @@ void a52_imdct_init (uint32_t mm_accel) - { + { - int i, k; + int i, j, k; double sum; @@ -2274,7 +2274,7 @@ @@ -420,6 +1205,99 @@ post2[i].real = cos ((M_PI / 128) * (i + 0.5)); post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); - } + } + for (i = 0; i < 128; i++) { + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); @@ -2345,7 +2345,7 @@ + { + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); + a52_imdct_512 = imdct_do_512_sse; -+ } ++ } + else + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) + { @@ -2366,7 +2366,7 @@ + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); + a52_imdct_512 = imdct_do_512_altivec; + } -+ else ++ else +#endif #ifdef LIBA52_DJBFFT @@ -2437,7 +2437,7 @@ + mm_accel &=~MM_ACCEL_X86_SSE; + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); + } -+ ++ if (state->samples == NULL) { free (state); return NULL; diff --git a/liba52/parse.c b/liba52/parse.c index 0791123366..31dfe4652e 100644 --- a/liba52/parse.c +++ b/liba52/parse.c @@ -78,10 +78,10 @@ a52_state_t * a52_init (uint32_t mm_accel) mm_accel &=~MM_ACCEL_X86_SSE; fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); } - + if (state->samples == NULL) { free (state); - return NULL; + return NULL; } for (i = 0; i < 256 * 12; i++) @@ -90,10 +90,10 @@ a52_state_t * a52_init (uint32_t mm_accel) state->downmixed = 1; state->lfsr_state = 1; - + a52_imdct_init (mm_accel); downmix_accel_init(mm_accel); - + return state; } @@ -174,7 +174,7 @@ int a52_frame (a52_state_t * state, uint8_t * buf, int * flags, state->lfeon = bitstream_get (state, 1); state->output = a52_downmix_init (acmod, *flags, level, - state->clev, state->slev); + state->clev, state->slev); if (state->output < 0) return 1; if (state->lfeon && (*flags & A52_LFE)) @@ -580,7 +580,7 @@ int a52_block (a52_state_t * state) state->chincpl = 0; if (bitstream_get (state, 1)) { /* cplinu */ static uint8_t bndtab[16] = {31, 35, 37, 39, 41, 42, 43, 44, - 45, 45, 46, 46, 47, 47, 48, 48}; + 45, 45, 46, 46, 47, 47, 48, 48}; int cplbegf; int cplendf; int ncplsubnd; @@ -608,7 +608,7 @@ int a52_block (a52_state_t * state) if (bitstream_get (state, 1)) { state->cplbndstrc |= 1 << i; state->ncplbnd--; - } + } } } @@ -755,7 +755,7 @@ int a52_block (a52_state_t * state) } else { if (state->chincpl && (do_bit_alloc & 64)) /* cplinu */ a52_bit_allocate (state, &state->cplba, state->cplstrtbnd, - state->cplstrtmant, state->cplendmant, + state->cplstrtmant, state->cplendmant, state->cplfleak << 8, state->cplsleak << 8, &state->cpl_expbap); for (i = 0; i < nfchans; i++) @@ -782,7 +782,7 @@ int a52_block (a52_state_t * state) samples += 256; /* shift for LFE channel */ chanbias = a52_downmix_coeff (coeff, state->acmod, state->output, - state->dynrng, state->clev, state->slev); + state->dynrng, state->clev, state->slev); quantizer.q1_ptr = quantizer.q2_ptr = quantizer.q4_ptr = -1; done_cpl = 0; @@ -874,10 +874,10 @@ int a52_block (a52_state_t * state) if (coeff[i]) { if (blksw[i]) a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, - bias); + bias); else a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, - bias); + bias); } else { int j; @@ -887,27 +887,27 @@ int a52_block (a52_state_t * state) } a52_downmix (samples, state->acmod, state->output, state->bias, - state->clev, state->slev); + state->clev, state->slev); } else { nfchans = nfchans_tbl[state->output & A52_CHANNEL_MASK]; a52_downmix (samples, state->acmod, state->output, 0, - state->clev, state->slev); + state->clev, state->slev); if (!state->downmixed) { state->downmixed = 1; a52_downmix (samples + 1536, state->acmod, state->output, 0, - state->clev, state->slev); + state->clev, state->slev); } if (blksw[0]) for (i = 0; i < nfchans; i++) a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, - state->bias); + state->bias); else for (i = 0; i < nfchans; i++) a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, - state->bias); + state->bias); } return 0; |