diff options
author | diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-06-02 22:59:04 +0000 |
---|---|---|
committer | diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-06-02 22:59:04 +0000 |
commit | 228ca70d485e2660c2e381d7112cbcca65c156a0 (patch) | |
tree | f7ab4303f2daa68c76271787a60d50cb1ada2e46 /libfaad2/mdct.c | |
parent | eb1dee5cbf86fba8d5081bae6071cc4a4fd68306 (diff) | |
download | mpv-228ca70d485e2660c2e381d7112cbcca65c156a0.tar.bz2 mpv-228ca70d485e2660c2e381d7112cbcca65c156a0.tar.xz |
update to the 2.0 release of faad, patch by adland
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@12528 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libfaad2/mdct.c')
-rw-r--r-- | libfaad2/mdct.c | 383 |
1 files changed, 305 insertions, 78 deletions
diff --git a/libfaad2/mdct.c b/libfaad2/mdct.c index ba0888d2ae..ff56814fdf 100644 --- a/libfaad2/mdct.c +++ b/libfaad2/mdct.c @@ -1,6 +1,6 @@ /* ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding -** Copyright (C) 2003 M. Bakker, Ahead Software AG, http://www.nero.com +** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ ** Commercial non-GPL licensing of this software is possible. ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. ** -** $Id: mdct.c,v 1.28 2003/09/30 12:43:05 menno Exp $ +** $Id: mdct.c,v 1.2 2003/10/03 22:22:27 alex Exp $ **/ /* @@ -61,57 +61,66 @@ 3: cos(2 * PI * (1/8) / N) 4: sin(2 * PI * (1/8) / N) */ -#ifndef FIXED_POINT -#ifdef _MSC_VER -#pragma warning(disable:4305) -#pragma warning(disable:4244) -#endif -real_t const_tab[][5] = -{ - { COEF_CONST(0.0312500000), COEF_CONST(0.9999952938), COEF_CONST(0.0030679568), - COEF_CONST(0.9999999265), COEF_CONST(0.0003834952) }, /* 2048 */ - { COEF_CONST(0.0322748612), COEF_CONST(0.9999946356), COEF_CONST(0.0032724866), - COEF_CONST(0.9999999404), COEF_CONST(0.0004090615) }, /* 1920 */ - { COEF_CONST(0.0441941738), COEF_CONST(0.9999811649), COEF_CONST(0.0061358847), - COEF_CONST(0.9999997020), COEF_CONST(0.0007669903) }, /* 1024 */ - { COEF_CONST(0.0456435465), COEF_CONST(0.9999786019), COEF_CONST(0.0065449383), - COEF_CONST(0.9999996424), COEF_CONST(0.0008181230) }, /* 960 */ - { COEF_CONST(0.0883883476), COEF_CONST(0.9996988177), COEF_CONST(0.0245412290), - COEF_CONST(0.9999952912), COEF_CONST(0.0030679568) }, /* 256 */ - { COEF_CONST(0.0912870929), COEF_CONST(0.9996573329), COEF_CONST(0.0261769500), - COEF_CONST(0.9999946356), COEF_CONST(0.0032724866) } /* 240 */ -#ifdef SSR_DEC - ,{ COEF_CONST(0.062500000), COEF_CONST(0.999924702), COEF_CONST(0.012271538), - COEF_CONST(0.999998823), COEF_CONST(0.00153398) }, /* 512 */ - { COEF_CONST(0.176776695), COEF_CONST(0.995184727), COEF_CONST(0.09801714), - COEF_CONST(0.999924702), COEF_CONST(0.012271538) } /* 64 */ -#endif -}; -#else +#ifdef FIXED_POINT real_t const_tab[][5] = { - { COEF_CONST(1), COEF_CONST(0.9999952938), COEF_CONST(0.0030679568), - COEF_CONST(0.9999999265), COEF_CONST(0.0003834952) }, /* 2048 */ - { COEF_CONST(/* sqrt(1024/960) */ 1.03279556), COEF_CONST(0.9999946356), COEF_CONST(0.0032724866), - COEF_CONST(0), COEF_CONST(0.0004090615) }, /* 1920 */ - { COEF_CONST(1), COEF_CONST(0.9999811649), COEF_CONST(0.0061358847), - COEF_CONST(0.9999997020), COEF_CONST(0.0007669903) }, /* 1024 */ - { COEF_CONST(/* sqrt(512/480) */ 1.03279556), COEF_CONST(0.9999786019), COEF_CONST(0.0065449383), - COEF_CONST(0.9999996424), COEF_CONST(0.0008181230) }, /* 960 */ - { COEF_CONST(1), COEF_CONST(0.9996988177), COEF_CONST(0.0245412290), - COEF_CONST(0.9999952912), COEF_CONST(0.0030679568) }, /* 256 */ - { COEF_CONST(/* sqrt(256/240) */ 1.03279556), COEF_CONST(0.9996573329), COEF_CONST(0.0261769500), - COEF_CONST(0.9999946356), COEF_CONST(0.0032724866) } /* 240 */ + { /* 2048 */ + COEF_CONST(1), + FRAC_CONST(0.99999529380957619), + FRAC_CONST(0.0030679567629659761), + FRAC_CONST(0.99999992646571789), + FRAC_CONST(0.00038349518757139556) + }, { /* 1920 */ + COEF_CONST(/* sqrt(1024/960) */ 1.0327955589886444), + FRAC_CONST(0.99999464540169647), + FRAC_CONST(0.0032724865065266251), + FRAC_CONST(0.99999991633432805), + FRAC_CONST(0.00040906153202803459) + }, { /* 1024 */ + COEF_CONST(1), + FRAC_CONST(0.99998117528260111), + FRAC_CONST(0.0061358846491544753), + FRAC_CONST(0.99999970586288223), + FRAC_CONST(0.00076699031874270449) + }, { /* 960 */ + COEF_CONST(/* sqrt(512/480) */ 1.0327955589886444), + FRAC_CONST(0.99997858166412923), + FRAC_CONST(0.0065449379673518581), + FRAC_CONST(0.99999966533732598), + FRAC_CONST(0.00081812299560725323) + }, { /* 256 */ + COEF_CONST(1), + FRAC_CONST(0.99969881869620425), + FRAC_CONST(0.024541228522912288), + FRAC_CONST(0.99999529380957619), + FRAC_CONST(0.0030679567629659761) + }, { /* 240 */ + COEF_CONST(/* sqrt(256/240) */ 1.0327955589886444), + FRAC_CONST(0.99965732497555726), + FRAC_CONST(0.026176948307873149), + FRAC_CONST(0.99999464540169647), + FRAC_CONST(0.0032724865065266251) + } #ifdef SSR_DEC - ,{ COEF_CONST(0), COEF_CONST(0.999924702), COEF_CONST(0.012271538), - COEF_CONST(0.999998823), COEF_CONST(0.00153398) }, /* 512 */ - { COEF_CONST(0), COEF_CONST(0.995184727), COEF_CONST(0.09801714), - COEF_CONST(0.999924702), COEF_CONST(0.012271538) } /* 64 */ + ,{ /* 512 */ + COEF_CONST(1), + FRAC_CONST(0.9999247018391445), + FRAC_CONST(0.012271538285719925), + FRAC_CONST(0.99999882345170188), + FRAC_CONST(0.0015339801862847655) + }, { /* 64 */ + COEF_CONST(1), + FRAC_CONST(0.99518472667219693), + FRAC_CONST(0.098017140329560604), + FRAC_CONST(0.9999247018391445), + FRAC_CONST(0.012271538285719925) + } #endif }; #endif -uint8_t map_N_to_idx(uint16_t N) +#ifdef FIXED_POINT +static uint8_t map_N_to_idx(uint16_t N) { /* gives an index into const_tab above */ /* for normal AAC deocding (eg. no scalable profile) only */ @@ -131,21 +140,25 @@ uint8_t map_N_to_idx(uint16_t N) } return 0; } +#endif mdct_info *faad_mdct_init(uint16_t N) { - uint16_t k, N_idx; + uint16_t k; +#ifdef FIXED_POINT + uint16_t N_idx; real_t cangle, sangle, c, s, cold; +#endif real_t scale; - mdct_info *mdct = (mdct_info*)malloc(sizeof(mdct_info)); + mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info)); assert(N % 8 == 0); mdct->N = N; - mdct->sincos = (complex_t*)malloc(N/4*sizeof(complex_t)); - mdct->Z1 = (complex_t*)malloc(N/4*sizeof(complex_t)); + mdct->sincos = (complex_t*)faad_malloc(N/4*sizeof(complex_t)); +#ifdef FIXED_POINT N_idx = map_N_to_idx(N); scale = const_tab[N_idx][0]; @@ -153,29 +166,37 @@ mdct_info *faad_mdct_init(uint16_t N) sangle = const_tab[N_idx][2]; c = const_tab[N_idx][3]; s = const_tab[N_idx][4]; +#else + scale = (real_t)sqrt(2.0 / (real_t)N); +#endif /* (co)sine table build using recurrence relations */ /* this can also be done using static table lookup or */ /* some form of interpolation */ for (k = 0; k < N/4; k++) { -#if 1 - RE(mdct->sincos[k]) = -1*MUL_C_C(c,scale); - IM(mdct->sincos[k]) = -1*MUL_C_C(s,scale); +#ifdef FIXED_POINT + RE(mdct->sincos[k]) = c; //MUL_C_C(c,scale); + IM(mdct->sincos[k]) = s; //MUL_C_C(s,scale); cold = c; - c = MUL_C_C(c,cangle) - MUL_C_C(s,sangle); - s = MUL_C_C(s,cangle) + MUL_C_C(cold,sangle); + c = MUL_F(c,cangle) - MUL_F(s,sangle); + s = MUL_F(s,cangle) + MUL_F(cold,sangle); #else /* no recurrence, just sines */ - RE(mdct->sincos[k]) = -scale*cos(2.0*M_PI*(k+1./8.) / (float)N); - IM(mdct->sincos[k]) = -scale*sin(2.0*M_PI*(k+1./8.) / (float)N); + RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N)); + IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N)); #endif } /* initialise fft */ mdct->cfft = cffti(N/4); +#ifdef PROFILE + mdct->cycles = 0; + mdct->fft_cycles = 0; +#endif + return mdct; } @@ -183,12 +204,16 @@ void faad_mdct_end(mdct_info *mdct) { if (mdct != NULL) { +#ifdef PROFILE + printf("MDCT[%.4d]: %I64d cycles\n", mdct->N, mdct->cycles); + printf("CFFT[%.4d]: %I64d cycles\n", mdct->N/4, mdct->fft_cycles); +#endif + cfftu(mdct->cfft); - if (mdct->Z1) free(mdct->Z1); - if (mdct->sincos) free(mdct->sincos); + if (mdct->sincos) faad_free(mdct->sincos); - free(mdct); + faad_free(mdct); } } @@ -197,7 +222,7 @@ void faad_imdct(mdct_info *mdct, real_t *X_in, real_t *X_out) uint16_t k; complex_t x; - complex_t *Z1 = mdct->Z1; + ALIGN complex_t Z1[512]; complex_t *sincos = mdct->sincos; uint16_t N = mdct->N; @@ -205,47 +230,239 @@ void faad_imdct(mdct_info *mdct, real_t *X_in, real_t *X_out) uint16_t N4 = N >> 2; uint16_t N8 = N >> 3; +#ifdef PROFILE + int64_t count1, count2 = faad_get_ts(); +#endif + /* pre-IFFT complex multiplication */ for (k = 0; k < N4; k++) { - RE(Z1[k]) = MUL_R_C(X_in[N2 - 1 - 2*k], RE(sincos[k])) - MUL_R_C(X_in[2*k], IM(sincos[k])); - IM(Z1[k]) = MUL_R_C(X_in[2*k], RE(sincos[k])) + MUL_R_C(X_in[N2 - 1 - 2*k], IM(sincos[k])); + ComplexMult(&IM(Z1[k]), &RE(Z1[k]), + X_in[2*k], X_in[N2 - 1 - 2*k], RE(sincos[k]), IM(sincos[k])); } +#ifdef PROFILE + count1 = faad_get_ts(); +#endif + /* complex IFFT, any non-scaling FFT can be used here */ cfftb(mdct->cfft, Z1); +#ifdef PROFILE + count1 = faad_get_ts() - count1; +#endif + /* post-IFFT complex multiplication */ for (k = 0; k < N4; k++) { RE(x) = RE(Z1[k]); IM(x) = IM(Z1[k]); - - RE(Z1[k]) = MUL_R_C(RE(x), RE(sincos[k])) - MUL_R_C(IM(x), IM(sincos[k])); - IM(Z1[k]) = MUL_R_C(IM(x), RE(sincos[k])) + MUL_R_C(RE(x), IM(sincos[k])); + ComplexMult(&IM(Z1[k]), &RE(Z1[k]), + IM(x), RE(x), RE(sincos[k]), IM(sincos[k])); } /* reordering */ - for (k = 0; k < N8; k++) + for (k = 0; k < N8; k+=2) { X_out[ 2*k] = IM(Z1[N8 + k]); + X_out[ 2 + 2*k] = IM(Z1[N8 + 1 + k]); + X_out[ 1 + 2*k] = -RE(Z1[N8 - 1 - k]); + X_out[ 3 + 2*k] = -RE(Z1[N8 - 2 - k]); + X_out[N4 + 2*k] = RE(Z1[ k]); + X_out[N4 + + 2 + 2*k] = RE(Z1[ 1 + k]); + X_out[N4 + 1 + 2*k] = -IM(Z1[N4 - 1 - k]); + X_out[N4 + 3 + 2*k] = -IM(Z1[N4 - 2 - k]); + X_out[N2 + 2*k] = RE(Z1[N8 + k]); + X_out[N2 + + 2 + 2*k] = RE(Z1[N8 + 1 + k]); + X_out[N2 + 1 + 2*k] = -IM(Z1[N8 - 1 - k]); + X_out[N2 + 3 + 2*k] = -IM(Z1[N8 - 2 - k]); + X_out[N2 + N4 + 2*k] = -IM(Z1[ k]); + X_out[N2 + N4 + 2 + 2*k] = -IM(Z1[ 1 + k]); + X_out[N2 + N4 + 1 + 2*k] = RE(Z1[N4 - 1 - k]); + X_out[N2 + N4 + 3 + 2*k] = RE(Z1[N4 - 2 - k]); } + +#ifdef PROFILE + count2 = faad_get_ts() - count2; + mdct->fft_cycles += count1; + mdct->cycles += (count2 - count1); +#endif } +#ifdef USE_SSE +void faad_imdct_sse(mdct_info *mdct, real_t *X_in, real_t *X_out) +{ + uint16_t k; + + ALIGN complex_t Z1[512]; + complex_t *sincos = mdct->sincos; + + uint16_t N = mdct->N; + uint16_t N2 = N >> 1; + uint16_t N4 = N >> 2; + uint16_t N8 = N >> 3; + +#ifdef PROFILE + int64_t count1, count2 = faad_get_ts(); +#endif + + /* pre-IFFT complex multiplication */ + for (k = 0; k < N4; k+=4) + { + __m128 m12, m13, m14, m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; + __m128 n12, n13, n14, n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11; + n12 = _mm_load_ps(&X_in[N2 - 2*k - 8]); + m12 = _mm_load_ps(&X_in[N2 - 2*k - 4]); + m13 = _mm_load_ps(&X_in[2*k]); + n13 = _mm_load_ps(&X_in[2*k + 4]); + m1 = _mm_load_ps(&RE(sincos[k])); + n1 = _mm_load_ps(&RE(sincos[k+2])); + + m0 = _mm_shuffle_ps(m12, m13, _MM_SHUFFLE(2,0,1,3)); + m2 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,3,0,1)); + m14 = _mm_shuffle_ps(m0, m0, _MM_SHUFFLE(3,1,2,0)); + n0 = _mm_shuffle_ps(n12, n13, _MM_SHUFFLE(2,0,1,3)); + n2 = _mm_shuffle_ps(n1, n1, _MM_SHUFFLE(2,3,0,1)); + n14 = _mm_shuffle_ps(n0, n0, _MM_SHUFFLE(3,1,2,0)); + + m3 = _mm_mul_ps(m14, m1); + n3 = _mm_mul_ps(n14, n1); + m4 = _mm_mul_ps(m14, m2); + n4 = _mm_mul_ps(n14, n2); + + m5 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(2,0,2,0)); + n5 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(2,0,2,0)); + m6 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(3,1,3,1)); + n6 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(3,1,3,1)); + + m7 = _mm_add_ps(m5, m6); + n7 = _mm_add_ps(n5, n6); + m8 = _mm_sub_ps(m5, m6); + n8 = _mm_sub_ps(n5, n6); + + m9 = _mm_shuffle_ps(m7, m7, _MM_SHUFFLE(3,2,3,2)); + n9 = _mm_shuffle_ps(n7, n7, _MM_SHUFFLE(3,2,3,2)); + m10 = _mm_shuffle_ps(m8, m8, _MM_SHUFFLE(1,0,1,0)); + n10 = _mm_shuffle_ps(n8, n8, _MM_SHUFFLE(1,0,1,0)); + + m11 = _mm_unpacklo_ps(m10, m9); + n11 = _mm_unpacklo_ps(n10, n9); + + _mm_store_ps(&RE(Z1[k]), m11); + _mm_store_ps(&RE(Z1[k+2]), n11); + } + +#ifdef PROFILE + count1 = faad_get_ts(); +#endif + + /* complex IFFT, any non-scaling FFT can be used here */ + cfftb_sse(mdct->cfft, Z1); + +#ifdef PROFILE + count1 = faad_get_ts() - count1; +#endif + + /* post-IFFT complex multiplication */ + for (k = 0; k < N4; k+=4) + { + __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; + __m128 n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11; + m0 = _mm_load_ps(&RE(Z1[k])); + n0 = _mm_load_ps(&RE(Z1[k+2])); + m1 = _mm_load_ps(&RE(sincos[k])); + n1 = _mm_load_ps(&RE(sincos[k+2])); + + m2 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,3,0,1)); + n2 = _mm_shuffle_ps(n1, n1, _MM_SHUFFLE(2,3,0,1)); + + m3 = _mm_mul_ps(m0, m1); + n3 = _mm_mul_ps(n0, n1); + m4 = _mm_mul_ps(m0, m2); + n4 = _mm_mul_ps(n0, n2); + + m5 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(2,0,2,0)); + n5 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(2,0,2,0)); + m6 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(3,1,3,1)); + n6 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(3,1,3,1)); + + m7 = _mm_add_ps(m5, m6); + n7 = _mm_add_ps(n5, n6); + m8 = _mm_sub_ps(m5, m6); + n8 = _mm_sub_ps(n5, n6); + + m9 = _mm_shuffle_ps(m7, m7, _MM_SHUFFLE(3,2,3,2)); + n9 = _mm_shuffle_ps(n7, n7, _MM_SHUFFLE(3,2,3,2)); + m10 = _mm_shuffle_ps(m8, m8, _MM_SHUFFLE(1,0,1,0)); + n10 = _mm_shuffle_ps(n8, n8, _MM_SHUFFLE(1,0,1,0)); + + m11 = _mm_unpacklo_ps(m10, m9); + n11 = _mm_unpacklo_ps(n10, n9); + + _mm_store_ps(&RE(Z1[k]), m11); + _mm_store_ps(&RE(Z1[k+2]), n11); + } + + /* reordering */ + for (k = 0; k < N8; k+=2) + { + __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m13; + __m128 n4, n5, n6, n7, n8, n9; + __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); + __m128 neg2 = _mm_set_ps(-1.0, -1.0, -1.0, -1.0); + + m0 = _mm_load_ps(&RE(Z1[k])); + m1 = _mm_load_ps(&RE(Z1[N8 - 2 - k])); + m2 = _mm_load_ps(&RE(Z1[N8 + k])); + m3 = _mm_load_ps(&RE(Z1[N4 - 2 - k])); + + m10 = _mm_mul_ps(m0, neg1); + m11 = _mm_mul_ps(m1, neg2); + m13 = _mm_mul_ps(m3, neg1); + + m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,1,2,0)); + n4 = _mm_shuffle_ps(m10, m10, _MM_SHUFFLE(3,1,2,0)); + m4 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(3,1,2,0)); + n5 = _mm_shuffle_ps(m13, m13, _MM_SHUFFLE(3,1,2,0)); + + m6 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(3,2,1,0)); + n6 = _mm_shuffle_ps(n4, n5, _MM_SHUFFLE(3,2,1,0)); + m7 = _mm_shuffle_ps(m5, m4, _MM_SHUFFLE(3,2,1,0)); + n7 = _mm_shuffle_ps(n5, n4, _MM_SHUFFLE(3,2,1,0)); + + m8 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0,3,1,2)); + n8 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2,1,3,0)); + m9 = _mm_shuffle_ps(m7, m7, _MM_SHUFFLE(2,1,3,0)); + n9 = _mm_shuffle_ps(n7, n7, _MM_SHUFFLE(0,3,1,2)); + + _mm_store_ps(&X_out[2*k], m8); + _mm_store_ps(&X_out[N4 + 2*k], n8); + _mm_store_ps(&X_out[N2 + 2*k], m9); + _mm_store_ps(&X_out[N2 + N4 + 2*k], n9); + } + +#ifdef PROFILE + count2 = faad_get_ts() - count2; + mdct->fft_cycles += count1; + mdct->cycles += (count2 - count1); +#endif +} +#endif + #ifdef LTP_DEC void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out) { uint16_t k; complex_t x; - complex_t *Z1 = mdct->Z1; + ALIGN complex_t Z1[512]; complex_t *sincos = mdct->sincos; uint16_t N = mdct->N; @@ -253,7 +470,11 @@ void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out) uint16_t N4 = N >> 2; uint16_t N8 = N >> 3; +#ifndef FIXED_POINT real_t scale = REAL_CONST(N); +#else + real_t scale = REAL_CONST(4.0/N); +#endif /* pre-FFT complex multiplication */ for (k = 0; k < N8; k++) @@ -262,14 +483,20 @@ void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out) RE(x) = X_in[N - N4 - 1 - n] + X_in[N - N4 + n]; IM(x) = X_in[ N4 + n] - X_in[ N4 - 1 - n]; - RE(Z1[k]) = -MUL_R_C(RE(x), RE(sincos[k])) - MUL_R_C(IM(x), IM(sincos[k])); - IM(Z1[k]) = -MUL_R_C(IM(x), RE(sincos[k])) + MUL_R_C(RE(x), IM(sincos[k])); + ComplexMult(&RE(Z1[k]), &IM(Z1[k]), + RE(x), IM(x), RE(sincos[k]), IM(sincos[k])); + + RE(Z1[k]) = MUL_R(RE(Z1[k]), scale); + IM(Z1[k]) = MUL_R(IM(Z1[k]), scale); RE(x) = X_in[N2 - 1 - n] - X_in[ n]; IM(x) = X_in[N2 + n] + X_in[N - 1 - n]; - RE(Z1[k + N8]) = -MUL_R_C(RE(x), RE(sincos[k + N8])) - MUL_R_C(IM(x), IM(sincos[k + N8])); - IM(Z1[k + N8]) = -MUL_R_C(IM(x), RE(sincos[k + N8])) + MUL_R_C(RE(x), IM(sincos[k + N8])); + ComplexMult(&RE(Z1[k + N8]), &IM(Z1[k + N8]), + RE(x), IM(x), RE(sincos[k + N8]), IM(sincos[k + N8])); + + RE(Z1[k + N8]) = MUL_R(RE(Z1[k + N8]), scale); + IM(Z1[k + N8]) = MUL_R(IM(Z1[k + N8]), scale); } /* complex FFT, any non-scaling FFT can be used here */ @@ -279,13 +506,13 @@ void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out) for (k = 0; k < N4; k++) { uint16_t n = k << 1; - RE(x) = MUL(MUL_R_C(RE(Z1[k]), RE(sincos[k])) + MUL_R_C(IM(Z1[k]), IM(sincos[k])), scale); - IM(x) = MUL(MUL_R_C(IM(Z1[k]), RE(sincos[k])) - MUL_R_C(RE(Z1[k]), IM(sincos[k])), scale); + ComplexMult(&RE(x), &IM(x), + RE(Z1[k]), IM(Z1[k]), RE(sincos[k]), IM(sincos[k])); - X_out[ n] = RE(x); - X_out[N2 - 1 - n] = -IM(x); - X_out[N2 + n] = IM(x); - X_out[N - 1 - n] = -RE(x); + X_out[ n] = -RE(x); + X_out[N2 - 1 - n] = IM(x); + X_out[N2 + n] = -IM(x); + X_out[N - 1 - n] = RE(x); } } #endif |