diff options
author | rtognimp <rtognimp@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2006-04-18 19:39:34 +0000 |
---|---|---|
committer | rtognimp <rtognimp@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2006-04-18 19:39:34 +0000 |
commit | 82361d50d0dcbb72132fe1203fe152a89dd165e9 (patch) | |
tree | 4abb0b4820172c1167ddb75433038be03aa44063 /libfaad2 | |
parent | b664e7f3216a22b9e3f92e4861733a3222fa11d8 (diff) | |
download | mpv-82361d50d0dcbb72132fe1203fe152a89dd165e9.tar.bz2 mpv-82361d50d0dcbb72132fe1203fe152a89dd165e9.tar.xz |
Update to FAAD2 CVS snapshot 2004-09-15 + MPlayer fixes
Patch by me and Emanuele Giaquinta
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@18142 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libfaad2')
89 files changed, 2143 insertions, 1427 deletions
diff --git a/libfaad2/Makefile b/libfaad2/Makefile index 7e3be6a33a..ca911d054d 100644 --- a/libfaad2/Makefile +++ b/libfaad2/Makefile @@ -8,6 +8,7 @@ SRCS = bits.c \ common.c \ decoder.c \ drc.c \ + drm_dec.c \ error.c \ filtbank.c \ hcr.c \ diff --git a/libfaad2/README b/libfaad2/README index bd36b51fc2..5fb717062e 100644 --- a/libfaad2/README +++ b/libfaad2/README @@ -1,3 +1,3 @@ -This is a FAAD2 2.1 beta CVS snapshot from 2004-07-12. +This is a FAAD2 CVS snapshot from 2004-09-15. Only the content of the libfaad/ and include/ subdirectories is present here. MPlayer-specific modifications can be found in local_changes.diff. diff --git a/libfaad2/analysis.h b/libfaad2/analysis.h index 0a562eaaf1..25dd9d90ce 100644 --- a/libfaad2/analysis.h +++ b/libfaad2/analysis.h @@ -22,7 +22,7 @@ ** Commercial non-GPL licensing of this software is possible. ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. ** -** $Id: analysis.h,v 1.12 2004/02/26 09:29:26 menno Exp $ +** $Id: analysis.h,v 1.14 2004/09/04 14:56:27 menno Exp $ **/ #ifndef __ANALYSIS_H__ diff --git a/libfaad2/bits.c b/libfaad2/bits.c index 3c4cb80a24..0801e0e4ff 100644 --- a/libfaad2/bits.c +++ b/libfaad2/bits.c @@ -22,7 +22,7 @@ ** Commercial non-GPL licensing of this software is possible. ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. ** -** $Id: bits.c,v 1.37 2004/02/26 09:29:26 menno Exp $ +** $Id: bits.c,v 1.39 2004/09/04 14:56:27 menno Exp $ **/ #include "common.h" diff --git a/libfaad2/cfft.c b/libfaad2/cfft.c index 6a1b005fc3..3b81665595 100644 --- a/libfaad2/cfft.c +++ b/libfaad2/cfft.c @@ -22,7 +22,7 @@ ** Commercial non-GPL licensing of this software is possible. ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. 
** -** $Id: cfft.c,v 1.27 2004/06/30 12:45:55 menno Exp $ +** $Id: cfft.c,v 1.30 2004/09/08 09:43:11 gcp Exp $ **/ /* @@ -44,14 +44,6 @@ /* static function declarations */ -#ifdef USE_SSE -static void passf2pos_sse(const uint16_t l1, const complex_t *cc, - complex_t *ch, const complex_t *wa); -static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, - complex_t *ch, const complex_t *wa); -static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, - const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); -#endif static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa); static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, @@ -74,89 +66,6 @@ static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac); passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. ----------------------------------------------------------------------*/ -#if 0 //def USE_SSE -static void passf2pos_sse(const uint16_t l1, const complex_t *cc, - complex_t *ch, const complex_t *wa) -{ - uint16_t k, ah, ac; - - for (k = 0; k < l1; k++) - { - ah = 2*k; - ac = 4*k; - - RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); - IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); - - RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); - IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); - } -} - -static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, - complex_t *ch, const complex_t *wa) -{ - uint16_t i, k, ah, ac; - - for (k = 0; k < l1; k++) - { - ah = k*ido; - ac = 2*k*ido; - - for (i = 0; i < ido; i+=4) - { - __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14; - __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24; - __m128 w1, w2, w3, w4; - - m1 = _mm_load_ps(&RE(cc[ac+i])); - m2 = _mm_load_ps(&RE(cc[ac+ido+i])); - m5 = _mm_load_ps(&RE(cc[ac+i+2])); - m6 = _mm_load_ps(&RE(cc[ac+ido+i+2])); - w1 = 
_mm_load_ps(&RE(wa[i])); - w3 = _mm_load_ps(&RE(wa[i+2])); - - m3 = _mm_add_ps(m1, m2); - m15 = _mm_add_ps(m5, m6); - - m4 = _mm_sub_ps(m1, m2); - m16 = _mm_sub_ps(m5, m6); - - _mm_store_ps(&RE(ch[ah+i]), m3); - _mm_store_ps(&RE(ch[ah+i+2]), m15); - - - w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); - w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); - - m7 = _mm_mul_ps(m4, w1); - m17 = _mm_mul_ps(m16, w3); - m8 = _mm_mul_ps(m4, w2); - m18 = _mm_mul_ps(m16, w4); - - m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); - m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0)); - m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); - m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1)); - - m11 = _mm_add_ps(m9, m10); - m21 = _mm_add_ps(m19, m20); - m12 = _mm_sub_ps(m9, m10); - m22 = _mm_sub_ps(m19, m20); - - m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); - m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2)); - - m14 = _mm_unpacklo_ps(m12, m13); - m24 = _mm_unpacklo_ps(m22, m23); - - _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); - _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24); - } - } -} -#endif - static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa) { @@ -385,218 +294,6 @@ static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, } } -#ifdef USE_SSE -ALIGN static const int32_t negate[4] = { 0x0, 0x0, 0x0, 0x80000000 }; - -__declspec(naked) static void passf4pos_sse(const uint16_t l1, const complex_t *cc, - complex_t *ch, const complex_t *wa1, const complex_t *wa2, - const complex_t *wa3) -{ - __asm { - push ebx - mov ebx, esp - and esp, -16 - push edi - push esi - sub esp, 8 - movzx edi, WORD PTR [ebx+8] - - movaps xmm1, XMMWORD PTR negate - - test edi, edi - jle l1_is_zero - - lea esi, DWORD PTR [edi+edi] - add esi, esi - sub esi, edi - add esi, esi - add esi, esi - add esi, esi - mov eax, DWORD PTR [ebx+16] - add esi, eax - lea ecx, DWORD PTR [edi+edi] - add 
ecx, ecx - add ecx, ecx - add ecx, ecx - add ecx, eax - lea edx, DWORD PTR [edi+edi] - add edx, edx - add edx, edx - add edx, eax - xor eax, eax - mov DWORD PTR [esp], ebp - mov ebp, DWORD PTR [ebx+12] - -fftloop: - lea edi, DWORD PTR [eax+eax] - add edi, edi - movaps xmm2, XMMWORD PTR [ebp+edi*8] - movaps xmm0, XMMWORD PTR [ebp+edi*8+16] - movaps xmm7, XMMWORD PTR [ebp+edi*8+32] - movaps xmm5, XMMWORD PTR [ebp+edi*8+48] - movaps xmm6, xmm2 - addps xmm6, xmm0 - movaps xmm4, xmm1 - xorps xmm4, xmm7 - movaps xmm3, xmm1 - xorps xmm3, xmm5 - xorps xmm2, xmm1 - xorps xmm0, xmm1 - addps xmm7, xmm5 - subps xmm2, xmm0 - movaps xmm0, xmm6 - shufps xmm0, xmm7, 68 - subps xmm4, xmm3 - shufps xmm6, xmm7, 238 - movaps xmm5, xmm2 - shufps xmm5, xmm4, 68 - movaps xmm3, xmm0 - addps xmm3, xmm6 - shufps xmm2, xmm4, 187 - subps xmm0, xmm6 - movaps xmm4, xmm5 - addps xmm4, xmm2 - mov edi, DWORD PTR [ebx+16] - movaps XMMWORD PTR [edi+eax*8], xmm3 - subps xmm5, xmm2 - movaps XMMWORD PTR [edx+eax*8], xmm4 - movaps XMMWORD PTR [ecx+eax*8], xmm0 - movaps XMMWORD PTR [esi+eax*8], xmm5 - add eax, 2 - movzx eax, ax - movzx edi, WORD PTR [ebx+8] - cmp eax, edi - jl fftloop - - mov ebp, DWORD PTR [esp] - -l1_is_zero: - - add esp, 8 - pop esi - pop edi - mov esp, ebx - pop ebx - ret - } -} -#endif - -#if 0 -static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, - complex_t *ch, const complex_t *wa1, const complex_t *wa2, - const complex_t *wa3) -{ - uint16_t i, k, ac, ah; - - for (k = 0; k < l1; k++) - { - ac = 4*k*ido; - ah = k*ido; - - for (i = 0; i < ido; i+=2) - { - __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16; - __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, m17, m18, m19, m20, m21, m22, m23; - __m128 w1, w2, w3, w4, w5, w6, m24, m25, m26, m27, m28, m29, m30; - __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); - - m1 = _mm_load_ps(&RE(cc[ac+i])); - m2 = _mm_load_ps(&RE(cc[ac+i+2*ido])); - m3 = _mm_add_ps(m1, m2); - m4 = 
_mm_sub_ps(m1, m2); - - n1 = _mm_load_ps(&RE(cc[ac+i+ido])); - n2 = _mm_load_ps(&RE(cc[ac+i+3*ido])); - n3 = _mm_add_ps(n1, n2); - - n4 = _mm_mul_ps(neg1, n1); - n5 = _mm_mul_ps(neg1, n2); - n6 = _mm_sub_ps(n4, n5); - - m5 = _mm_add_ps(m3, n3); - - n7 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2, 3, 0, 1)); - n8 = _mm_add_ps(m4, n7); - - m6 = _mm_sub_ps(m3, n3); - n9 = _mm_sub_ps(m4, n7); - - _mm_store_ps(&RE(ch[ah+i]), m5); - -#if 0 - static INLINE void ComplexMult(real_t *y1, real_t *y2, - real_t x1, real_t x2, real_t c1, real_t c2) - { - *y1 = MUL_F(x1, c1) + MUL_F(x2, c2); - *y2 = MUL_F(x2, c1) - MUL_F(x1, c2); - } - - m7.0 = RE(c2)*RE(wa1[i]) - m7.1 = IM(c2)*IM(wa1[i]) - m7.2 = RE(c6)*RE(wa1[i+1]) - m7.3 = IM(c6)*IM(wa1[i+1]) - - m8.0 = RE(c2)*IM(wa1[i]) - m8.1 = IM(c2)*RE(wa1[i]) - m8.2 = RE(c6)*IM(wa1[i+1]) - m8.3 = IM(c6)*RE(wa1[i+1]) - - RE(0) = m7.0 - m7.1 - IM(0) = m8.0 + m8.1 - RE(1) = m7.2 - m7.3 - IM(1) = m8.2 + m8.3 - - //// - RE(0) = RE(c2)*RE(wa1[i]) - IM(c2)*IM(wa1[i]) - IM(0) = RE(c2)*IM(wa1[i]) + IM(c2)*RE(wa1[i]) - RE(1) = RE(c6)*RE(wa1[i+1]) - IM(c6)*IM(wa1[i+1]) - IM(1) = RE(c6)*IM(wa1[i+1]) + IM(c6)*RE(wa1[i+1]) -#endif - - w1 = _mm_load_ps(&RE(wa1[i])); - w3 = _mm_load_ps(&RE(wa2[i])); - w5 = _mm_load_ps(&RE(wa3[i])); - - w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); - w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); - w6 = _mm_shuffle_ps(w5, w5, _MM_SHUFFLE(2, 3, 0, 1)); - - m7 = _mm_mul_ps(n8, w1); - m15 = _mm_mul_ps(m6, w3); - m23 = _mm_mul_ps(n9, w5); - m8 = _mm_mul_ps(n8, w2); - m16 = _mm_mul_ps(m6, w4); - m24 = _mm_mul_ps(n9, w6); - - m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); - m17 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(2, 0, 2, 0)); - m25 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(2, 0, 2, 0)); - m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); - m18 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(3, 1, 3, 1)); - m26 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(3, 1, 3, 1)); - - m11 = _mm_add_ps(m9, m10); - m19 = 
_mm_add_ps(m17, m18); - m27 = _mm_add_ps(m25, m26); - m12 = _mm_sub_ps(m9, m10); - m20 = _mm_sub_ps(m17, m18) |