summary refs log tree commit diff stats
path: root/libfaad2/filtbank.c
diff options
context:
space:
mode:
author rtognimp <rtognimp@b3059339-0415-0410-9bf9-f77b7e298cf2> 2006-04-18 19:39:34 +0000
committer rtognimp <rtognimp@b3059339-0415-0410-9bf9-f77b7e298cf2> 2006-04-18 19:39:34 +0000
commit 82361d50d0dcbb72132fe1203fe152a89dd165e9 (patch)
tree 4abb0b4820172c1167ddb75433038be03aa44063 /libfaad2/filtbank.c
parent b664e7f3216a22b9e3f92e4861733a3222fa11d8 (diff)
download mpv-82361d50d0dcbb72132fe1203fe152a89dd165e9.tar.bz2
mpv-82361d50d0dcbb72132fe1203fe152a89dd165e9.tar.xz
Update to faad2 cvs 20040915+MPlayer fixes
Patch by me and Emanuele Giaquinta
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@18142 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libfaad2/filtbank.c')
-rw-r--r-- libfaad2/filtbank.c 367
1 files changed, 3 insertions, 364 deletions
diff --git a/libfaad2/filtbank.c b/libfaad2/filtbank.c
index 15798847c2..4dc48dc119 100644
--- a/libfaad2/filtbank.c
+++ b/libfaad2/filtbank.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: filtbank.c,v 1.38 2004/06/30 12:45:56 menno Exp $
+** $Id: filtbank.c,v 1.41 2004/09/08 09:43:11 gcp Exp $
**/
#include "common.h"
@@ -87,15 +87,6 @@ fb_info *filter_bank_init(uint16_t frame_len)
}
#endif
-#ifdef USE_SSE
- if (cpu_has_sse())
- {
- fb->if_func = ifilter_bank_sse;
- } else {
- fb->if_func = ifilter_bank;
- }
-#endif
-
return fb;
}
@@ -140,30 +131,6 @@ static INLINE void imdct_long(fb_info *fb, real_t *in_data, real_t *out_data, ui
#endif
}
-#ifdef USE_SSE
-static INLINE void imdct_long_sse(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
-{
-#ifdef LD_DEC
- mdct_info *mdct = NULL;
-
- switch (len)
- {
- case 2048:
- case 1920:
- mdct = fb->mdct2048;
- break;
- case 1024:
- case 960:
- mdct = fb->mdct1024;
- break;
- }
-
- faad_imdct_sse(mdct, in_data, out_data);
-#else
- faad_imdct_sse(fb->mdct2048, in_data, out_data);
-#endif
-}
-#endif
#ifdef LTP_DEC
static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
@@ -350,8 +317,8 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
#if 0
for (i = 0; i < 1024; i++)
{
- //printf("%d\n", time_out[i]);
- printf("0x%.8X\n", time_out[i]);
+ printf("%d\n", time_out[i]);
+ //printf("0x%.8X\n", time_out[i]);
}
#endif
@@ -362,334 +329,6 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
#endif
}
-#ifdef USE_SSE
-void ifilter_bank_sse(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
- uint8_t window_shape_prev, real_t *freq_in,
- real_t *time_out, uint8_t object_type, uint16_t frame_len)
-{
- int16_t i;
- ALIGN real_t transf_buf[2*1024] = {0};
-
- const real_t *window_long = NULL;
- const real_t *window_long_prev = NULL;
- const real_t *window_short = NULL;
- const real_t *window_short_prev = NULL;
-
- uint16_t nlong = frame_len;
- uint16_t nshort = frame_len/8;
- uint16_t trans = nshort/2;
-
- uint16_t nflat_ls = (nlong-nshort)/2;
-
-#ifdef PROFILE
- int64_t count = faad_get_ts();
-#endif
-
-#ifdef LD_DEC
- if (object_type == LD)
- {
- window_long = fb->ld_window[window_shape];
- window_long_prev = fb->ld_window[window_shape_prev];
- } else {
-#endif
- window_long = fb->long_window[window_shape];
- window_long_prev = fb->long_window[window_shape_prev];
- window_short = fb->short_window[window_shape];
- window_short_prev = fb->short_window[window_shape_prev];
-#ifdef LD_DEC
- }
-#endif
-
- switch (window_sequence)
- {
- case ONLY_LONG_SEQUENCE:
- imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
- for (i = 0; i < nlong; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
-
- m1 = _mm_load_ps(&transf_buf[i]);
- m2 = _mm_load_ps(&window_long_prev[i]);
- m6 = _mm_load_ps(&window_long[nlong-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+i]);
- m5 = _mm_load_ps(&transf_buf[nlong+i]);
-
- m4 = _mm_mul_ps(m1, m2);
- m7 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_add_ps(m4, m3);
- m8 = _mm_mul_ps(m5, m7);
-
- _mm_store_ps(&time_out[i], m4);
- _mm_store_ps(&time_out[nlong+i], m8);
- }
- break;
-
- case LONG_START_SEQUENCE:
- imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
- for (i = 0; i < nlong; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[i]);
- __m128 m2 = _mm_load_ps(&window_long_prev[i]);
- __m128 m3 = _mm_load_ps(&time_out[nlong+i]);
-
- __m128 m4 = _mm_mul_ps(m1, m2);
- m4 = _mm_add_ps(m4, m3);
-
- _mm_store_ps(&time_out[i], m4);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]);
- _mm_store_ps(&time_out[nlong+i], m1);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nlong+nflat_ls+i]);
- __m128 m2 = _mm_load_ps(&window_short[nshort-4-i]);
- __m128 m3, m4;
-
- m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m3);
-
- _mm_store_ps(&time_out[nlong+nflat_ls+i], m4);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_setzero_ps();
- _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1);
- }
- break;
-
- case EIGHT_SHORT_SEQUENCE:
- faad_imdct_sse(fb->mdct256, &freq_in[0*nshort], &transf_buf[2*nshort*0]);
- faad_imdct_sse(fb->mdct256, &freq_in[1*nshort], &transf_buf[2*nshort*1]);
- faad_imdct_sse(fb->mdct256, &freq_in[2*nshort], &transf_buf[2*nshort*2]);
- faad_imdct_sse(fb->mdct256, &freq_in[3*nshort], &transf_buf[2*nshort*3]);
- faad_imdct_sse(fb->mdct256, &freq_in[4*nshort], &transf_buf[2*nshort*4]);
- faad_imdct_sse(fb->mdct256, &freq_in[5*nshort], &transf_buf[2*nshort*5]);
- faad_imdct_sse(fb->mdct256, &freq_in[6*nshort], &transf_buf[2*nshort*6]);
- faad_imdct_sse(fb->mdct256, &freq_in[7*nshort], &transf_buf[2*nshort*7]);
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&time_out[nlong+i]);
- _mm_store_ps(&time_out[i], m1);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nshort*0+i]);
- __m128 m2 = _mm_load_ps(&window_short_prev[i]);
- __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]);
-
- __m128 m4 = _mm_mul_ps(m1, m2);
- m4 = _mm_add_ps(m4, m3);
-
- _mm_store_ps(&time_out[nflat_ls+i], m4);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*1+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*1+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*2+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+1*nshort+i], m4);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*3+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*2+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*4+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+2*nshort+i], m4);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*5+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*3+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*6+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+3*nshort+i], m4);
- }
- for(i = 0; i < trans; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*7+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*4+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*8+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m4);
- }
- for (i = trans; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*7+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*8+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*9+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*10+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+5*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*11+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*12+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+6*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*13+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*14+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+7*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m5;
- m1 = _mm_load_ps(&transf_buf[nshort*15+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m3 = _mm_mul_ps(m1, m5);
-
- _mm_store_ps(&time_out[nflat_ls+8*nshort+i], m3);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_setzero_ps();
- _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1);
- }
- break;
-
- case LONG_STOP_SEQUENCE:
- imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&time_out[nlong+i]);
- _mm_store_ps(&time_out[i], m1);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+i]);
- __m128 m2 = _mm_load_ps(&window_short_prev[i]);
- __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]);
-
- __m128 m4 = _mm_mul_ps(m1, m2);
- m4 = _mm_add_ps(m4, m3);
-
- _mm_store_ps(&time_out[nflat_ls+i], m4);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+nshort+i]);
- __m128 m2 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort+i]);
-
- __m128 m3 = _mm_add_ps(m1, m2);
-
- _mm_store_ps(&time_out[nflat_ls+nshort+i], m3);
- }
- for (i = 0; i < nlong; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]);
- __m128 m2 = _mm_load_ps(&window_long[nlong-4-i]);
- __m128 m3, m4;
-
- m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m3);
-
- _mm_store_ps(&time_out[nlong+i], m4);
- }
- break;
- }
-
-#ifdef PROFILE
- count = faad_get_ts() - count;
- fb->cycles += count;
-#endif
-}
-#endif
#ifdef LTP_DEC
/* only works for LTP -> no overlapping, no short blocks */