diff options
Diffstat (limited to 'liba52/resample_mmx.c')
-rw-r--r-- | liba52/resample_mmx.c | 266 |
1 files changed, 94 insertions, 172 deletions
diff --git a/liba52/resample_mmx.c b/liba52/resample_mmx.c index 61a45ab5d4..dcbbbd46eb 100644 --- a/liba52/resample_mmx.c +++ b/liba52/resample_mmx.c @@ -1,11 +1,3 @@ -// this code come from a52dec/libao/audio_out_oss.c - -// FIXME FIXME FIXME - -// a52_resample_init should find the requested converter (from type flags -> -// given number of channels) and set up some function pointers... - -// a52_resample() should do the conversion. // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) @@ -15,137 +7,14 @@ and it would mean (C / MMX2 / MMX / 3DNOW) versions */ -#include <inttypes.h> -#include <stdio.h> -#include "a52.h" -#include "mm_accel.h" -#include "../config.h" - -int (* a52_resample) (float * _f, int16_t * s16)=NULL; - -#ifdef ARCH_X86 static uint64_t __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; static uint64_t __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; static uint64_t __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; -#endif - -static inline int16_t convert (int32_t i) -{ - if (i > 0x43c07fff) - return 32767; - else if (i < 0x43bf8000) - return -32768; - else - return i - 0x43c00000; -} - -static int chans=2; -static int flags=0; -int a52_resample_C(float * _f, int16_t * s16) -{ +static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ int i; int32_t * f = (int32_t *) _f; - - switch (flags) { - case A52_MONO: - for (i = 0; i < 256; i++) { - s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; - s16[5*i+4] = convert (f[i]); - } - break; - case A52_CHANNEL: - case A52_STEREO: - case A52_DOLBY: - for (i = 0; i < 256; i++) { - s16[2*i] = convert (f[i]); - s16[2*i+1] = convert (f[i+256]); - } - break; - case A52_3F: - for (i = 0; i < 256; i++) { - s16[5*i] = convert (f[i]); - s16[5*i+1] = convert (f[i+512]); - s16[5*i+2] = s16[5*i+3] = 0; - s16[5*i+4] = convert (f[i+256]); - } - break; - case A52_2F2R: - for (i = 0; i < 256; i++) { - s16[4*i] = convert (f[i]); - s16[4*i+1] = convert (f[i+256]); - s16[4*i+2] = convert (f[i+512]); - s16[4*i+3] = convert (f[i+768]); - } - break; - case A52_3F2R: - for (i = 0; i < 256; i++) { - s16[5*i] = convert (f[i]); - s16[5*i+1] = convert (f[i+512]); - s16[5*i+2] = convert (f[i+768]); - s16[5*i+3] = convert (f[i+1024]); - s16[5*i+4] = convert (f[i+256]); - } - break; - case A52_MONO | A52_LFE: - for (i = 0; i < 256; i++) { - s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; - s16[6*i+4] = convert (f[i+256]); - s16[6*i+5] = convert (f[i]); - } - break; - case A52_CHANNEL | A52_LFE: - case A52_STEREO | A52_LFE: - case A52_DOLBY | A52_LFE: - for (i = 0; i < 256; i++) { - s16[6*i] = convert (f[i+256]); - s16[6*i+1] = convert (f[i+512]); - s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; - s16[6*i+5] = convert (f[i]); - } - break; - case A52_3F | A52_LFE: - for (i = 0; i < 256; i++) { - s16[6*i] = convert (f[i+256]); - s16[6*i+1] = convert (f[i+768]); - s16[6*i+2] = s16[6*i+3] = 0; - s16[6*i+4] = convert (f[i+512]); - s16[6*i+5] = convert (f[i]); - } - break; - case A52_2F2R | A52_LFE: - for (i = 0; i < 256; i++) { - s16[6*i] = convert (f[i+256]); - s16[6*i+1] = convert (f[i+512]); - s16[6*i+2] = convert (f[i+768]); - s16[6*i+3] = convert (f[i+1024]); - s16[6*i+4] = 0; - s16[6*i+5] = convert (f[i]); - } - break; - case A52_3F2R | A52_LFE: - for (i = 0; i < 256; i++) { - s16[6*i] = convert (f[i+256]); - s16[6*i+1] = convert (f[i+768]); - s16[6*i+2] = convert (f[i+1024]); - s16[6*i+3] = convert (f[i+1280]); - s16[6*i+4] = convert (f[i+512]); - s16[6*i+5] = convert (f[i]); - } - break; - } - return chans*256; -} - -#ifdef ARCH_X86 -int a52_resample_MMX(float * _f, int16_t * s16) -{ - int i; - int32_t * f = (int32_t *) _f; - - switch (flags) { - case A52_MONO: asm volatile( "movl $-512, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -178,10 +47,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1280), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_CHANNEL: - case A52_STEREO: - case A52_DOLBY: + return 5*256; +} + +static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it #ifdef HAVE_SSE asm volatile( @@ -225,8 +96,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+512), "r" (f+256) :"%esi", "memory" ); - break; - case A52_3F: + return 2*256; +} + +static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -277,8 +152,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1280), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_2F2R: + return 5*256; +} + +static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -325,8 +204,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1024), "r" (f+256) :"%esi", "memory" ); - break; - case A52_3F2R: + return 4*256; +} + +static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -381,8 +264,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1280), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_MONO | A52_LFE: + return 5*256; +} + +static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -418,10 +305,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1536), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_CHANNEL | A52_LFE: - case A52_STEREO | A52_LFE: - case A52_DOLBY | A52_LFE: + return 6*256; +} + +static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -455,8 +344,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1536), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_3F | A52_LFE: + return 6*256; +} + +static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -492,8 +385,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1536), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_2F2R | A52_LFE: + return 6*256; +} + +static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -535,8 +432,12 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1536), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - case A52_3F2R | A52_LFE: + return 6*256; +} + +static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ + int i; + int32_t * f = (int32_t *) _f; asm volatile( "movl $-1024, %%esi \n\t" "movq magicF2W, %%mm7 \n\t" @@ -580,27 +481,48 @@ int a52_resample_MMX(float * _f, int16_t * s16) :: "r" (s16+1536), "r" (f+256) :"%esi", "%edi", "memory" ); - break; - } - return chans*256; + return 6*256; } -#endif //arch_x86 -void a52_resample_init(uint32_t mm_accel,int _flags,int _chans){ - chans=_chans; - flags=_flags; - if(a52_resample==NULL) // only once please ;) - { - if(mm_accel & MM_ACCEL_X86_MMX) fprintf(stderr, "Using MMX optimized resampler\n"); - else fprintf(stderr, "No accelerated resampler found\n"); +static void* a52_resample_MMX(int flags, int ch){ + switch (flags) { + case A52_MONO: + if(ch==5) return a52_resample_MONO_to_5_MMX; + break; + case A52_CHANNEL: + case A52_STEREO: + case A52_DOLBY: + if(ch==2) return a52_resample_STEREO_to_2_MMX; + break; + case A52_3F: + if(ch==5) return a52_resample_3F_to_5_MMX; + break; + case A52_2F2R: + if(ch==4) return a52_resample_2F_2R_to_4_MMX; + break; + case A52_3F2R: + if(ch==5) return a52_resample_3F_2R_to_5_MMX; + break; + case A52_MONO | A52_LFE: + if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; + break; + case A52_CHANNEL | A52_LFE: + case A52_STEREO | A52_LFE: + case A52_DOLBY | A52_LFE: + if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; + break; + case A52_3F | A52_LFE: + if(ch==6) return a52_resample_3F_LFE_to_6_MMX; + break; + case A52_2F2R | A52_LFE: + if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; + break; + case A52_3F2R | A52_LFE: + if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; + break; } - -#ifdef ARCH_X86 - if(mm_accel & MM_ACCEL_X86_MMX) a52_resample= a52_resample_MMX; -#else - if(0); -#endif - else a52_resample= a52_resample_C; + return NULL; } + |