diff options
author | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-12-30 19:57:14 +0000 |
---|---|---|
committer | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-12-30 19:57:14 +0000 |
commit | 7dc503f36b7666faad798ece47da236ce6508f56 (patch) | |
tree | dba002dee760fabec0870696c5bb57a45f311c80 /liba52/downmix.c | |
parent | 1b72f7b2276494c50240e5dbce068753449d9454 (diff) | |
download | mpv-7dc503f36b7666faad798ece47da236ce6508f56.tar.bz2 mpv-7dc503f36b7666faad798ece47da236ce6508f56.tar.xz |
runtime cpudetect
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3905 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'liba52/downmix.c')
-rw-r--r-- | liba52/downmix.c | 638 |
1 files changed, 442 insertions, 196 deletions
diff --git a/liba52/downmix.c b/liba52/downmix.c index 07cb396d9a..deb192a204 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -24,6 +24,7 @@ */ #include "config.h" +#include "../cpudetect.h" #include <string.h> #include <inttypes.h> @@ -33,6 +34,20 @@ #define CONVERT(acmod,output) (((output) << 3) + (acmod)) +//#undef HAVE_SSE +//#undef HAVE_MMX + +void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev)= NULL; +void (*upmix)(sample_t * samples, int acmod, int output)= NULL; + +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev); +static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev); +static void upmix_MMX (sample_t * samples, int acmod, int output); +static void upmix_C (sample_t * samples, int acmod, int output); + int downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -62,6 +77,13 @@ int downmix_init (int input, int flags, sample_t * level, }; int output; + upmix= upmix_C; + downmix= downmix_C; +#ifdef ARCH_X86 + if(gCpuCaps.hasMMX) upmix= upmix_MMX; + if(gCpuCaps.hasSSE) downmix= downmix_SSE; +#endif + output = flags & A52_CHANNEL_MASK; if (output > A52_DOLBY) return -1; @@ -305,7 +327,359 @@ static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) { int i; -#ifdef HAVE_SSE + for (i = 0; i < 256; i++) + dest[i] += src[i] + bias; +} + +static void mix3to1 (sample_t * samples, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] += samples[i + 256] + samples[i + 512] + bias; +} + +static void mix4to1 (sample_t * samples, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] += (samples[i + 256] + samples[i + 512] + + samples[i + 768] + bias); +} + +static void mix5to1 (sample_t * samples, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] += (samples[i + 256] + samples[i + 512] + + 
samples[i + 768] + samples[i + 1024] + bias); +} + +static void mix3to2 (sample_t * samples, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + samples[i] += common; + samples[i + 256] = samples[i + 512] + common; + } +} + +static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = right[i + 256] + bias; + left[i] += common; + right[i] += common; + } +} + +static void mix21toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t surround; + + for (i = 0; i < 256; i++) { + surround = samples[i + 512]; + samples[i] += bias - surround; + samples[i + 256] += bias + surround; + } +} + +static void mix31to2 (sample_t * samples, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + samples[i + 768] + bias; + samples[i] += common; + samples[i + 256] = samples[i + 512] + common; + } +} + +static void mix31toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t common, surround; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + surround = samples[i + 768]; + samples[i] += common - surround; + samples[i + 256] = samples[i + 512] + common + surround; + } +} + +static void mix22toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t surround; + + for (i = 0; i < 256; i++) { + surround = samples[i + 512] + samples[i + 768]; + samples[i] += bias - surround; + samples[i + 256] += bias + surround; + } +} + +static void mix32to2 (sample_t * samples, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + samples[i] += common + samples[i + 768]; + samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; + } +} + +static void mix32toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t common, surround; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + 
bias; + surround = samples[i + 768] + samples[i + 1024]; + samples[i] += common - surround; + samples[i + 256] = samples[i + 512] + common + surround; + } +} + +static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + dest[i] = src[i] + src[i + 256] + bias; +} + +static void zero (sample_t * samples) +{ + int i; + for (i = 0; i < 256; i++) + samples[i] = 0; +} + +static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev) +{ + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { + + case CONVERT (A52_CHANNEL, A52_CHANNEL2): + memcpy (samples, samples + 256, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_CHANNEL, A52_MONO): + case CONVERT (A52_STEREO, A52_MONO): + mix_2to1: + mix2to1 (samples, samples + 256, bias); + break; + + case CONVERT (A52_2F1R, A52_MONO): + if (slev == 0) + goto mix_2to1; + case CONVERT (A52_3F, A52_MONO): + mix_3to1: + mix3to1 (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_MONO): + if (slev == 0) + goto mix_3to1; + case CONVERT (A52_2F2R, A52_MONO): + if (slev == 0) + goto mix_2to1; + mix4to1 (samples, bias); + break; + + case CONVERT (A52_3F2R, A52_MONO): + if (slev == 0) + goto mix_3to1; + mix5to1 (samples, bias); + break; + + case CONVERT (A52_MONO, A52_DOLBY): + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F, A52_STEREO): + case CONVERT (A52_3F, A52_DOLBY): + mix_3to2: + mix3to2 (samples, bias); + break; + + case CONVERT (A52_2F1R, A52_STEREO): + if (slev == 0) + break; + mix21to2 (samples, samples + 256, bias); + break; + + case CONVERT (A52_2F1R, A52_DOLBY): + mix21toS (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_STEREO): + if (slev == 0) + goto mix_3to2; + mix31to2 (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_DOLBY): + mix31toS (samples, bias); + break; + + case CONVERT (A52_2F2R, A52_STEREO): + if (slev == 0) + break; + mix2to1 
(samples, samples + 512, bias); + mix2to1 (samples + 256, samples + 768, bias); + break; + + case CONVERT (A52_2F2R, A52_DOLBY): + mix22toS (samples, bias); + break; + + case CONVERT (A52_3F2R, A52_STEREO): + if (slev == 0) + goto mix_3to2; + mix32to2 (samples, bias); + break; + + case CONVERT (A52_3F2R, A52_DOLBY): + mix32toS (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_3F): + if (slev == 0) + break; + mix21to2 (samples, samples + 512, bias); + break; + + case CONVERT (A52_3F2R, A52_3F): + if (slev == 0) + break; + mix2to1 (samples, samples + 768, bias); + mix2to1 (samples + 512, samples + 1024, bias); + break; + + case CONVERT (A52_3F1R, A52_2F1R): + mix3to2 (samples, bias); + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_2F2R, A52_2F1R): + mix2to1 (samples + 512, samples + 768, bias); + break; + + case CONVERT (A52_3F2R, A52_2F1R): + mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) + move2to1 (samples + 768, samples + 512, bias); + break; + + case CONVERT (A52_3F2R, A52_3F1R): + mix2to1 (samples + 768, samples + 1024, bias); + break; + + case CONVERT (A52_2F1R, A52_2F2R): + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F1R, A52_2F2R): + mix3to2 (samples, bias); + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F2R, A52_2F2R): + mix3to2 (samples, bias); + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); + memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F1R, A52_3F2R): + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); /* 5th channel slot starts at 4*256, not 1027 */ + break; + } +} + +static void upmix_C (sample_t * samples, int acmod, int output) +{ + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { + + case CONVERT (A52_CHANNEL, A52_CHANNEL2): + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F2R, 
A52_MONO): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_MONO): + case CONVERT (A52_2F2R, A52_MONO): + zero (samples + 768); + case CONVERT (A52_3F, A52_MONO): + case CONVERT (A52_2F1R, A52_MONO): + zero (samples + 512); + case CONVERT (A52_CHANNEL, A52_MONO): + case CONVERT (A52_STEREO, A52_MONO): + zero (samples + 256); + break; + + case CONVERT (A52_3F2R, A52_STEREO): + case CONVERT (A52_3F2R, A52_DOLBY): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_STEREO): + case CONVERT (A52_3F1R, A52_DOLBY): + zero (samples + 768); + case CONVERT (A52_3F, A52_STEREO): + case CONVERT (A52_3F, A52_DOLBY): + mix_3to2: + memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); + zero (samples + 256); + break; + + case CONVERT (A52_2F2R, A52_STEREO): + case CONVERT (A52_2F2R, A52_DOLBY): + zero (samples + 768); + case CONVERT (A52_2F1R, A52_STEREO): + case CONVERT (A52_2F1R, A52_DOLBY): + zero (samples + 512); + break; + + case CONVERT (A52_3F2R, A52_3F): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_3F): + case CONVERT (A52_2F2R, A52_2F1R): + zero (samples + 768); + break; + + case CONVERT (A52_3F2R, A52_3F1R): + zero (samples + 1024); + break; + + case CONVERT (A52_3F2R, A52_2F1R): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_2F1R): + mix_31to21: + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); + goto mix_3to2; + + case CONVERT (A52_3F2R, A52_2F2R): + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); + goto mix_31to21; + } +} + +#ifdef ARCH_X86 +static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) +{ asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -324,17 +698,10 @@ static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) :: "r" (src+256), "r" (dest+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - dest[i] += src[i] + bias; -#endif } -static void mix3to1 (sample_t * samples, sample_t bias) +static void mix3to1_SSE (sample_t * 
samples, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -351,17 +718,10 @@ static void mix3to1 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] += samples[i + 256] + samples[i + 512] + bias; -#endif } -static void mix4to1 (sample_t * samples, sample_t bias) +static void mix4to1_SSE (sample_t * samples, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -379,18 +739,10 @@ static void mix4to1 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] += (samples[i + 256] + samples[i + 512] + - samples[i + 768] + bias); -#endif } -static void mix5to1 (sample_t * samples, sample_t bias) +static void mix5to1_SSE (sample_t * samples, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -409,19 +761,10 @@ static void mix5to1 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] += (samples[i + 256] + samples[i + 512] + - samples[i + 768] + samples[i + 1024] + bias); -#endif } -static void mix3to2 (sample_t * samples, sample_t bias) +static void mix3to2_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -440,21 +783,10 @@ static void mix3to2 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - samples[i] += common; - samples[i + 256] = samples[i + 512] + common; - } -#endif } -static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) +static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) 
{ - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -473,21 +805,10 @@ static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) :: "r" (left+256), "r" (right+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = right[i + 256] + bias; - left[i] += common; - right[i] += common; - } -#endif } -static void mix21toS (sample_t * samples, sample_t bias) +static void mix21toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -507,21 +828,10 @@ static void mix21toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - surround = samples[i + 512]; - samples[i] += bias - surround; - samples[i + 256] += bias + surround; - } -#endif } -static void mix31to2 (sample_t * samples, sample_t bias) +static void mix31to2_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -541,21 +851,10 @@ static void mix31to2 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + samples[i + 768] + bias; - samples[i] += common; - samples[i + 256] = samples[i + 512] + common; - } -#endif } -static void mix31toS (sample_t * samples, sample_t bias) +static void mix31toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common, surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -577,22 +876,10 @@ static void mix31toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - surround = samples[i + 768]; - samples[i] += common - surround; - 
samples[i + 256] = samples[i + 512] + common + surround; - } -#endif } -static void mix22toS (sample_t * samples, sample_t bias) +static void mix22toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -613,21 +900,10 @@ static void mix22toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - surround = samples[i + 512] + samples[i + 768]; - samples[i] += bias - surround; - samples[i + 256] += bias + surround; - } -#endif } -static void mix32to2 (sample_t * samples, sample_t bias) +static void mix32to2_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -647,21 +923,10 @@ static void mix32to2 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - samples[i] += common + samples[i + 768]; - samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; - } -#endif } -static void mix32toS (sample_t * samples, sample_t bias) +static void mix32toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common, surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -684,21 +949,10 @@ static void mix32toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - surround = samples[i + 768] + samples[i + 1024]; - samples[i] += common - surround; - samples[i + 256] = samples[i + 512] + common + surround; - } -#endif } -static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) +static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %2, %%xmm7 
\n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -717,16 +971,10 @@ static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) :: "r" (src+256), "r" (dest+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - dest[i] = src[i] + src[i + 256] + bias; -#endif } -static void zero (sample_t * samples) +static void zero_MMX(sample_t * samples) { - int i; -#ifdef HAVE_MMX asm volatile( "movl $-1024, %%esi \n\t" "pxor %%mm0, %%mm0 \n\t" @@ -741,13 +989,10 @@ static void zero (sample_t * samples) :: "r" (samples+256) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] = 0; -#endif } -void downmix (sample_t * samples, int acmod, int output, sample_t bias, + +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -758,31 +1003,31 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, case CONVERT (A52_CHANNEL, A52_MONO): case CONVERT (A52_STEREO, A52_MONO): - mix_2to1: - mix2to1 (samples, samples + 256, bias); + mix_2to1_SSE: + mix2to1_SSE (samples, samples + 256, bias); break; case CONVERT (A52_2F1R, A52_MONO): if (slev == 0) - goto mix_2to1; + goto mix_2to1_SSE; case CONVERT (A52_3F, A52_MONO): - mix_3to1: - mix3to1 (samples, bias); + mix_3to1_SSE: + mix3to1_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_MONO): if (slev == 0) - goto mix_3to1; + goto mix_3to1_SSE; case CONVERT (A52_2F2R, A52_MONO): if (slev == 0) - goto mix_2to1; - mix4to1 (samples, bias); + goto mix_2to1_SSE; + mix4to1_SSE (samples, bias); break; case CONVERT (A52_3F2R, A52_MONO): if (slev == 0) - goto mix_3to1; - mix5to1 (samples, bias); + goto mix_3to1_SSE; + mix5to1_SSE (samples, bias); break; case CONVERT (A52_MONO, A52_DOLBY): @@ -791,80 +1036,80 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, case CONVERT (A52_3F, A52_STEREO): case CONVERT (A52_3F, A52_DOLBY): - mix_3to2: - mix3to2 (samples, bias); + 
mix_3to2_SSE: + mix3to2_SSE (samples, bias); break; case CONVERT (A52_2F1R, A52_STEREO): if (slev == 0) break; - mix21to2 (samples, samples + 256, bias); + mix21to2_SSE (samples, samples + 256, bias); break; case CONVERT (A52_2F1R, A52_DOLBY): - mix21toS (samples, bias); + mix21toS_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_STEREO): if (slev == 0) - goto mix_3to2; - mix31to2 (samples, bias); + goto mix_3to2_SSE; + mix31to2_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_DOLBY): - mix31toS (samples, bias); + mix31toS_SSE (samples, bias); break; case CONVERT (A52_2F2R, A52_STEREO): if (slev == 0) break; - mix2to1 (samples, samples + 512, bias); - mix2to1 (samples + 256, samples + 768, bias); + mix2to1_SSE (samples, samples + 512, bias); + mix2to1_SSE (samples + 256, samples + 768, bias); break; case CONVERT (A52_2F2R, A52_DOLBY): - mix22toS (samples, bias); + mix22toS_SSE (samples, bias); break; case CONVERT (A52_3F2R, A52_STEREO): if (slev == 0) - goto mix_3to2; - mix32to2 (samples, bias); + goto mix_3to2_SSE; + mix32to2_SSE (samples, bias); break; case CONVERT (A52_3F2R, A52_DOLBY): - mix32toS (samples, bias); + mix32toS_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_3F): if (slev == 0) break; - mix21to2 (samples, samples + 512, bias); + mix21to2_SSE (samples, samples + 512, bias); break; case CONVERT (A52_3F2R, A52_3F): if (slev == 0) break; - mix2to1 (samples, samples + 768, bias); - mix2to1 (samples + 512, samples + 1024, bias); + mix2to1_SSE (samples, samples + 768, bias); + mix2to1_SSE (samples + 512, samples + 1024, bias); break; case CONVERT (A52_3F1R, A52_2F1R): - mix3to2 (samples, bias); + mix3to2_SSE (samples, bias); memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); break; case CONVERT (A52_2F2R, A52_2F1R): - mix2to1 (samples + 512, samples + 768, bias); + mix2to1_SSE (samples + 512, samples + 768, bias); break; case CONVERT (A52_3F2R, A52_2F1R): - mix3to2 (samples, bias); //FIXME possible bug? 
(output doesnt seem to be used) - move2to1 (samples + 768, samples + 512, bias); + mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) + move2to1_SSE (samples + 768, samples + 512, bias); break; case CONVERT (A52_3F2R, A52_3F1R): - mix2to1 (samples + 768, samples + 1024, bias); + mix2to1_SSE (samples + 768, samples + 1024, bias); break; case CONVERT (A52_2F1R, A52_2F2R): @@ -872,12 +1117,12 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, break; case CONVERT (A52_3F1R, A52_2F2R): - mix3to2 (samples, bias); + mix3to2_SSE (samples, bias); memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); break; case CONVERT (A52_3F2R, A52_2F2R): - mix3to2 (samples, bias); + mix3to2_SSE (samples, bias); memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); break; @@ -888,7 +1133,7 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, } } -void upmix (sample_t * samples, int acmod, int output) +static void upmix_MMX (sample_t * samples, int acmod, int output) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -897,59 +1142,60 @@ void upmix (sample_t * samples, int acmod, int output) break; case CONVERT (A52_3F2R, A52_MONO): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_MONO): case CONVERT (A52_2F2R, A52_MONO): - zero (samples + 768); + zero_MMX (samples + 768); case CONVERT (A52_3F, A52_MONO): case CONVERT (A52_2F1R, A52_MONO): - zero (samples + 512); + zero_MMX (samples + 512); case CONVERT (A52_CHANNEL, A52_MONO): case CONVERT (A52_STEREO, A52_MONO): - zero (samples + 256); + zero_MMX (samples + 256); break; case CONVERT (A52_3F2R, A52_STEREO): case CONVERT (A52_3F2R, A52_DOLBY): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_STEREO): case CONVERT (A52_3F1R, A52_DOLBY): - zero (samples + 768); + zero_MMX (samples + 768); case CONVERT (A52_3F, 
A52_STEREO): case CONVERT (A52_3F, A52_DOLBY): - mix_3to2: + mix_3to2_MMX: memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); - zero (samples + 256); + zero_MMX (samples + 256); break; case CONVERT (A52_2F2R, A52_STEREO): case CONVERT (A52_2F2R, A52_DOLBY): - zero (samples + 768); + zero_MMX (samples + 768); case CONVERT (A52_2F1R, A52_STEREO): case CONVERT (A52_2F1R, A52_DOLBY): - zero (samples + 512); + zero_MMX (samples + 512); break; case CONVERT (A52_3F2R, A52_3F): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_3F): case CONVERT (A52_2F2R, A52_2F1R): - zero (samples + 768); + zero_MMX (samples + 768); break; case CONVERT (A52_3F2R, A52_3F1R): - zero (samples + 1024); + zero_MMX (samples + 1024); break; case CONVERT (A52_3F2R, A52_2F1R): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_2F1R): - mix_31to21: + mix_31to21_MMX: memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); - goto mix_3to2; + goto mix_3to2_MMX; case CONVERT (A52_3F2R, A52_2F2R): memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); - goto mix_31to21; + goto mix_31to21_MMX; } } +#endif //ARCH_X86 |