From 7dc503f36b7666faad798ece47da236ce6508f56 Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 30 Dec 2001 19:57:14 +0000 Subject: runtime cpudetect git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3905 b3059339-0415-0410-9bf9-f77b7e298cf2 --- liba52/a52_internal.h | 4 +- liba52/downmix.c | 740 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 495 insertions(+), 249 deletions(-) (limited to 'liba52') diff --git a/liba52/a52_internal.h b/liba52/a52_internal.h index e26115d349..edd6642d5c 100644 --- a/liba52/a52_internal.h +++ b/liba52/a52_internal.h @@ -45,9 +45,9 @@ int downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev); int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, sample_t clev, sample_t slev); -void downmix (sample_t * samples, int acmod, int output, sample_t bias, +extern void (*downmix) (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev); -void upmix (sample_t * samples, int acmod, int output); +extern void (*upmix) (sample_t * samples, int acmod, int output); void imdct_init (uint32_t mm_accel); extern void (* imdct_256) (sample_t * data, sample_t * delay, sample_t bias); diff --git a/liba52/downmix.c b/liba52/downmix.c index 07cb396d9a..deb192a204 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -24,6 +24,7 @@ */ #include "config.h" +#include "../cpudetect.h" #include #include @@ -33,6 +34,20 @@ #define CONVERT(acmod,output) (((output) << 3) + (acmod)) +//#undef HAVE_SSE +//#undef HAVE_MMX + +void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev)= NULL; +void (*upmix)(sample_t * samples, int acmod, int output)= NULL; + +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev); +static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev); +static void upmix_MMX (sample_t * 
samples, int acmod, int output); +static void upmix_C (sample_t * samples, int acmod, int output); + int downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -62,6 +77,13 @@ int downmix_init (int input, int flags, sample_t * level, }; int output; + upmix= upmix_C; + downmix= downmix_C; +#ifdef ARCH_X86 + if(gCpuCaps.hasMMX) upmix= upmix_MMX; + if(gCpuCaps.hasSSE) downmix= downmix_SSE; +#endif + output = flags & A52_CHANNEL_MASK; if (output > A52_DOLBY) return -1; @@ -241,71 +263,423 @@ int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, coeff[2] = coeff[3] = level * slev; return 15; - case CONVERT (A52_3F2R, A52_DOLBY): - clev = LEVEL_3DB; - case CONVERT (A52_3F2R, A52_2F1R): - slev = LEVEL_3DB; + case CONVERT (A52_3F2R, A52_DOLBY): + clev = LEVEL_3DB; + case CONVERT (A52_3F2R, A52_2F1R): + slev = LEVEL_3DB; + case CONVERT (A52_3F2R, A52_STEREO): + coeff[0] = coeff[2] = level; + coeff[1] = level * clev; + coeff[3] = coeff[4] = level * slev; + return 31; + + case CONVERT (A52_3F1R, A52_3F): + coeff[0] = coeff[1] = coeff[2] = level; + coeff[3] = level * slev * LEVEL_3DB; + return 13; + + case CONVERT (A52_3F2R, A52_3F): + coeff[0] = coeff[1] = coeff[2] = level; + coeff[3] = coeff[4] = level * slev; + return 29; + + case CONVERT (A52_2F2R, A52_2F1R): + coeff[0] = coeff[1] = level; + coeff[2] = coeff[3] = level * LEVEL_3DB; + return 12; + + case CONVERT (A52_3F2R, A52_3F1R): + coeff[0] = coeff[1] = coeff[2] = level; + coeff[3] = coeff[4] = level * LEVEL_3DB; + return 24; + + case CONVERT (A52_2F1R, A52_2F2R): + coeff[0] = coeff[1] = level; + coeff[2] = level * LEVEL_3DB; + return 0; + + case CONVERT (A52_3F1R, A52_2F2R): + coeff[0] = coeff[2] = level; + coeff[1] = level * clev; + coeff[3] = level * LEVEL_3DB; + return 7; + + case CONVERT (A52_3F1R, A52_3F2R): + coeff[0] = coeff[1] = coeff[2] = level; + coeff[3] = level * LEVEL_3DB; + return 0; + + case CONVERT (A52_CHANNEL, A52_CHANNEL1): + coeff[0] = level; + 
coeff[1] = 0; + return 0; + + case CONVERT (A52_CHANNEL, A52_CHANNEL2): + coeff[0] = 0; + coeff[1] = level; + return 0; + } + + return -1; /* NOTREACHED */ +} + +static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + dest[i] += src[i] + bias; +} + +static void mix3to1 (sample_t * samples, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] += samples[i + 256] + samples[i + 512] + bias; +} + +static void mix4to1 (sample_t * samples, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] += (samples[i + 256] + samples[i + 512] + + samples[i + 768] + bias); +} + +static void mix5to1 (sample_t * samples, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] += (samples[i + 256] + samples[i + 512] + + samples[i + 768] + samples[i + 1024] + bias); +} + +static void mix3to2 (sample_t * samples, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + samples[i] += common; + samples[i + 256] = samples[i + 512] + common; + } +} + +static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = right[i + 256] + bias; + left[i] += common; + right[i] += common; + } +} + +static void mix21toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t surround; + + for (i = 0; i < 256; i++) { + surround = samples[i + 512]; + samples[i] += bias - surround; + samples[i + 256] += bias + surround; + } +} + +static void mix31to2 (sample_t * samples, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + samples[i + 768] + bias; + samples[i] += common; + samples[i + 256] = samples[i + 512] + common; + } +} + +static void mix31toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t common, surround; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + surround = 
samples[i + 768]; + samples[i] += common - surround; + samples[i + 256] = samples[i + 512] + common + surround; + } +} + +static void mix22toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t surround; + + for (i = 0; i < 256; i++) { + surround = samples[i + 512] + samples[i + 768]; + samples[i] += bias - surround; + samples[i + 256] += bias + surround; + } +} + +static void mix32to2 (sample_t * samples, sample_t bias) +{ + int i; + sample_t common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + samples[i] += common + samples[i + 768]; + samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; + } +} + +static void mix32toS (sample_t * samples, sample_t bias) +{ + int i; + sample_t common, surround; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] + bias; + surround = samples[i + 768] + samples[i + 1024]; + samples[i] += common - surround; + samples[i + 256] = samples[i + 512] + common + surround; + } +} + +static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) +{ + int i; + + for (i = 0; i < 256; i++) + dest[i] = src[i] + src[i + 256] + bias; +} + +static void zero (sample_t * samples) +{ + int i; + for (i = 0; i < 256; i++) + samples[i] = 0; +} + +static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev) +{ + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { + + case CONVERT (A52_CHANNEL, A52_CHANNEL2): + memcpy (samples, samples + 256, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_CHANNEL, A52_MONO): + case CONVERT (A52_STEREO, A52_MONO): + mix_2to1: + mix2to1 (samples, samples + 256, bias); + break; + + case CONVERT (A52_2F1R, A52_MONO): + if (slev == 0) + goto mix_2to1; + case CONVERT (A52_3F, A52_MONO): + mix_3to1: + mix3to1 (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_MONO): + if (slev == 0) + goto mix_3to1; + case CONVERT (A52_2F2R, A52_MONO): + if (slev == 0) + goto mix_2to1; + mix4to1 (samples, bias); + 
break; + + case CONVERT (A52_3F2R, A52_MONO): + if (slev == 0) + goto mix_3to1; + mix5to1 (samples, bias); + break; + + case CONVERT (A52_MONO, A52_DOLBY): + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F, A52_STEREO): + case CONVERT (A52_3F, A52_DOLBY): + mix_3to2: + mix3to2 (samples, bias); + break; + + case CONVERT (A52_2F1R, A52_STEREO): + if (slev == 0) + break; + mix21to2 (samples, samples + 256, bias); + break; + + case CONVERT (A52_2F1R, A52_DOLBY): + mix21toS (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_STEREO): + if (slev == 0) + goto mix_3to2; + mix31to2 (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_DOLBY): + mix31toS (samples, bias); + break; + + case CONVERT (A52_2F2R, A52_STEREO): + if (slev == 0) + break; + mix2to1 (samples, samples + 512, bias); + mix2to1 (samples + 256, samples + 768, bias); + break; + + case CONVERT (A52_2F2R, A52_DOLBY): + mix22toS (samples, bias); + break; + + case CONVERT (A52_3F2R, A52_STEREO): + if (slev == 0) + goto mix_3to2; + mix32to2 (samples, bias); + break; + + case CONVERT (A52_3F2R, A52_DOLBY): + mix32toS (samples, bias); + break; + + case CONVERT (A52_3F1R, A52_3F): + if (slev == 0) + break; + mix21to2 (samples, samples + 512, bias); + break; + + case CONVERT (A52_3F2R, A52_3F): + if (slev == 0) + break; + mix2to1 (samples, samples + 768, bias); + mix2to1 (samples + 512, samples + 1024, bias); + break; + + case CONVERT (A52_3F1R, A52_2F1R): + mix3to2 (samples, bias); + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_2F2R, A52_2F1R): + mix2to1 (samples + 512, samples + 768, bias); + break; + + case CONVERT (A52_3F2R, A52_2F1R): + mix3to2 (samples, bias); //FIXME possible bug? 
(output doesnt seem to be used) + move2to1 (samples + 768, samples + 512, bias); + break; + + case CONVERT (A52_3F2R, A52_3F1R): + mix2to1 (samples + 768, samples + 1024, bias); + break; + + case CONVERT (A52_2F1R, A52_2F2R): + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F1R, A52_2F2R): + mix3to2 (samples, bias); + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F2R, A52_2F2R): + mix3to2 (samples, bias); + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); + memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F1R, A52_3F2R): + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); + break; + } +} + +static void upmix_C (sample_t * samples, int acmod, int output) +{ + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { + + case CONVERT (A52_CHANNEL, A52_CHANNEL2): + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); + break; + + case CONVERT (A52_3F2R, A52_MONO): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_MONO): + case CONVERT (A52_2F2R, A52_MONO): + zero (samples + 768); + case CONVERT (A52_3F, A52_MONO): + case CONVERT (A52_2F1R, A52_MONO): + zero (samples + 512); + case CONVERT (A52_CHANNEL, A52_MONO): + case CONVERT (A52_STEREO, A52_MONO): + zero (samples + 256); + break; + case CONVERT (A52_3F2R, A52_STEREO): - coeff[0] = coeff[2] = level; - coeff[1] = level * clev; - coeff[3] = coeff[4] = level * slev; - return 31; + case CONVERT (A52_3F2R, A52_DOLBY): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_STEREO): + case CONVERT (A52_3F1R, A52_DOLBY): + zero (samples + 768); + case CONVERT (A52_3F, A52_STEREO): + case CONVERT (A52_3F, A52_DOLBY): + mix_3to2: + memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); + zero (samples + 256); + break; - case CONVERT (A52_3F1R, A52_3F): - coeff[0] = coeff[1] = coeff[2] = level; - coeff[3] = level * slev * LEVEL_3DB; - 
return 13; + case CONVERT (A52_2F2R, A52_STEREO): + case CONVERT (A52_2F2R, A52_DOLBY): + zero (samples + 768); + case CONVERT (A52_2F1R, A52_STEREO): + case CONVERT (A52_2F1R, A52_DOLBY): + zero (samples + 512); + break; case CONVERT (A52_3F2R, A52_3F): - coeff[0] = coeff[1] = coeff[2] = level; - coeff[3] = coeff[4] = level * slev; - return 29; - + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_3F): case CONVERT (A52_2F2R, A52_2F1R): - coeff[0] = coeff[1] = level; - coeff[2] = coeff[3] = level * LEVEL_3DB; - return 12; + zero (samples + 768); + break; case CONVERT (A52_3F2R, A52_3F1R): - coeff[0] = coeff[1] = coeff[2] = level; - coeff[3] = coeff[4] = level * LEVEL_3DB; - return 24; - - case CONVERT (A52_2F1R, A52_2F2R): - coeff[0] = coeff[1] = level; - coeff[2] = level * LEVEL_3DB; - return 0; - - case CONVERT (A52_3F1R, A52_2F2R): - coeff[0] = coeff[2] = level; - coeff[1] = level * clev; - coeff[3] = level * LEVEL_3DB; - return 7; - - case CONVERT (A52_3F1R, A52_3F2R): - coeff[0] = coeff[1] = coeff[2] = level; - coeff[3] = level * LEVEL_3DB; - return 0; + zero (samples + 1024); + break; - case CONVERT (A52_CHANNEL, A52_CHANNEL1): - coeff[0] = level; - coeff[1] = 0; - return 0; + case CONVERT (A52_3F2R, A52_2F1R): + zero (samples + 1024); + case CONVERT (A52_3F1R, A52_2F1R): + mix_31to21: + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); + goto mix_3to2; - case CONVERT (A52_CHANNEL, A52_CHANNEL2): - coeff[0] = 0; - coeff[1] = level; - return 0; + case CONVERT (A52_3F2R, A52_2F2R): + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); + goto mix_31to21; } - - return -1; /* NOTREACHED */ } -static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) +#ifdef ARCH_X86 +static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -324,17 +698,10 @@ static void mix2to1 (sample_t * dest, sample_t * src, 
sample_t bias) :: "r" (src+256), "r" (dest+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - dest[i] += src[i] + bias; -#endif } -static void mix3to1 (sample_t * samples, sample_t bias) +static void mix3to1_SSE (sample_t * samples, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -351,17 +718,10 @@ static void mix3to1 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] += samples[i + 256] + samples[i + 512] + bias; -#endif } -static void mix4to1 (sample_t * samples, sample_t bias) +static void mix4to1_SSE (sample_t * samples, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -379,18 +739,10 @@ static void mix4to1 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] += (samples[i + 256] + samples[i + 512] + - samples[i + 768] + bias); -#endif } -static void mix5to1 (sample_t * samples, sample_t bias) +static void mix5to1_SSE (sample_t * samples, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -409,19 +761,10 @@ static void mix5to1 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] += (samples[i + 256] + samples[i + 512] + - samples[i + 768] + samples[i + 1024] + bias); -#endif } -static void mix3to2 (sample_t * samples, sample_t bias) +static void mix3to2_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -440,21 +783,10 @@ static void mix3to2 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + 
bias; - samples[i] += common; - samples[i + 256] = samples[i + 512] + common; - } -#endif } -static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) +static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -473,21 +805,10 @@ static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) :: "r" (left+256), "r" (right+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = right[i + 256] + bias; - left[i] += common; - right[i] += common; - } -#endif } -static void mix21toS (sample_t * samples, sample_t bias) +static void mix21toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -507,21 +828,10 @@ static void mix21toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - surround = samples[i + 512]; - samples[i] += bias - surround; - samples[i + 256] += bias + surround; - } -#endif } -static void mix31to2 (sample_t * samples, sample_t bias) +static void mix31to2_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -541,21 +851,10 @@ static void mix31to2 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + samples[i + 768] + bias; - samples[i] += common; - samples[i + 256] = samples[i + 512] + common; - } -#endif } -static void mix31toS (sample_t * samples, sample_t bias) +static void mix31toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common, surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -577,22 +876,10 @@ static 
void mix31toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - surround = samples[i + 768]; - samples[i] += common - surround; - samples[i + 256] = samples[i + 512] + common + surround; - } -#endif } -static void mix22toS (sample_t * samples, sample_t bias) +static void mix22toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -613,21 +900,10 @@ static void mix22toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - surround = samples[i + 512] + samples[i + 768]; - samples[i] += bias - surround; - samples[i + 256] += bias + surround; - } -#endif } -static void mix32to2 (sample_t * samples, sample_t bias) +static void mix32to2_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -647,21 +923,10 @@ static void mix32to2 (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - samples[i] += common + samples[i + 768]; - samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; - } -#endif } -static void mix32toS (sample_t * samples, sample_t bias) +static void mix32toS_SSE (sample_t * samples, sample_t bias) { - int i; - sample_t common, surround; - -#ifdef HAVE_SSE asm volatile( "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -684,21 +949,10 @@ static void mix32toS (sample_t * samples, sample_t bias) :: "r" (samples+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) { - common = samples[i + 256] + bias; - surround = samples[i + 768] + samples[i + 1024]; - samples[i] += common - surround; - samples[i + 256] = samples[i + 512] + common 
+ surround; - } -#endif } -static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) +static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) { - int i; - -#ifdef HAVE_SSE asm volatile( "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" @@ -717,16 +971,10 @@ static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) :: "r" (src+256), "r" (dest+256), "m" (bias) : "%esi" ); -#else - for (i = 0; i < 256; i++) - dest[i] = src[i] + src[i + 256] + bias; -#endif } -static void zero (sample_t * samples) +static void zero_MMX(sample_t * samples) { - int i; -#ifdef HAVE_MMX asm volatile( "movl $-1024, %%esi \n\t" "pxor %%mm0, %%mm0 \n\t" @@ -741,13 +989,10 @@ static void zero (sample_t * samples) :: "r" (samples+256) : "%esi" ); -#else - for (i = 0; i < 256; i++) - samples[i] = 0; -#endif } -void downmix (sample_t * samples, int acmod, int output, sample_t bias, + +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -758,31 +1003,31 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, case CONVERT (A52_CHANNEL, A52_MONO): case CONVERT (A52_STEREO, A52_MONO): - mix_2to1: - mix2to1 (samples, samples + 256, bias); + mix_2to1_SSE: + mix2to1_SSE (samples, samples + 256, bias); break; case CONVERT (A52_2F1R, A52_MONO): if (slev == 0) - goto mix_2to1; + goto mix_2to1_SSE; case CONVERT (A52_3F, A52_MONO): - mix_3to1: - mix3to1 (samples, bias); + mix_3to1_SSE: + mix3to1_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_MONO): if (slev == 0) - goto mix_3to1; + goto mix_3to1_SSE; case CONVERT (A52_2F2R, A52_MONO): if (slev == 0) - goto mix_2to1; - mix4to1 (samples, bias); + goto mix_2to1_SSE; + mix4to1_SSE (samples, bias); break; case CONVERT (A52_3F2R, A52_MONO): if (slev == 0) - goto mix_3to1; - mix5to1 (samples, bias); + goto mix_3to1_SSE; + mix5to1_SSE (samples, bias); break; case 
CONVERT (A52_MONO, A52_DOLBY): @@ -791,80 +1036,80 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, case CONVERT (A52_3F, A52_STEREO): case CONVERT (A52_3F, A52_DOLBY): - mix_3to2: - mix3to2 (samples, bias); + mix_3to2_SSE: + mix3to2_SSE (samples, bias); break; case CONVERT (A52_2F1R, A52_STEREO): if (slev == 0) break; - mix21to2 (samples, samples + 256, bias); + mix21to2_SSE (samples, samples + 256, bias); break; case CONVERT (A52_2F1R, A52_DOLBY): - mix21toS (samples, bias); + mix21toS_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_STEREO): if (slev == 0) - goto mix_3to2; - mix31to2 (samples, bias); + goto mix_3to2_SSE; + mix31to2_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_DOLBY): - mix31toS (samples, bias); + mix31toS_SSE (samples, bias); break; case CONVERT (A52_2F2R, A52_STEREO): if (slev == 0) break; - mix2to1 (samples, samples + 512, bias); - mix2to1 (samples + 256, samples + 768, bias); + mix2to1_SSE (samples, samples + 512, bias); + mix2to1_SSE (samples + 256, samples + 768, bias); break; case CONVERT (A52_2F2R, A52_DOLBY): - mix22toS (samples, bias); + mix22toS_SSE (samples, bias); break; case CONVERT (A52_3F2R, A52_STEREO): if (slev == 0) - goto mix_3to2; - mix32to2 (samples, bias); + goto mix_3to2_SSE; + mix32to2_SSE (samples, bias); break; case CONVERT (A52_3F2R, A52_DOLBY): - mix32toS (samples, bias); + mix32toS_SSE (samples, bias); break; case CONVERT (A52_3F1R, A52_3F): if (slev == 0) break; - mix21to2 (samples, samples + 512, bias); + mix21to2_SSE (samples, samples + 512, bias); break; case CONVERT (A52_3F2R, A52_3F): if (slev == 0) break; - mix2to1 (samples, samples + 768, bias); - mix2to1 (samples + 512, samples + 1024, bias); + mix2to1_SSE (samples, samples + 768, bias); + mix2to1_SSE (samples + 512, samples + 1024, bias); break; case CONVERT (A52_3F1R, A52_2F1R): - mix3to2 (samples, bias); + mix3to2_SSE (samples, bias); memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); break; case 
CONVERT (A52_2F2R, A52_2F1R): - mix2to1 (samples + 512, samples + 768, bias); + mix2to1_SSE (samples + 512, samples + 768, bias); break; case CONVERT (A52_3F2R, A52_2F1R): - mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) - move2to1 (samples + 768, samples + 512, bias); + mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) + move2to1_SSE (samples + 768, samples + 512, bias); break; case CONVERT (A52_3F2R, A52_3F1R): - mix2to1 (samples + 768, samples + 1024, bias); + mix2to1_SSE (samples + 768, samples + 1024, bias); break; case CONVERT (A52_2F1R, A52_2F2R): @@ -872,12 +1117,12 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, break; case CONVERT (A52_3F1R, A52_2F2R): - mix3to2 (samples, bias); + mix3to2_SSE (samples, bias); memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); break; case CONVERT (A52_3F2R, A52_2F2R): - mix3to2 (samples, bias); + mix3to2_SSE (samples, bias); memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); break; @@ -888,7 +1133,7 @@ void downmix (sample_t * samples, int acmod, int output, sample_t bias, } } -void upmix (sample_t * samples, int acmod, int output) +static void upmix_MMX (sample_t * samples, int acmod, int output) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -897,59 +1142,60 @@ void upmix (sample_t * samples, int acmod, int output) break; case CONVERT (A52_3F2R, A52_MONO): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_MONO): case CONVERT (A52_2F2R, A52_MONO): - zero (samples + 768); + zero_MMX (samples + 768); case CONVERT (A52_3F, A52_MONO): case CONVERT (A52_2F1R, A52_MONO): - zero (samples + 512); + zero_MMX (samples + 512); case CONVERT (A52_CHANNEL, A52_MONO): case CONVERT (A52_STEREO, A52_MONO): - zero (samples + 256); + zero_MMX (samples + 256); break; case CONVERT (A52_3F2R, A52_STEREO): case CONVERT 
(A52_3F2R, A52_DOLBY): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_STEREO): case CONVERT (A52_3F1R, A52_DOLBY): - zero (samples + 768); + zero_MMX (samples + 768); case CONVERT (A52_3F, A52_STEREO): case CONVERT (A52_3F, A52_DOLBY): - mix_3to2: + mix_3to2_MMX: memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); - zero (samples + 256); + zero_MMX (samples + 256); break; case CONVERT (A52_2F2R, A52_STEREO): case CONVERT (A52_2F2R, A52_DOLBY): - zero (samples + 768); + zero_MMX (samples + 768); case CONVERT (A52_2F1R, A52_STEREO): case CONVERT (A52_2F1R, A52_DOLBY): - zero (samples + 512); + zero_MMX (samples + 512); break; case CONVERT (A52_3F2R, A52_3F): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_3F): case CONVERT (A52_2F2R, A52_2F1R): - zero (samples + 768); + zero_MMX (samples + 768); break; case CONVERT (A52_3F2R, A52_3F1R): - zero (samples + 1024); + zero_MMX (samples + 1024); break; case CONVERT (A52_3F2R, A52_2F1R): - zero (samples + 1024); + zero_MMX (samples + 1024); case CONVERT (A52_3F1R, A52_2F1R): - mix_31to21: + mix_31to21_MMX: memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); - goto mix_3to2; + goto mix_3to2_MMX; case CONVERT (A52_3F2R, A52_2F2R): memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); - goto mix_31to21; + goto mix_31to21_MMX; } } +#endif //ARCH_X86 -- cgit v1.2.3