summaryrefslogtreecommitdiffstats
path: root/liba52
diff options
context:
space:
mode:
Diffstat (limited to 'liba52')
-rw-r--r--liba52/downmix.c27
1 files changed, 27 insertions, 0 deletions
diff --git a/liba52/downmix.c b/liba52/downmix.c
index e2bcd87781..e1f70104e6 100644
--- a/liba52/downmix.c
+++ b/liba52/downmix.c
@@ -19,6 +19,8 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
*/
#include "config.h"
@@ -445,12 +447,37 @@ static void mix32toS (sample_t * samples, sample_t bias)
int i;
sample_t common, surround;
+#ifdef HAVE_SSE
+ asm volatile(
+ "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "movl $-1024, %%esi \n\t"
+ "1: \n\t"
+ "movaps 1024(%0, %%esi), %%xmm0 \n\t"
+ "movaps 3072(%0, %%esi), %%xmm2 \n\t"
+ "addps %%xmm7, %%xmm0 \n\t" // common
+ "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround
+ "movaps (%0, %%esi), %%xmm1 \n\t"
+ "movaps 2048(%0, %%esi), %%xmm3 \n\t"
+ "subps %%xmm2, %%xmm1 \n\t"
+ "addps %%xmm2, %%xmm3 \n\t"
+ "addps %%xmm0, %%xmm1 \n\t"
+ "addps %%xmm0, %%xmm3 \n\t"
+ "movaps %%xmm1, (%0, %%esi) \n\t"
+ "movaps %%xmm3, 1024(%0, %%esi) \n\t"
+ "addl $16, %%esi \n\t"
+ " jnz 1b \n\t"
+ :: "r" (samples+256), "m" (bias)
+ : "%esi"
+ );
+#else
for (i = 0; i < 256; i++) {
common = samples[i + 256] + bias;
surround = samples[i + 768] + samples[i + 1024];
samples[i] += common - surround;
samples[i + 256] = samples[i + 512] + common + surround;
}
+#endif
}
static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)