summaryrefslogtreecommitdiffstats
path: root/mp3lib/decode_MMX.c
diff options
context:
space:
mode:
authornick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-13 18:26:58 +0000
committernick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-13 18:26:58 +0000
commitfc77a62a658d6ec49683b1cccc933e406cab7bd0 (patch)
tree714584d7aa7bc5d5a099d73cc5ef2e090b1d9013 /mp3lib/decode_MMX.c
parentb6f69b1a9ccd55d5a820f9a1f45355ad9e180324 (diff)
downloadmpv-fc77a62a658d6ec49683b1cccc933e406cab7bd0.tar.bz2
mpv-fc77a62a658d6ec49683b1cccc933e406cab7bd0.tar.xz
S->C
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4143 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'mp3lib/decode_MMX.c')
-rw-r--r--mp3lib/decode_MMX.c244
1 files changed, 244 insertions, 0 deletions
diff --git a/mp3lib/decode_MMX.c b/mp3lib/decode_MMX.c
new file mode 100644
index 0000000000..97ac7f34d9
--- /dev/null
+++ b/mp3lib/decode_MMX.c
@@ -0,0 +1,244 @@
+/*
+ * this code comes under GPL
+ * This code was taken from http://www.mpg123.org
+ * See the ChangeLog of mpg123-0.59s-pre.1 for details
+ * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
+ *
+ * Local ChangeLog:
+ * - Partial loop unrolling and removal of the MOVW instruction from the loops
+*/
+#define real float /* ugly - but only way; 'real' is the element type of the bandPtr argument of synth_1to1_MMX_s() */
+
+static unsigned long long __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; /* pand mask: keeps 16-bit words 0 and 2 of a quadword (the freshly computed results in synth_1to1_MMX_s) */
+static unsigned long long __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; /* pand mask: keeps 16-bit words 1 and 3 of a quadword (the shorts already in the output that must survive the store) */
+unsigned long __attribute__((aligned(8))) costab_mmx[] = /* 31 constants with external linkage; not referenced in this file - presumably read by the routine behind dct64_MMX_func (confirm). */
+{ /* The values read as IEEE-754 single-precision bit patterns (first is 0x3F002785, about 0.5006). NOTE(review): the consumer must agree on the element size; 'unsigned long' is 4 bytes only on 32-bit targets. */
+ 1056974725, /* entries 0..15: increasing run of 16 values */
+ 1057056395,
+ 1057223771,
+ 1057485416,
+ 1057855544,
+ 1058356026,
+ 1059019886,
+ 1059897405,
+ 1061067246,
+ 1062657950,
+ 1064892987,
+ 1066774581,
+ 1069414683,
+ 1073984175,
+ 1079645762,
+ 1092815430,
+ 1057005197, /* entries 16..23: the sequence restarts, run of 8 values */
+ 1057342072,
+ 1058087743,
+ 1059427869,
+ 1061799040,
+ 1065862217,
+ 1071413542,
+ 1084439708,
+ 1057128951, /* entries 24..27: run of 4 values */
+ 1058664893,
+ 1063675095,
+ 1076102863,
+ 1057655764, /* entries 28..29: run of 2 values */
+ 1067924853,
+ 1060439283, /* entry 30: 0x3F3504F3, about 0.70711 as a single-precision float */
+};
+
+/*
+ * synth_1to1_MMX_s - windowing/output step written as a single inline asm
+ * statement (32-bit x86, AT&T syntax, MMX).
+ *
+ * The routine calls the function pointed to by dct64_MMX_func once and then
+ * forms 32 dot products of 16 signed 16-bit words each: window words are
+ * read starting at decwins + 2*(16 - ebp) bytes, data words from the work
+ * area inside buffs.  Each 32-bit sum is shifted right arithmetically by 13,
+ * saturated to 16 bits and stored as a short.  Results are written to every
+ * second short starting at samples[channel]; the shorts in between are left
+ * untouched.  The first 17 results are stored as computed, the last 15 are
+ * negated (with saturation) before the store.
+ *
+ * bandPtr - passed through unchanged as the third stacked argument of the
+ *           dct64_MMX_func call.
+ * channel - output offset in shorts.  When channel == 1 the work area starts
+ *           at buffs and *bo is left alone; for any other value *bo is
+ *           replaced by (*bo - 1) & 15 and the work area starts at
+ *           buffs + 1088 bytes.
+ * samples - output buffer (see above for the store pattern).
+ * buffs   - 16-bit work area; addresses are formed from it with byte
+ *           offsets 544 and 1088 plus 2 * index.
+ * bo      - pointer to the index described under 'channel'.
+ *
+ * Fixes relative to the original statement:
+ *  - the 12 bytes of call arguments are removed from the stack after the
+ *    call, so esp is balanced when the asm ends (assumes the callee uses the
+ *    cdecl convention, i.e. does not pop its own arguments - confirm);
+ *  - eax, ecx and edx, which the code overwrites, are listed as clobbers;
+ *  - ".L<n>"/".next_loop" labels are replaced by numeric local labels so
+ *    they cannot collide with compiler-generated ".L<n>" labels or with a
+ *    second copy of this asm.
+ */
+void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
+ short *buffs, int *bo)
+{
+
+__asm __volatile(
+ "movl %1,%%ecx\n\t" /* ecx = channel */
+ "movl %2,%%edi\n\t" /* edi = samples */
+ "movl $15,%%ebx\n\t" /* ebx = 15, used below as the modulo-16 mask */
+ "movl %4,%%edx\n\t" /* edx = bo */
+ "leal (%%edi,%%ecx,2),%%edi\n\t" /* edi = samples + 2*channel bytes = &samples[channel] */
+ "decl %%ecx\n\t" /* ecx = channel - 1 */
+ "movl %3,%%esi\n\t" /* esi = buffs */
+ "movl (%%edx),%%eax\n\t" /* eax = *bo */
+ "jecxz 1f\n\t" /* channel == 1: keep *bo and esi as they are */
+ "decl %%eax\n\t"
+ "andl %%ebx,%%eax\n\t" /* eax = (*bo - 1) & 15 */
+ "leal 1088(%%esi),%%esi\n\t" /* esi = buffs + 1088 bytes */
+ "movl %%eax,(%%edx)\n\t" /* *bo = eax */
+"1:\n\t"
+ "leal (%%esi,%%eax,2),%%edx\n\t" /* edx = esi + 2*eax */
+ "movl %%eax,%%ebp\n\t" /* ebp = eax */
+ "incl %%eax\n\t"
+ "pushl %0\n\t" /* call argument 3: bandPtr.  NOTE(review): ebp was overwritten above; safe only if operand 0 is not addressed through ebp */
+ "andl %%ebx,%%eax\n\t" /* eax = (eax + 1) & 15 */
+ "leal 544(%%esi,%%eax,2),%%ecx\n\t" /* ecx = esi + 544 + 2*eax */
+ "incl %%ebx\n\t" /* ebx = 16 */
+ "testl $1, %%eax\n\t"
+ "jnz 2f\n\t" /* eax odd: keep edx/ecx/ebp/esi as computed */
+ "xchgl %%edx,%%ecx\n\t" /* eax even: swap the two pointers, */
+ "incl %%ebp\n\t" /* bump ebp, */
+ "leal 544(%%esi),%%esi\n\t" /* and move esi forward by 544 bytes */
+"2:\n\t"
+ "emms\n\t" /* leave MMX state before calling out */
+ "pushl %%edx\n\t" /* call argument 2 */
+ "pushl %%ecx\n\t" /* call argument 1 */
+ "call *dct64_MMX_func\n\t" /* indirect call; ebx, ebp, esi and edi are used afterwards without reload, so the callee must preserve them */
+ "addl $12,%%esp\n\t" /* drop the three argument words pushed above (cdecl: caller cleans up) */
+ "leal 1(%%ebx), %%ecx\n\t" /* ecx = 17 */
+ "subl %%ebp,%%ebx\n\t" /* ebx = 16 - ebp */
+ "pushl %%ecx\n\t" /* saved for the odd-count check after the loop */
+ "leal decwins(%%ebx,%%ebx,1), %%edx\n\t" /* edx = decwins + 2*ebx bytes */
+ "shrl $1, %%ecx\n\t" /* ecx = 8 iterations, two results each */
+".align 16\n\t"
+"3:\n\t"
+ /* two 16-word dot products per pass: (edx, esi) -> mm0 and (edx+64, esi+32) -> mm4 */
+ "movq (%%edx),%%mm0\n\t"
+ "movq 64(%%edx),%%mm4\n\t"
+ "pmaddwd (%%esi),%%mm0\n\t"
+ "pmaddwd 32(%%esi),%%mm4\n\t"
+ "movq 8(%%edx),%%mm1\n\t"
+ "movq 72(%%edx),%%mm5\n\t"
+ "pmaddwd 8(%%esi),%%mm1\n\t"
+ "pmaddwd 40(%%esi),%%mm5\n\t"
+ "movq 16(%%edx),%%mm2\n\t"
+ "movq 80(%%edx),%%mm6\n\t"
+ "pmaddwd 16(%%esi),%%mm2\n\t"
+ "pmaddwd 48(%%esi),%%mm6\n\t"
+ "movq 24(%%edx),%%mm3\n\t"
+ "movq 88(%%edx),%%mm7\n\t"
+ "pmaddwd 24(%%esi),%%mm3\n\t"
+ "pmaddwd 56(%%esi),%%mm7\n\t"
+ "paddd %%mm1,%%mm0\n\t"
+ "paddd %%mm5,%%mm4\n\t"
+ "paddd %%mm2,%%mm0\n\t"
+ "paddd %%mm6,%%mm4\n\t"
+ "paddd %%mm3,%%mm0\n\t"
+ "paddd %%mm7,%%mm4\n\t"
+ "movq %%mm0,%%mm1\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "paddd %%mm1,%%mm0\n\t" /* low dword of mm0 = full sum of the 16 products */
+ "paddd %%mm5,%%mm4\n\t" /* low dword of mm4 = full sum of the 16 products */
+ "psrad $13,%%mm0\n\t" /* arithmetic shift right by 13 */
+ "psrad $13,%%mm4\n\t"
+ "packssdw %%mm0,%%mm0\n\t" /* saturate to signed 16 bits; result in word 0 */
+ "packssdw %%mm4,%%mm4\n\t"
+ /* merge the two results into words 0 and 2 of the quadword at (edi), keeping words 1 and 3 */
+ "movq (%%edi), %%mm1\n\t"
+ "punpckldq %%mm4, %%mm0\n\t" /* mm0 = { low dword of mm0, low dword of mm4 } */
+ "pand one_null, %%mm1\n\t"
+ "pand null_one, %%mm0\n\t"
+ "por %%mm0, %%mm1\n\t"
+ "movq %%mm1,(%%edi)\n\t"
+ /* advance: 64 data bytes, 128 window bytes, 8 output bytes */
+ "leal 64(%%esi),%%esi\n\t"
+ "leal 128(%%edx),%%edx\n\t"
+ "leal 8(%%edi),%%edi\n\t"
+
+ "decl %%ecx\n\t"
+ "jnz 3b\n\t"
+
+ "popl %%ecx\n\t" /* ecx = 17 again */
+ "andl $1, %%ecx\n\t"
+ "jecxz 5f\n\t" /* count is odd, so the single-result tail below runs */
+ /* 17th result: one dot product, stored as a single 16-bit word */
+ "movq (%%edx),%%mm0\n\t"
+ "pmaddwd (%%esi),%%mm0\n\t"
+ "movq 8(%%edx),%%mm1\n\t"
+ "pmaddwd 8(%%esi),%%mm1\n\t"
+ "movq 16(%%edx),%%mm2\n\t"
+ "pmaddwd 16(%%esi),%%mm2\n\t"
+ "movq 24(%%edx),%%mm3\n\t"
+ "pmaddwd 24(%%esi),%%mm3\n\t"
+ "paddd %%mm1,%%mm0\n\t"
+ "paddd %%mm2,%%mm0\n\t"
+ "paddd %%mm3,%%mm0\n\t"
+ "movq %%mm0,%%mm1\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "paddd %%mm1,%%mm0\n\t"
+ "psrad $13,%%mm0\n\t"
+ "packssdw %%mm0,%%mm0\n\t"
+ "movd %%mm0,%%eax\n\t"
+ "movw %%ax, (%%edi)\n\t"
+ "leal 32(%%esi),%%esi\n\t"
+ "leal 64(%%edx),%%edx\n\t"
+ "leal 4(%%edi),%%edi\n\t"
+
+"5:\n\t"
+ "subl $64,%%esi\n\t" /* from here on the data pointer walks backwards */
+ "movl $7,%%ecx\n\t" /* 7 iterations, two negated results each */
+".align 16\n\t"
+"4:\n\t"
+ /* two 16-word dot products per pass: (edx, esi) -> mm0 and (edx+64, esi-32) -> mm4 */
+ "movq (%%edx),%%mm0\n\t"
+ "movq 64(%%edx),%%mm4\n\t"
+ "pmaddwd (%%esi),%%mm0\n\t"
+ "pmaddwd -32(%%esi),%%mm4\n\t"
+ "movq 8(%%edx),%%mm1\n\t"
+ "movq 72(%%edx),%%mm5\n\t"
+ "pmaddwd 8(%%esi),%%mm1\n\t"
+ "pmaddwd -24(%%esi),%%mm5\n\t"
+ "movq 16(%%edx),%%mm2\n\t"
+ "movq 80(%%edx),%%mm6\n\t"
+ "pmaddwd 16(%%esi),%%mm2\n\t"
+ "pmaddwd -16(%%esi),%%mm6\n\t"
+ "movq 24(%%edx),%%mm3\n\t"
+ "movq 88(%%edx),%%mm7\n\t"
+ "pmaddwd 24(%%esi),%%mm3\n\t"
+ "pmaddwd -8(%%esi),%%mm7\n\t"
+ "paddd %%mm1,%%mm0\n\t"
+ "paddd %%mm5,%%mm4\n\t"
+ "paddd %%mm2,%%mm0\n\t"
+ "paddd %%mm6,%%mm4\n\t"
+ "paddd %%mm3,%%mm0\n\t"
+ "paddd %%mm7,%%mm4\n\t"
+ "movq %%mm0,%%mm1\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "paddd %%mm0,%%mm1\n\t" /* low dword of mm1 = full sum */
+ "paddd %%mm4,%%mm5\n\t" /* low dword of mm5 = full sum */
+ "psrad $13,%%mm1\n\t"
+ "psrad $13,%%mm5\n\t"
+ "packssdw %%mm1,%%mm1\n\t"
+ "packssdw %%mm5,%%mm5\n\t"
+ "psubd %%mm0,%%mm0\n\t" /* mm0 = 0 */
+ "psubd %%mm4,%%mm4\n\t" /* mm4 = 0 */
+ "psubsw %%mm1,%%mm0\n\t" /* mm0 = 0 - mm1, saturating: negated result */
+ "psubsw %%mm5,%%mm4\n\t" /* mm4 = 0 - mm5, saturating: negated result */
+ /* same masked merge into words 0 and 2 of the quadword at (edi) as in the first loop */
+ "movq (%%edi), %%mm1\n\t"
+ "punpckldq %%mm4, %%mm0\n\t"
+ "pand one_null, %%mm1\n\t"
+ "pand null_one, %%mm0\n\t"
+ "por %%mm0, %%mm1\n\t"
+ "movq %%mm1,(%%edi)\n\t"
+
+ "subl $64,%%esi\n\t"
+ "addl $128,%%edx\n\t"
+ "leal 8(%%edi),%%edi\n\t"
+ "decl %%ecx\n\t"
+ "jnz 4b\n\t"
+ /* last (32nd) result: one negated dot product, stored as a single 16-bit word */
+ "movq (%%edx),%%mm0\n\t"
+ "pmaddwd (%%esi),%%mm0\n\t"
+ "movq 8(%%edx),%%mm1\n\t"
+ "pmaddwd 8(%%esi),%%mm1\n\t"
+ "movq 16(%%edx),%%mm2\n\t"
+ "pmaddwd 16(%%esi),%%mm2\n\t"
+ "movq 24(%%edx),%%mm3\n\t"
+ "pmaddwd 24(%%esi),%%mm3\n\t"
+ "paddd %%mm1,%%mm0\n\t"
+ "paddd %%mm2,%%mm0\n\t"
+ "paddd %%mm3,%%mm0\n\t"
+ "movq %%mm0,%%mm1\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "paddd %%mm0,%%mm1\n\t"
+ "psrad $13,%%mm1\n\t"
+ "packssdw %%mm1,%%mm1\n\t"
+ "psubd %%mm0,%%mm0\n\t"
+ "psubsw %%mm1,%%mm0\n\t"
+ "movd %%mm0,%%eax\n\t"
+ "movw %%ax,(%%edi)\n\t"
+ "emms\n\t" /* leave MMX state so later x87 code works */
+ :
+ :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo)
+ /* every general-purpose register the template writes must be listed here */
+ :"memory","%eax","%ecx","%edx","%ebp","%edi","%esi","%ebx");
+}