summaryrefslogtreecommitdiffstats
path: root/mp3lib
diff options
context:
space:
mode:
authornick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-13 18:26:58 +0000
committernick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-13 18:26:58 +0000
commitfc77a62a658d6ec49683b1cccc933e406cab7bd0 (patch)
tree714584d7aa7bc5d5a099d73cc5ef2e090b1d9013 /mp3lib
parentb6f69b1a9ccd55d5a820f9a1f45355ad9e180324 (diff)
downloadmpv-fc77a62a658d6ec49683b1cccc933e406cab7bd0.tar.bz2
mpv-fc77a62a658d6ec49683b1cccc933e406cab7bd0.tar.xz
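S->C: convert mp3lib/decode_MMX from a standalone assembly file (.s) to a C file with inline assembly (.c)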
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4143 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'mp3lib')
-rw-r--r--mp3lib/Makefile2
-rw-r--r--mp3lib/decode_MMX.c244
-rw-r--r--mp3lib/decode_MMX.s252
3 files changed, 245 insertions, 253 deletions
diff --git a/mp3lib/Makefile b/mp3lib/Makefile
index 49c18c3a5f..eb526f3d15 100644
--- a/mp3lib/Makefile
+++ b/mp3lib/Makefile
@@ -12,7 +12,7 @@ ifeq ($(TARGET_ARCH_X86),yes)
SRCS += d_cpu.s decode_i586.s
OBJS += d_cpu.o decode_i586.o
ifeq ($(TARGET_MMX),yes)
-SRCS += decode_MMX.s dct64_MMX.s tabinit_MMX.c
+SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c
OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o
endif
#ifeq ($(TARGET_SSE),yes)
diff --git a/mp3lib/decode_MMX.c b/mp3lib/decode_MMX.c
new file mode 100644
index 0000000000..97ac7f34d9
--- /dev/null
+++ b/mp3lib/decode_MMX.c
@@ -0,0 +1,244 @@
+/*
+ * this code comes under GPL
+ * This code was taken from http://www.mpg123.org
+ * See ChangeLog of mpg123-0.59s-pre.1 for detail
+ * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
+ *
+ * Local ChangeLog:
+ * - Partial loop unrolling and removal of MOVW instructions from loops
+*/
+#define real float /* ugly - but only way */
+
+/*
+ * 64-bit masks used by the "pand" instructions in synth_1to1_MMX_s:
+ *   null_one keeps the low 16 bits of each 32-bit half (the new sample),
+ *   one_null keeps the high 16 bits of each 32-bit half (the short that is
+ *   already in the output buffer and must be preserved).
+ * They are referenced only by symbol name from inside an asm string, which
+ * the compiler cannot see, so they are marked "used" to stop them being
+ * discarded as unreferenced statics. The 8-byte alignment matches the
+ * .align 8 the assembly version of this file used for the same data.
+ */
+static unsigned long long __attribute__((used, aligned(8))) null_one = 0x0000ffff0000ffffULL;
+static unsigned long long __attribute__((used, aligned(8))) one_null = 0xffff0000ffff0000ULL;
+unsigned long __attribute__((aligned(8))) costab_mmx[] = /* 31-entry constant table.
+ * It has external linkage (the assembly file this replaces exported the
+ * same symbol with .globl) and is not referenced anywhere in this file, so
+ * its consumer lives in another translation unit.
+ * NOTE(review): the values look like IEEE-754 single-precision bit patterns
+ * stored as integers (e.g. 1056974725 == 0x3F002785, about 0.5006), and the
+ * 16+8+4+2+1 entry count suggests per-stage DCT cosine factors - confirm
+ * against the dct64 code that reads the table.
+ * NOTE(review): the old assembly table used 32-bit .long entries; the
+ * "unsigned long" element type only matches that layout where long is
+ * 32 bits wide.
+ */
+{
+ 1056974725,
+ 1057056395,
+ 1057223771,
+ 1057485416,
+ 1057855544,
+ 1058356026,
+ 1059019886,
+ 1059897405,
+ 1061067246,
+ 1062657950,
+ 1064892987,
+ 1066774581,
+ 1069414683,
+ 1073984175,
+ 1079645762,
+ 1092815430,
+ 1057005197,
+ 1057342072,
+ 1058087743,
+ 1059427869,
+ 1061799040,
+ 1065862217,
+ 1071413542,
+ 1084439708,
+ 1057128951,
+ 1058664893,
+ 1063675095,
+ 1076102863,
+ 1057655764,
+ 1067924853,
+ 1060439283,
+};
+
+/*
+ * synth_1to1_MMX_s - MMX inner routine that writes 32 output samples for one
+ * channel. x86-32 only; the whole body is one GCC inline-assembly statement.
+ *
+ * bandPtr  passed through unchanged as the third argument of the routine
+ *          called through the dct64_MMX_func pointer.
+ * channel  output slot: stores start at &samples[channel] and touch every
+ *          second short, so the other short of each pair is preserved.
+ *          For any value other than 1 the routine also steps *bo back by
+ *          one (mod 16) and works on the region 1088 bytes into buffs.
+ * samples  16-bit output buffer; 32 shorts are written at a stride of 2.
+ * buffs    work buffers handed to dct64_MMX_func and then read back as the
+ *          16-bit operands of the windowing multiply-accumulates.
+ * bo       in/out rotating offset (kept in 0..15) into buffs.
+ *
+ * External symbols referenced by name from the asm text: dct64_MMX_func
+ * (called indirectly), decwins (16-bit window table), null_one / one_null
+ * (merge masks).
+ *
+ * NOTE(review): three dwords are pushed as arguments before the indirect
+ * call and are never popped here - presumably the callee removes them
+ * itself (e.g. "ret $12"); confirm against dct64_MMX_func.
+ */
+void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
+                      short *buffs, int *bo)
+{
+    __asm __volatile(
+        /* ecx = channel, edi = &samples[channel], edx = bo, esi = buffs,
+         * eax = *bo; ebx = 15 is the mod-16 mask. */
+        "movl %1,%%ecx\n\t"
+        "movl %2,%%edi\n\t"
+        "movl $15,%%ebx\n\t"
+        "movl %4,%%edx\n\t"
+        "leal (%%edi,%%ecx,2),%%edi\n\t"
+        "decl %%ecx\n\t"
+        "movl %3,%%esi\n\t"
+        "movl (%%edx),%%eax\n\t"
+        /* channel == 1: leave *bo and esi untouched. */
+        "jecxz 1f\n\t"
+        "decl %%eax\n\t"
+        "andl %%ebx,%%eax\n\t"
+        "leal 1088(%%esi),%%esi\n\t"
+        "movl %%eax,(%%edx)\n\t"
+        /* Numeric local labels are used throughout: named labels such as
+         * .L1 live in the compiler's own label namespace and would clash if
+         * this statement were ever emitted more than once. */
+"1:\n\t"
+        /* Push bandPtr (third call argument) before ebp is overwritten
+         * below, so the operand is still valid when the compiler addresses
+         * it relative to the frame pointer. */
+        "pushl %0\n\t"
+        "leal (%%esi,%%eax,2),%%edx\n\t"
+        "movl %%eax,%%ebp\n\t"
+        "incl %%eax\n\t"
+        "andl %%ebx,%%eax\n\t"
+        "leal 544(%%esi,%%eax,2),%%ecx\n\t"
+        "incl %%ebx\n\t"
+        /* Even (bo+1)&15: swap the two destination pointers, bump ebp and
+         * read the window operands from 544 bytes further into the buffer. */
+        "testl $1, %%eax\n\t"
+        "jnz 2f\n\t"
+        "xchgl %%edx,%%ecx\n\t"
+        "incl %%ebp\n\t"
+        "leal 544(%%esi),%%esi\n\t"
+"2:\n\t"
+        "emms\n\t"
+        /* Stack now holds (ecx, edx, bandPtr) as the call arguments. */
+        "pushl %%edx\n\t"
+        "pushl %%ecx\n\t"
+        "call *dct64_MMX_func\n\t"
+        /* ebx, ebp, esi and edi are reused below without reloading, so the
+         * callee must preserve them. ecx = 17 is saved for the odd-count
+         * test after the loop; ecx >> 1 = 8 paired iterations.
+         * edx = decwins + 2 * (16 - ebp). */
+        "leal 1(%%ebx), %%ecx\n\t"
+        "subl %%ebp,%%ebx\n\t"
+        "pushl %%ecx\n\t"
+        "leal decwins(%%ebx,%%ebx,1), %%edx\n\t"
+        "shrl $1, %%ecx\n\t"
+        /* First half: each pass forms two samples, each a 16-term dot
+         * product of 16-bit values (4 x pmaddwd), shifted right by 13 and
+         * saturated to 16 bits. */
+".align 16\n\t"
+"3:\n\t"
+        "movq (%%edx),%%mm0\n\t"
+        "movq 64(%%edx),%%mm4\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "pmaddwd 32(%%esi),%%mm4\n\t"
+        "movq 8(%%edx),%%mm1\n\t"
+        "movq 72(%%edx),%%mm5\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "pmaddwd 40(%%esi),%%mm5\n\t"
+        "movq 16(%%edx),%%mm2\n\t"
+        "movq 80(%%edx),%%mm6\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "pmaddwd 48(%%esi),%%mm6\n\t"
+        "movq 24(%%edx),%%mm3\n\t"
+        "movq 88(%%edx),%%mm7\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "pmaddwd 56(%%esi),%%mm7\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm5,%%mm4\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm6,%%mm4\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "paddd %%mm7,%%mm4\n\t"
+        "movq %%mm0,%%mm1\n\t"
+        "movq %%mm4,%%mm5\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "psrlq $32,%%mm5\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm5,%%mm4\n\t"
+        "psrad $13,%%mm0\n\t"
+        "psrad $13,%%mm4\n\t"
+        "packssdw %%mm0,%%mm0\n\t"
+        "packssdw %%mm4,%%mm4\n\t"
+
+        /* Merge the two new samples into shorts 0 and 2 of the quadword at
+         * edi, keeping shorts 1 and 3 that are already in memory. */
+        "movq (%%edi), %%mm1\n\t"
+        "punpckldq %%mm4, %%mm0\n\t"
+        "pand one_null, %%mm1\n\t"
+        "pand null_one, %%mm0\n\t"
+        "por %%mm0, %%mm1\n\t"
+        "movq %%mm1,(%%edi)\n\t"
+
+        "leal 64(%%esi),%%esi\n\t"
+        "leal 128(%%edx),%%edx\n\t"
+        "leal 8(%%edi),%%edi\n\t"
+
+        "decl %%ecx\n\t"
+        "jnz 3b\n\t"
+
+        /* Saved count (17) is odd, so one more single sample follows. */
+        "popl %%ecx\n\t"
+        "andl $1, %%ecx\n\t"
+        "jecxz 4f\n\t"
+
+        "movq (%%edx),%%mm0\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "movq 8(%%edx),%%mm1\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "movq 16(%%edx),%%mm2\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "movq 24(%%edx),%%mm3\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "movq %%mm0,%%mm1\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "psrad $13,%%mm0\n\t"
+        "packssdw %%mm0,%%mm0\n\t"
+        "movd %%mm0,%%eax\n\t"
+        "movw %%ax, (%%edi)\n\t"
+        "leal 32(%%esi),%%esi\n\t"
+        "leal 64(%%edx),%%edx\n\t"
+        "leal 4(%%edi),%%edi\n\t"
+
+        /* Second half: esi now walks backwards (0 and -32 offsets, then
+         * -64 per pass) and each result is negated with a saturating
+         * subtract from zero before being stored. 7 paired passes. */
+"4:\n\t"
+        "subl $64,%%esi\n\t"
+        "movl $7,%%ecx\n\t"
+".align 16\n\t"
+"5:\n\t"
+        "movq (%%edx),%%mm0\n\t"
+        "movq 64(%%edx),%%mm4\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "pmaddwd -32(%%esi),%%mm4\n\t"
+        "movq 8(%%edx),%%mm1\n\t"
+        "movq 72(%%edx),%%mm5\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "pmaddwd -24(%%esi),%%mm5\n\t"
+        "movq 16(%%edx),%%mm2\n\t"
+        "movq 80(%%edx),%%mm6\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "pmaddwd -16(%%esi),%%mm6\n\t"
+        "movq 24(%%edx),%%mm3\n\t"
+        "movq 88(%%edx),%%mm7\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "pmaddwd -8(%%esi),%%mm7\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm5,%%mm4\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm6,%%mm4\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "paddd %%mm7,%%mm4\n\t"
+        "movq %%mm0,%%mm1\n\t"
+        "movq %%mm4,%%mm5\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "psrlq $32,%%mm5\n\t"
+        "paddd %%mm0,%%mm1\n\t"
+        "paddd %%mm4,%%mm5\n\t"
+        "psrad $13,%%mm1\n\t"
+        "psrad $13,%%mm5\n\t"
+        "packssdw %%mm1,%%mm1\n\t"
+        "packssdw %%mm5,%%mm5\n\t"
+        "psubd %%mm0,%%mm0\n\t"
+        "psubd %%mm4,%%mm4\n\t"
+        "psubsw %%mm1,%%mm0\n\t"
+        "psubsw %%mm5,%%mm4\n\t"
+
+        "movq (%%edi), %%mm1\n\t"
+        "punpckldq %%mm4, %%mm0\n\t"
+        "pand one_null, %%mm1\n\t"
+        "pand null_one, %%mm0\n\t"
+        "por %%mm0, %%mm1\n\t"
+        "movq %%mm1,(%%edi)\n\t"
+
+        "subl $64,%%esi\n\t"
+        "addl $128,%%edx\n\t"
+        "leal 8(%%edi),%%edi\n\t"
+        "decl %%ecx\n\t"
+        "jnz 5b\n\t"
+
+        /* Last (32nd) sample, negated like the rest of the second half. */
+        "movq (%%edx),%%mm0\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "movq 8(%%edx),%%mm1\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "movq 16(%%edx),%%mm2\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "movq 24(%%edx),%%mm3\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "movq %%mm0,%%mm1\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "paddd %%mm0,%%mm1\n\t"
+        "psrad $13,%%mm1\n\t"
+        "packssdw %%mm1,%%mm1\n\t"
+        "psubd %%mm0,%%mm0\n\t"
+        "psubsw %%mm1,%%mm0\n\t"
+        "movd %%mm0,%%eax\n\t"
+        "movw %%ax,(%%edi)\n\t"
+        /* Clear MMX state so later x87 floating-point code works. */
+        "emms\n\t"
+        :
+        :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo)
+        /* Every general register the code writes is listed, including the
+         * caller-saved eax/ecx/edx that the indirect call may also trash. */
+        :"memory","cc","%eax","%ecx","%edx","%ebp","%edi","%esi","%ebx");
+}
diff --git a/mp3lib/decode_MMX.s b/mp3lib/decode_MMX.s
deleted file mode 100644
index cd770f3231..0000000000
--- a/mp3lib/decode_MMX.s
+++ /dev/null
@@ -1,252 +0,0 @@
-# this code comes under GPL
-# This code was taken from http://www.mpg123.org
-# See ChangeLog of mpg123-0.59s-pre.1 for detail
-# Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
-#
-# Local ChangeLog:
-# - Partial loops unrolling and removing MOVW insn from loops
-#
-
-.data
-.align 8
-null_one: .long 0x0000ffff, 0x0000ffff
-one_null: .long 0xffff0000, 0xffff0000
-.globl costab_mmx
-costab_mmx:
- .long 1056974725
- .long 1057056395
- .long 1057223771
- .long 1057485416
- .long 1057855544
- .long 1058356026
- .long 1059019886
- .long 1059897405
- .long 1061067246
- .long 1062657950
- .long 1064892987
- .long 1066774581
- .long 1069414683
- .long 1073984175
- .long 1079645762
- .long 1092815430
- .long 1057005197
- .long 1057342072
- .long 1058087743
- .long 1059427869
- .long 1061799040
- .long 1065862217
- .long 1071413542
- .long 1084439708
- .long 1057128951
- .long 1058664893
- .long 1063675095
- .long 1076102863
- .long 1057655764
- .long 1067924853
- .long 1060439283
-
-.text
-
-.globl synth_1to1_MMX_s
-//
-// void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
-// short *buffs, int *bo);
-//
-synth_1to1_MMX_s:
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %ebx
- movl 24(%esp),%ecx
- movl 28(%esp),%edi
- movl $15,%ebx
- movl 36(%esp),%edx
- leal (%edi,%ecx,2),%edi
- decl %ecx
- movl 32(%esp),%esi
- movl (%edx),%eax
- jecxz .L1
- decl %eax
- andl %ebx,%eax
- leal 1088(%esi),%esi
- movl %eax,(%edx)
-.L1:
- leal (%esi,%eax,2),%edx
- movl %eax,%ebp
- incl %eax
- pushl 20(%esp)
- andl %ebx,%eax
- leal 544(%esi,%eax,2),%ecx
- incl %ebx
- testl $1, %eax
- jnz .L2
- xchgl %edx,%ecx
- incl %ebp
- leal 544(%esi),%esi
-.L2:
- emms
- pushl %edx
- pushl %ecx
- call *dct64_MMX_func
- leal 1(%ebx), %ecx
- subl %ebp,%ebx
- pushl %ecx
- leal decwins(%ebx,%ebx,1), %edx
- shrl $1, %ecx
-.align 16
-.L3:
- movq (%edx),%mm0
- movq 64(%edx),%mm4
- pmaddwd (%esi),%mm0
- pmaddwd 32(%esi),%mm4
- movq 8(%edx),%mm1
- movq 72(%edx),%mm5
- pmaddwd 8(%esi),%mm1
- pmaddwd 40(%esi),%mm5
- movq 16(%edx),%mm2
- movq 80(%edx),%mm6
- pmaddwd 16(%esi),%mm2
- pmaddwd 48(%esi),%mm6
- movq 24(%edx),%mm3
- movq 88(%edx),%mm7
- pmaddwd 24(%esi),%mm3
- pmaddwd 56(%esi),%mm7
- paddd %mm1,%mm0
- paddd %mm5,%mm4
- paddd %mm2,%mm0
- paddd %mm6,%mm4
- paddd %mm3,%mm0
- paddd %mm7,%mm4
- movq %mm0,%mm1
- movq %mm4,%mm5
- psrlq $32,%mm1
- psrlq $32,%mm5
- paddd %mm1,%mm0
- paddd %mm5,%mm4
- psrad $13,%mm0
- psrad $13,%mm4
- packssdw %mm0,%mm0
- packssdw %mm4,%mm4
-
- movq (%edi), %mm1
- punpckldq %mm4, %mm0
- pand one_null, %mm1
- pand null_one, %mm0
- por %mm0, %mm1
- movq %mm1,(%edi)
-
- leal 64(%esi),%esi
- leal 128(%edx),%edx
- leal 8(%edi),%edi
-
- decl %ecx
- jnz .L3
-
- popl %ecx
- andl $1, %ecx
- jecxz .next_loop
-
- movq (%edx),%mm0
- pmaddwd (%esi),%mm0
- movq 8(%edx),%mm1
- pmaddwd 8(%esi),%mm1
- movq 16(%edx),%mm2
- pmaddwd 16(%esi),%mm2
- movq 24(%edx),%mm3
- pmaddwd 24(%esi),%mm3
- paddd %mm1,%mm0
- paddd %mm2,%mm0
- paddd %mm3,%mm0
- movq %mm0,%mm1
- psrlq $32,%mm1
- paddd %mm1,%mm0
- psrad $13,%mm0
- packssdw %mm0,%mm0
- movd %mm0,%eax
- movw %ax, (%edi)
- leal 32(%esi),%esi
- leal 64(%edx),%edx
- leal 4(%edi),%edi
-
-.next_loop:
- subl $64,%esi
- movl $7,%ecx
-.align 16
-.L4:
- movq (%edx),%mm0
- movq 64(%edx),%mm4
- pmaddwd (%esi),%mm0
- pmaddwd -32(%esi),%mm4
- movq 8(%edx),%mm1
- movq 72(%edx),%mm5
- pmaddwd 8(%esi),%mm1
- pmaddwd -24(%esi),%mm5
- movq 16(%edx),%mm2
- movq 80(%edx),%mm6
- pmaddwd 16(%esi),%mm2
- pmaddwd -16(%esi),%mm6
- movq 24(%edx),%mm3
- movq 88(%edx),%mm7
- pmaddwd 24(%esi),%mm3
- pmaddwd -8(%esi),%mm7
- paddd %mm1,%mm0
- paddd %mm5,%mm4
- paddd %mm2,%mm0
- paddd %mm6,%mm4
- paddd %mm3,%mm0
- paddd %mm7,%mm4
- movq %mm0,%mm1
- movq %mm4,%mm5
- psrlq $32,%mm1
- psrlq $32,%mm5
- paddd %mm0,%mm1
- paddd %mm4,%mm5
- psrad $13,%mm1
- psrad $13,%mm5
- packssdw %mm1,%mm1
- packssdw %mm5,%mm5
- psubd %mm0,%mm0
- psubd %mm4,%mm4
- psubsw %mm1,%mm0
- psubsw %mm5,%mm4
-
- movq (%edi), %mm1
- punpckldq %mm4, %mm0
- pand one_null, %mm1
- pand null_one, %mm0
- por %mm0, %mm1
- movq %mm1,(%edi)
-
- subl $64,%esi
- addl $128,%edx
- leal 8(%edi),%edi
- decl %ecx
- jnz .L4
-
- movq (%edx),%mm0
- pmaddwd (%esi),%mm0
- movq 8(%edx),%mm1
- pmaddwd 8(%esi),%mm1
- movq 16(%edx),%mm2
- pmaddwd 16(%esi),%mm2
- movq 24(%edx),%mm3
- pmaddwd 24(%esi),%mm3
- paddd %mm1,%mm0
- paddd %mm2,%mm0
- paddd %mm3,%mm0
- movq %mm0,%mm1
- psrlq $32,%mm1
- paddd %mm0,%mm1
- psrad $13,%mm1
- packssdw %mm1,%mm1
- psubd %mm0,%mm0
- psubsw %mm1,%mm0
- movd %mm0,%eax
- movw %ax,(%edi)
-
- emms
- popl %ebx
- popl %esi
- popl %edi
- popl %ebp
- ret