From fc77a62a658d6ec49683b1cccc933e406cab7bd0 Mon Sep 17 00:00:00 2001 From: nick Date: Sun, 13 Jan 2002 18:26:58 +0000 Subject: S->C git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4143 b3059339-0415-0410-9bf9-f77b7e298cf2 --- mp3lib/Makefile | 2 +- mp3lib/decode_MMX.c | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++ mp3lib/decode_MMX.s | 252 ---------------------------------------------------- 3 files changed, 245 insertions(+), 253 deletions(-) create mode 100644 mp3lib/decode_MMX.c delete mode 100644 mp3lib/decode_MMX.s (limited to 'mp3lib') diff --git a/mp3lib/Makefile b/mp3lib/Makefile index 49c18c3a5f..eb526f3d15 100644 --- a/mp3lib/Makefile +++ b/mp3lib/Makefile @@ -12,7 +12,7 @@ ifeq ($(TARGET_ARCH_X86),yes) SRCS += d_cpu.s decode_i586.s OBJS += d_cpu.o decode_i586.o ifeq ($(TARGET_MMX),yes) -SRCS += decode_MMX.s dct64_MMX.s tabinit_MMX.c +SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o endif #ifeq ($(TARGET_SSE),yes) diff --git a/mp3lib/decode_MMX.c b/mp3lib/decode_MMX.c new file mode 100644 index 0000000000..97ac7f34d9 --- /dev/null +++ b/mp3lib/decode_MMX.c @@ -0,0 +1,244 @@ +/* + * this code comes under GPL + * This code was taken from http://www.mpg123.org + * See ChangeLog of mpg123-0.59s-pre.1 for detail + * Applied to mplayer by Nick Kurshev + * + * Local ChangeLog: + * - Partial loops unrolling and removing MOVW insn from loops +*/ +#define real float /* ugly - but only way */ + +static unsigned long long __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; +static unsigned long long __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; +unsigned long __attribute__((aligned(8))) costab_mmx[] = +{ + 1056974725, + 1057056395, + 1057223771, + 1057485416, + 1057855544, + 1058356026, + 1059019886, + 1059897405, + 1061067246, + 1062657950, + 1064892987, + 1066774581, + 1069414683, + 1073984175, + 1079645762, + 1092815430, + 1057005197, + 1057342072, + 1058087743, + 1059427869, + 1061799040, + 1065862217, + 1071413542, + 1084439708, + 1057128951, + 1058664893, + 1063675095, + 1076102863, + 1057655764, + 1067924853, + 1060439283, +}; + +void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples, + short *buffs, int *bo) +{ + +__asm __volatile( + "movl %1,%%ecx\n\t" + "movl %2,%%edi\n\t" + "movl $15,%%ebx\n\t" + "movl %4,%%edx\n\t" + "leal (%%edi,%%ecx,2),%%edi\n\t" + "decl %%ecx\n\t" + "movl %3,%%esi\n\t" + "movl (%%edx),%%eax\n\t" + "jecxz .L1\n\t" + "decl %%eax\n\t" + "andl %%ebx,%%eax\n\t" + "leal 1088(%%esi),%%esi\n\t" + "movl %%eax,(%%edx)\n\t" +".L1:\n\t" + "leal (%%esi,%%eax,2),%%edx\n\t" + "movl %%eax,%%ebp\n\t" + "incl %%eax\n\t" + "pushl %0\n\t" + "andl %%ebx,%%eax\n\t" + "leal 544(%%esi,%%eax,2),%%ecx\n\t" + "incl %%ebx\n\t" + "testl $1, %%eax\n\t" + "jnz .L2\n\t" + "xchgl %%edx,%%ecx\n\t" + "incl %%ebp\n\t" + "leal 544(%%esi),%%esi\n\t" +".L2:\n\t" + "emms\n\t" + "pushl %%edx\n\t" + "pushl %%ecx\n\t" + "call *dct64_MMX_func\n\t" + "leal 1(%%ebx), %%ecx\n\t" + "subl %%ebp,%%ebx\n\t" + "pushl %%ecx\n\t" + "leal decwins(%%ebx,%%ebx,1), %%edx\n\t" + "shrl $1, %%ecx\n\t" +".align 16\n\t" +".L3:\n\t" + "movq (%%edx),%%mm0\n\t" + "movq 64(%%edx),%%mm4\n\t" + "pmaddwd (%%esi),%%mm0\n\t" + "pmaddwd 32(%%esi),%%mm4\n\t" + "movq 8(%%edx),%%mm1\n\t" + "movq 72(%%edx),%%mm5\n\t" + "pmaddwd 8(%%esi),%%mm1\n\t" + "pmaddwd 40(%%esi),%%mm5\n\t" + "movq 16(%%edx),%%mm2\n\t" + "movq 80(%%edx),%%mm6\n\t" + "pmaddwd 16(%%esi),%%mm2\n\t" + "pmaddwd 48(%%esi),%%mm6\n\t" + "movq 24(%%edx),%%mm3\n\t" + "movq 88(%%edx),%%mm7\n\t" + "pmaddwd 24(%%esi),%%mm3\n\t" + "pmaddwd 56(%%esi),%%mm7\n\t" + "paddd %%mm1,%%mm0\n\t" + "paddd %%mm5,%%mm4\n\t" + "paddd %%mm2,%%mm0\n\t" + "paddd %%mm6,%%mm4\n\t" + "paddd %%mm3,%%mm0\n\t" + "paddd %%mm7,%%mm4\n\t" + "movq %%mm0,%%mm1\n\t" + "movq %%mm4,%%mm5\n\t" + "psrlq $32,%%mm1\n\t" + "psrlq $32,%%mm5\n\t" + "paddd %%mm1,%%mm0\n\t" + "paddd %%mm5,%%mm4\n\t" + "psrad $13,%%mm0\n\t" + "psrad $13,%%mm4\n\t" + "packssdw %%mm0,%%mm0\n\t" + "packssdw %%mm4,%%mm4\n\t" + + "movq (%%edi), %%mm1\n\t" + "punpckldq %%mm4, %%mm0\n\t" + "pand one_null, %%mm1\n\t" + "pand null_one, %%mm0\n\t" + "por %%mm0, %%mm1\n\t" + "movq %%mm1,(%%edi)\n\t" + + "leal 64(%%esi),%%esi\n\t" + "leal 128(%%edx),%%edx\n\t" + "leal 8(%%edi),%%edi\n\t" + + "decl %%ecx\n\t" + "jnz .L3\n\t" + + "popl %%ecx\n\t" + "andl $1, %%ecx\n\t" + "jecxz .next_loop\n\t" + + "movq (%%edx),%%mm0\n\t" + "pmaddwd (%%esi),%%mm0\n\t" + "movq 8(%%edx),%%mm1\n\t" + "pmaddwd 8(%%esi),%%mm1\n\t" + "movq 16(%%edx),%%mm2\n\t" + "pmaddwd 16(%%esi),%%mm2\n\t" + "movq 24(%%edx),%%mm3\n\t" + "pmaddwd 24(%%esi),%%mm3\n\t" + "paddd %%mm1,%%mm0\n\t" + "paddd %%mm2,%%mm0\n\t" + "paddd %%mm3,%%mm0\n\t" + "movq %%mm0,%%mm1\n\t" + "psrlq $32,%%mm1\n\t" + "paddd %%mm1,%%mm0\n\t" + "psrad $13,%%mm0\n\t" + "packssdw %%mm0,%%mm0\n\t" + "movd %%mm0,%%eax\n\t" + "movw %%ax, (%%edi)\n\t" + "leal 32(%%esi),%%esi\n\t" + "leal 64(%%edx),%%edx\n\t" + "leal 4(%%edi),%%edi\n\t" + +".next_loop:\n\t" + "subl $64,%%esi\n\t" + "movl $7,%%ecx\n\t" +".align 16\n\t" +".L4:\n\t" + "movq (%%edx),%%mm0\n\t" + "movq 64(%%edx),%%mm4\n\t" + "pmaddwd (%%esi),%%mm0\n\t" + "pmaddwd -32(%%esi),%%mm4\n\t" + "movq 8(%%edx),%%mm1\n\t" + "movq 72(%%edx),%%mm5\n\t" + "pmaddwd 8(%%esi),%%mm1\n\t" + "pmaddwd -24(%%esi),%%mm5\n\t" + "movq 16(%%edx),%%mm2\n\t" + "movq 80(%%edx),%%mm6\n\t" + "pmaddwd 16(%%esi),%%mm2\n\t" + "pmaddwd -16(%%esi),%%mm6\n\t" + "movq 24(%%edx),%%mm3\n\t" + "movq 88(%%edx),%%mm7\n\t" + "pmaddwd 24(%%esi),%%mm3\n\t" + "pmaddwd -8(%%esi),%%mm7\n\t" + "paddd %%mm1,%%mm0\n\t" + "paddd %%mm5,%%mm4\n\t" + "paddd %%mm2,%%mm0\n\t" + "paddd %%mm6,%%mm4\n\t" + "paddd %%mm3,%%mm0\n\t" + "paddd %%mm7,%%mm4\n\t" + "movq %%mm0,%%mm1\n\t" + "movq %%mm4,%%mm5\n\t" + "psrlq $32,%%mm1\n\t" + "psrlq $32,%%mm5\n\t" + "paddd %%mm0,%%mm1\n\t" + "paddd %%mm4,%%mm5\n\t" + "psrad $13,%%mm1\n\t" + "psrad $13,%%mm5\n\t" + "packssdw %%mm1,%%mm1\n\t" + "packssdw %%mm5,%%mm5\n\t" + "psubd %%mm0,%%mm0\n\t" + "psubd %%mm4,%%mm4\n\t" + "psubsw %%mm1,%%mm0\n\t" + "psubsw %%mm5,%%mm4\n\t" + + "movq (%%edi), %%mm1\n\t" + "punpckldq %%mm4, %%mm0\n\t" + "pand one_null, %%mm1\n\t" + "pand null_one, %%mm0\n\t" + "por %%mm0, %%mm1\n\t" + "movq %%mm1,(%%edi)\n\t" + + "subl $64,%%esi\n\t" + "addl $128,%%edx\n\t" + "leal 8(%%edi),%%edi\n\t" + "decl %%ecx\n\t" + "jnz .L4\n\t" + + "movq (%%edx),%%mm0\n\t" + "pmaddwd (%%esi),%%mm0\n\t" + "movq 8(%%edx),%%mm1\n\t" + "pmaddwd 8(%%esi),%%mm1\n\t" + "movq 16(%%edx),%%mm2\n\t" + "pmaddwd 16(%%esi),%%mm2\n\t" + "movq 24(%%edx),%%mm3\n\t" + "pmaddwd 24(%%esi),%%mm3\n\t" + "paddd %%mm1,%%mm0\n\t" + "paddd %%mm2,%%mm0\n\t" + "paddd %%mm3,%%mm0\n\t" + "movq %%mm0,%%mm1\n\t" + "psrlq $32,%%mm1\n\t" + "paddd %%mm0,%%mm1\n\t" + "psrad $13,%%mm1\n\t" + "packssdw %%mm1,%%mm1\n\t" + "psubd %%mm0,%%mm0\n\t" + "psubsw %%mm1,%%mm0\n\t" + "movd %%mm0,%%eax\n\t" + "movw %%ax,(%%edi)\n\t" + "emms\n\t" + : + :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo) + :"memory","%ebp","%edi","%esi","%ebx"); +} diff --git a/mp3lib/decode_MMX.s b/mp3lib/decode_MMX.s deleted file mode 100644 index cd770f3231..0000000000 --- a/mp3lib/decode_MMX.s +++ /dev/null @@ -1,252 +0,0 @@ -# this code comes under GPL -# This code was taken from http://www.mpg123.org -# See ChangeLog of mpg123-0.59s-pre.1 for detail -# Applied to mplayer by Nick Kurshev -# -# Local ChangeLog: -# - Partial loops unrolling and removing MOVW insn from loops -# - -.data -.align 8 -null_one: .long 0x0000ffff, 0x0000ffff -one_null: .long 0xffff0000, 0xffff0000 -.globl costab_mmx -costab_mmx: - .long 1056974725 - .long 1057056395 - .long 1057223771 - .long 1057485416 - .long 1057855544 - .long 1058356026 - .long 1059019886 - .long 1059897405 - .long 1061067246 - .long 1062657950 - .long 1064892987 - .long 1066774581 - .long 1069414683 - .long 1073984175 - .long 1079645762 - .long 1092815430 - .long 1057005197 - .long 1057342072 - .long 1058087743 - .long 1059427869 - .long 1061799040 - .long 1065862217 - .long 1071413542 - .long 1084439708 - .long 1057128951 - .long 1058664893 - .long 1063675095 - .long 1076102863 - .long 1057655764 - .long 1067924853 - .long 1060439283 - -.text - -.globl synth_1to1_MMX_s -// -// void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples, -// short *buffs, int *bo); -// -synth_1to1_MMX_s: - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - movl 24(%esp),%ecx - movl 28(%esp),%edi - movl $15,%ebx - movl 36(%esp),%edx - leal (%edi,%ecx,2),%edi - decl %ecx - movl 32(%esp),%esi - movl (%edx),%eax - jecxz .L1 - decl %eax - andl %ebx,%eax - leal 1088(%esi),%esi - movl %eax,(%edx) -.L1: - leal (%esi,%eax,2),%edx - movl %eax,%ebp - incl %eax - pushl 20(%esp) - andl %ebx,%eax - leal 544(%esi,%eax,2),%ecx - incl %ebx - testl $1, %eax - jnz .L2 - xchgl %edx,%ecx - incl %ebp - leal 544(%esi),%esi -.L2: - emms - pushl %edx - pushl %ecx - call *dct64_MMX_func - leal 1(%ebx), %ecx - subl %ebp,%ebx - pushl %ecx - leal decwins(%ebx,%ebx,1), %edx - shrl $1, %ecx -.align 16 -.L3: - movq (%edx),%mm0 - movq 64(%edx),%mm4 - pmaddwd (%esi),%mm0 - pmaddwd 32(%esi),%mm4 - movq 8(%edx),%mm1 - movq 72(%edx),%mm5 - pmaddwd 8(%esi),%mm1 - pmaddwd 40(%esi),%mm5 - movq 16(%edx),%mm2 - movq 80(%edx),%mm6 - pmaddwd 16(%esi),%mm2 - pmaddwd 48(%esi),%mm6 - movq 24(%edx),%mm3 - movq 88(%edx),%mm7 - pmaddwd 24(%esi),%mm3 - pmaddwd 56(%esi),%mm7 - paddd %mm1,%mm0 - paddd %mm5,%mm4 - paddd %mm2,%mm0 - paddd %mm6,%mm4 - paddd %mm3,%mm0 - paddd %mm7,%mm4 - movq %mm0,%mm1 - movq %mm4,%mm5 - psrlq $32,%mm1 - psrlq $32,%mm5 - paddd %mm1,%mm0 - paddd %mm5,%mm4 - psrad $13,%mm0 - psrad $13,%mm4 - packssdw %mm0,%mm0 - packssdw %mm4,%mm4 - - movq (%edi), %mm1 - punpckldq %mm4, %mm0 - pand one_null, %mm1 - pand null_one, %mm0 - por %mm0, %mm1 - movq %mm1,(%edi) - - leal 64(%esi),%esi - leal 128(%edx),%edx - leal 8(%edi),%edi - - decl %ecx - jnz .L3 - - popl %ecx - andl $1, %ecx - jecxz .next_loop - - movq (%edx),%mm0 - pmaddwd (%esi),%mm0 - movq 8(%edx),%mm1 - pmaddwd 8(%esi),%mm1 - movq 16(%edx),%mm2 - pmaddwd 16(%esi),%mm2 - movq 24(%edx),%mm3 - pmaddwd 24(%esi),%mm3 - paddd %mm1,%mm0 - paddd %mm2,%mm0 - paddd %mm3,%mm0 - movq %mm0,%mm1 - psrlq $32,%mm1 - paddd %mm1,%mm0 - psrad $13,%mm0 - packssdw %mm0,%mm0 - movd %mm0,%eax - movw %ax, (%edi) - leal 32(%esi),%esi - leal 64(%edx),%edx - leal 4(%edi),%edi - -.next_loop: - subl $64,%esi - movl $7,%ecx -.align 16 -.L4: - movq (%edx),%mm0 - movq 64(%edx),%mm4 - pmaddwd (%esi),%mm0 - pmaddwd -32(%esi),%mm4 - movq 8(%edx),%mm1 - movq 72(%edx),%mm5 - pmaddwd 8(%esi),%mm1 - pmaddwd -24(%esi),%mm5 - movq 16(%edx),%mm2 - movq 80(%edx),%mm6 - pmaddwd 16(%esi),%mm2 - pmaddwd -16(%esi),%mm6 - movq 24(%edx),%mm3 - movq 88(%edx),%mm7 - pmaddwd 24(%esi),%mm3 - pmaddwd -8(%esi),%mm7 - paddd %mm1,%mm0 - paddd %mm5,%mm4 - paddd %mm2,%mm0 - paddd %mm6,%mm4 - paddd %mm3,%mm0 - paddd %mm7,%mm4 - movq %mm0,%mm1 - movq %mm4,%mm5 - psrlq $32,%mm1 - psrlq $32,%mm5 - paddd %mm0,%mm1 - paddd %mm4,%mm5 - psrad $13,%mm1 - psrad $13,%mm5 - packssdw %mm1,%mm1 - packssdw %mm5,%mm5 - psubd %mm0,%mm0 - psubd %mm4,%mm4 - psubsw %mm1,%mm0 - psubsw %mm5,%mm4 - - movq (%edi), %mm1 - punpckldq %mm4, %mm0 - pand one_null, %mm1 - pand null_one, %mm0 - por %mm0, %mm1 - movq %mm1,(%edi) - - subl $64,%esi - addl $128,%edx - leal 8(%edi),%edi - decl %ecx - jnz .L4 - - movq (%edx),%mm0 - pmaddwd (%esi),%mm0 - movq 8(%edx),%mm1 - pmaddwd 8(%esi),%mm1 - movq 16(%edx),%mm2 - pmaddwd 16(%esi),%mm2 - movq 24(%edx),%mm3 - pmaddwd 24(%esi),%mm3 - paddd %mm1,%mm0 - paddd %mm2,%mm0 - paddd %mm3,%mm0 - movq %mm0,%mm1 - psrlq $32,%mm1 - paddd %mm0,%mm1 - psrad $13,%mm1 - packssdw %mm1,%mm1 - psubd %mm0,%mm0 - psubsw %mm1,%mm0 - movd %mm0,%eax - movw %ax,(%edi) - - emms - popl %ebx - popl %esi - popl %edi - popl %ebp - ret -- cgit v1.2.3