summaryrefslogtreecommitdiffstats
path: root/mp3lib/decode_i586.c
diff options
context:
space:
mode:
authornick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-14 09:32:51 +0000
committernick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-14 09:32:51 +0000
commit2da69665f6d72867a7507510ef87b1268e9752e9 (patch)
tree60eddd579ba73f1a47916901e217098d5d102255 /mp3lib/decode_i586.c
parent8629e9e819cfce49406f55c2655bd1ad6f72d0df (diff)
downloadmpv-2da69665f6d72867a7507510ef87b1268e9752e9.tar.bz2
mpv-2da69665f6d72867a7507510ef87b1268e9752e9.tar.xz
S->C
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4148 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'mp3lib/decode_i586.c')
-rw-r--r--mp3lib/decode_i586.c307
1 files changed, 307 insertions, 0 deletions
diff --git a/mp3lib/decode_i586.c b/mp3lib/decode_i586.c
new file mode 100644
index 0000000000..d0ec6e758c
--- /dev/null
+++ b/mp3lib/decode_i586.c
@@ -0,0 +1,307 @@
+/*
+* mpg123_synth_1to1 works the same way as the c version of this
+* file. only two types of changes have been made:
+* - reordered floating point instructions to
+* prevent pipeline stalls
+* - made WRITE_SAMPLE use integer instead of
+* (slower) floating point
+* all kinds of x86 processors should benefit from these
+* modifications.
+*
+* useful sources of information on optimizing x86 code include:
+*
+* Intel Architecture Optimization Manual
+* http://www.intel.com/design/pentium/manuals/242816.htm
+*
+* Cyrix 6x86 Instruction Set Summary
+* ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
+*
+* AMD-K5 Processor Software Development
+* http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
+*
+* Stefan Bieschewski <stb@acm.org>
+*
+* $Id$
+*/
+#define real float /* ugly - but only way */
+
+static long buffs[1088]; /* work buffer; referenced only by symbol name ($buffs, $buffs+2176) from the inline asm in synth_1to1_pent. NOTE(review): the byte offsets used there presumably assume a 4-byte long - confirm */
+static long bo=1; /* buffer offset; loaded and stored by symbol name in the same asm, which updates it as (bo-1)&15. NOTE(review): the compiler cannot see these asm-only uses of the two statics - confirm they are never discarded or renamed */
+
+int synth_1to1_pent(real *bandPtr, int channel, short *samples) /* writes 32 clamped 16-bit samples, one every 4 bytes starting at samples (+2 bytes when channel != 0); returns how many of them had to be clamped */
+{
+ real tmp[3]; /* scratch memory handed to the asm as operand %4; the asm also addresses 4+%4 and 8+%4 */
+ register int retval; /* receives %eax (the clamp count) when the asm finishes */
+ __asm __volatile(
+" movl %1,%%eax\n\t"/*bandPtr*/
+" movl %3,%%esi\n\t" /* esi = samples, the output write pointer */
+" xorl %%edi,%%edi\n\t" /* edi = 0; incremented once per clamped sample */
+" movl bo,%%ebp\n\t" /* ebp = bo */
+" cmpl %%edi,%2\n\t" /* channel == 0 ? */
+" jne .L48\n\t" /* channel != 0: bo is left unchanged */
+" decl %%ebp\n\t" /* channel == 0: bo = (bo - 1) & 15 ... */
+" andl $15,%%ebp\n\t"
+" movl %%ebp,bo\n\t" /* ... and store it back */
+" movl $buffs,%%ecx\n\t" /* ecx = buffs */
+" jmp .L49\n\t"
+".L48:\n\t"
+" addl $2,%%esi\n\t" /* channel != 0: start 2 bytes (one short) further into samples */
+" movl $buffs+2176,%%ecx\n\t" /* ecx = buffs + 2176 bytes */
+".L49:\n\t"
+" testl $1,%%ebp\n\t" /* is bo odd or even? */
+" je .L50\n\t"
+" movl %%ecx,%%ebx\n\t" /* odd bo: ebx = ecx */
+" movl %%ebp,%4\n\t" /* tmp[0] = bo */
+" pushl %%eax\n\t" /* push bandPtr for dct64 */
+" movl 4+%4,%%edx\n\t" /* NOTE(review): re-reads the value stored into %4 only if %4 is addressed relative to %esp (the pushl above moved %esp by 4); otherwise this reads tmp[1] - confirm */
+" leal (%%ebx,%%edx,4),%%eax\n\t" /* eax = ebx + 4*edx */
+" pushl %%eax\n\t" /* push it for dct64 */
+" movl 8+%4,%%eax\n\t" /* same %esp-relative caveat, now two pushes deep */
+" incl %%eax\n\t" /* eax = (eax + 1) & 15 ... */
+" andl $15,%%eax\n\t"
+" leal 1088(,%%eax,4),%%eax\n\t" /* ... scaled by 4, plus 1088 bytes ... */
+" addl %%ebx,%%eax\n\t" /* ... plus ebx */
+" jmp .L74\n\t"
+".L50:\n\t"
+" leal 1088(%%ecx),%%ebx\n\t" /* even bo: ebx = ecx + 1088 bytes */
+" leal 1(%%ebp),%%edx\n\t"
+" movl %%edx,%4\n\t" /* tmp[0] = bo + 1 */
+" pushl %%eax\n\t" /* push bandPtr for dct64 */
+" leal 1092(%%ecx,%%ebp,4),%%eax\n\t" /* eax = ecx + 1092 + 4*bo */
+" pushl %%eax\n\t" /* push it for dct64 */
+" leal (%%ecx,%%ebp,4),%%eax\n\t" /* eax = ecx + 4*bo */
+".L74:\n\t"
+" pushl %%eax\n\t" /* last of the three pushed arguments */
+" call dct64\n\t" /* external symbol, called with three 4-byte stack arguments */
+" addl $12,%%esp\n\t" /* drop the three arguments again */
+" movl %4,%%edx\n\t" /* edx = tmp[0] */
+" leal 0(,%%edx,4),%%edx\n\t" /* edx *= 4 */
+" movl $decwin+64,%%eax\n\t" /* decwin is an external symbol */
+" movl %%eax,%%ecx\n\t"
+" subl %%edx,%%ecx\n\t" /* ecx = decwin + 64 - 4*tmp[0] (byte arithmetic) */
+" movl $16,%%ebp\n\t" /* first loop runs 16 times */
+".L55:\n\t" /* per iteration: 16 float products (ecx)[k]*(ebx)[k], folded together with alternating fsubrp/faddp */
+" flds (%%ecx)\n\t"
+" fmuls (%%ebx)\n\t"
+" flds 4(%%ecx)\n\t"
+" fmuls 4(%%ebx)\n\t"
+" fxch %%st(1)\n\t"
+" flds 8(%%ecx)\n\t"
+" fmuls 8(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 12(%%ecx)\n\t"
+" fmuls 12(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 16(%%ecx)\n\t"
+" fmuls 16(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 20(%%ecx)\n\t"
+" fmuls 20(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 24(%%ecx)\n\t"
+" fmuls 24(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 28(%%ecx)\n\t"
+" fmuls 28(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 32(%%ecx)\n\t"
+" fmuls 32(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 36(%%ecx)\n\t"
+" fmuls 36(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 40(%%ecx)\n\t"
+" fmuls 40(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 44(%%ecx)\n\t"
+" fmuls 44(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 48(%%ecx)\n\t"
+" fmuls 48(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 52(%%ecx)\n\t"
+" fmuls 52(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 56(%%ecx)\n\t"
+" fmuls 56(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 60(%%ecx)\n\t"
+" fmuls 60(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" subl $4,%%esp\n\t" /* reserve a 4-byte stack slot for the integer result */
+" faddp %%st,%%st(1)\n\t"
+" fxch %%st(1)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" fistpl (%%esp)\n\t" /* store the sum as a 32-bit integer ... */
+" popl %%eax\n\t" /* ... and fetch it into eax */
+" cmpl $32767,%%eax\n\t" /* clamp to the signed 16-bit range */
+" jg 1f\n\t"
+" cmpl $-32768,%%eax\n\t"
+" jl 2f\n\t"
+" movw %%ax,(%%esi)\n\t" /* in range: store as-is and skip the clamp counter */
+" jmp 4f\n\t"
+"1: movw $32767,(%%esi)\n\t"
+" jmp 3f\n\t"
+"2: movw $-32768,(%%esi)\n\t"
+"3: incl %%edi\n\t" /* one more clamped sample */
+"4:\n\t"
+".L54:\n\t"
+" addl $64,%%ebx\n\t" /* ebx += 64 bytes */
+" subl $-128,%%ecx\n\t" /* ecx += 128 bytes */
+" addl $4,%%esi\n\t" /* output advances 4 bytes, so every other short is written */
+" decl %%ebp\n\t"
+" jnz .L55\n\t"
+" flds (%%ecx)\n\t" /* single middle sample: 8 products at byte offsets 0,8,...,56, folded with faddp only */
+" fmuls (%%ebx)\n\t"
+" flds 8(%%ecx)\n\t"
+" fmuls 8(%%ebx)\n\t"
+" flds 16(%%ecx)\n\t"
+" fmuls 16(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 24(%%ecx)\n\t"
+" fmuls 24(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 32(%%ecx)\n\t"
+" fmuls 32(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 40(%%ecx)\n\t"
+" fmuls 40(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 48(%%ecx)\n\t"
+" fmuls 48(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 56(%%ecx)\n\t"
+" fmuls 56(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" subl $4,%%esp\n\t" /* stack slot for the integer result */
+" faddp %%st,%%st(1)\n\t"
+" fxch %%st(1)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" fistpl (%%esp)\n\t"
+" popl %%eax\n\t"
+" cmpl $32767,%%eax\n\t" /* same clamp-and-count sequence as in the first loop */
+" jg 1f\n\t"
+" cmpl $-32768,%%eax\n\t"
+" jl 2f\n\t"
+" movw %%ax,(%%esi)\n\t"
+" jmp 4f\n\t"
+"1: movw $32767,(%%esi)\n\t"
+" jmp 3f\n\t"
+"2: movw $-32768,(%%esi)\n\t"
+"3: incl %%edi\n\t"
+"4:\n\t"
+".L62:\n\t"
+" addl $-64,%%ebx\n\t" /* ebx -= 64 bytes: from here on ebx walks backwards */
+" addl $4,%%esi\n\t"
+" movl %4,%%edx\n\t" /* edx = tmp[0] */
+" leal -128(%%ecx,%%edx,8),%%ecx\n\t" /* ecx = ecx - 128 + 8*tmp[0] */
+" movl $15,%%ebp\n\t" /* second loop runs 15 times */
+".L68:\n\t" /* per iteration: ecx is read at -4..-60 and then 0, ebx at 0..60; first product sign-flipped (fchs), the rest folded with fsubrp */
+" flds -4(%%ecx)\n\t"
+" fchs\n\t"
+" fmuls (%%ebx)\n\t"
+" flds -8(%%ecx)\n\t"
+" fmuls 4(%%ebx)\n\t"
+" fxch %%st(1)\n\t"
+" flds -12(%%ecx)\n\t"
+" fmuls 8(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -16(%%ecx)\n\t"
+" fmuls 12(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -20(%%ecx)\n\t"
+" fmuls 16(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -24(%%ecx)\n\t"
+" fmuls 20(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -28(%%ecx)\n\t"
+" fmuls 24(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -32(%%ecx)\n\t"
+" fmuls 28(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -36(%%ecx)\n\t"
+" fmuls 32(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -40(%%ecx)\n\t"
+" fmuls 36(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -44(%%ecx)\n\t"
+" fmuls 40(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -48(%%ecx)\n\t"
+" fmuls 44(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -52(%%ecx)\n\t"
+" fmuls 48(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -56(%%ecx)\n\t"
+" fmuls 52(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -60(%%ecx)\n\t"
+" fmuls 56(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds (%%ecx)\n\t"
+" fmuls 60(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" subl $4,%%esp\n\t" /* stack slot for the integer result */
+" fsubrp %%st,%%st(1)\n\t"
+" fxch %%st(1)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" fistpl (%%esp)\n\t"
+" popl %%eax\n\t"
+" cmpl $32767,%%eax\n\t" /* same clamp-and-count sequence as in the first loop */
+" jg 1f\n\t"
+" cmpl $-32768,%%eax\n\t"
+" jl 2f\n\t"
+" movw %%ax,(%%esi)\n\t"
+" jmp 4f\n\t"
+"1: movw $32767,(%%esi)\n\t"
+" jmp 3f\n\t"
+"2: movw $-32768,(%%esi)\n\t"
+"3: incl %%edi\n\t"
+"4:\n\t"
+".L67:\n\t"
+" addl $-64,%%ebx\n\t" /* ebx -= 64 bytes */
+" addl $-128,%%ecx\n\t" /* ecx -= 128 bytes */
+" addl $4,%%esi\n\t"
+" decl %%ebp\n\t"
+" jnz .L68\n\t"
+" movl %%edi,%%eax\n\t" /* hand the clamp count back through eax */
+ :"=a"(retval)
+ :"m"(bandPtr),"m"(channel),"m"(samples),"m"(tmp[0])
+ :"memory","%ebp","%edi","%esi","%ebx","%ecx","%edx"); /* %ecx and %edx are overwritten by the asm body and are not outputs, so they must be declared as clobbered as well */
+ return retval;
+}