Initial revision

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2
author: arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-02-24 20:28:24 +0000
committer: arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-02-24 20:28:24 +0000
commit: d34041569e71fc9bd772354e94dc9d16061072a5 (patch)
tree: 8f481cae1c70f32d1756fbe5f39000577b73042d /mp3lib
parent: e95a95ece09bac96bdfd37322f96c6f57ef79ebc (diff)
download: mpv-d34041569e71fc9bd772354e94dc9d16061072a5.tar.bz2
mpv-d34041569e71fc9bd772354e94dc9d16061072a5.tar.xz
22 files changed, 6463 insertions, 0 deletions
diff --git a/mp3lib/Makefile b/mp3lib/Makefile
new file mode 100644
index 0000000000..774fed3fd4
--- /dev/null
+++ b/mp3lib/Makefile
@@ -0,0 +1,34 @@
+
+include config.mak
+
+SRCS = sr1.c d_cpu.s decode_i586.s $(OPTIONAL_SRCS)
+OBJS = sr1.o d_cpu.o decode_i586.o $(OPTIONAL_OBJS)
+CFLAGS  = $(OPTFLAGS)
+
+.SUFFIXES: .c .o
+
+# .PHONY: all clean
+
+.c.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+.s.o:
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+libMP3.a:	$(OBJS)
+	$(AR) r libMP3.a $(OBJS)
+
+all:	libMP3.a
+
+clean:
+	rm -f *~ *.o *.a
+
+distclean:
+	makedepend
+	rm -f *~ *.o *.a Makefile.bak
+
+dep:	depend
+
+depend:
+	makedepend -- $(CFLAGS) -- $(SRCS) &>/dev/null
+# DO NOT DELETE
diff --git a/mp3lib/config.mak b/mp3lib/config.mak
new file mode 100644
index 0000000000..8aacf246da
--- /dev/null
+++ b/mp3lib/config.mak
@@ -0,0 +1,6 @@
+
+include ../config.mak
+
+OPTIONAL_SRCS = 
+OPTIONAL_OBJS = 
+
diff --git a/mp3lib/d_cpu.h b/mp3lib/d_cpu.h
new file mode 100644
index 0000000000..d215c4d42a
--- /dev/null
+++ b/mp3lib/d_cpu.h
@@ -0,0 +1,17 @@
+
+// --------------------------------------------------------------------------
+//  Cpu function detect by Pontscho/fresh!mindworkz
+// --------------------------------------------------------------------------
+
+#ifndef __MY_CPUIDENT
+#define __MY_CPUIDENT
+
+unsigned int _CpuID;
+unsigned int _i586;
+unsigned int _3dnow;
+
+extern unsigned long CpuDetect( void );
+extern unsigned long ipentium( void );
+extern unsigned long a3dnow( void );
+
+#endif
+\ No newline at end of file
diff --git a/mp3lib/d_cpu.s b/mp3lib/d_cpu.s
new file mode 100644
index 0000000000..34ca9730c3
--- /dev/null
+++ b/mp3lib/d_cpu.s
@@ -0,0 +1,94 @@
+
+/ ---------------------------------------------------------------------------
+/  Cpu function detect by Pontscho/fresh!mindworkz
+/   (c) 2000 - 2000
+/ ---------------------------------------------------------------------------
+
+.text
+
+.globl CpuDetect
+.globl ipentium
+.globl a3dnow
+
+/ ---------------------------------------------------------------------------
+/  in C: unsigned long CpuDetect( void );
+/   return: cpu ident number.
+/ ---------------------------------------------------------------------------
+CpuDetect:
+        pushl %ebx
+        pushl %ecx
+        pushl %edx
+
+        movl  $1,%eax
+        cpuid
+
+        popl  %edx
+        popl  %ecx
+        popl  %ebx
+        ret
+
+/ ---------------------------------------------------------------------------
+/  in C: unsigled long ipentium( void );
+/   return: 0 if the processor is not P5 or above else above 1.
+/ ---------------------------------------------------------------------------
+ipentium:
+        pushl  %ebx
+        pushl  %ecx
+        pushl  %edx
+        pushfl
+        popl   %eax
+        movl   %eax,%ebx
+        xorl   $0x00200000,%eax
+        pushl  %eax
+        popfl
+        pushfl
+        popl   %eax
+        cmpl   %eax,%ebx
+        jz     no_cpuid
+        movl   $1,%eax
+        cpuid
+        shrl   $8,%eax
+        cmpl   $5,%eax
+        jb     no_cpuid
+        movl   $1,%eax
+        jmp    exit
+no_cpuid:
+        xorl   %eax,%eax
+exit:
+        popl   %edx
+        popl   %ecx
+        popl   %ebx
+        ret
+
+/ ---------------------------------------------------------------------------
+/  in C: unsigned long a3dnow( void );
+/   return: 0 if this processor not requiment 3dnow! else above 1.
+/ ---------------------------------------------------------------------------
+a3dnow:
+        pushl  %ebx
+        pushl  %edx
+        pushl  %ecx
+
+
+        call   ipentium
+        shrl   $1,%eax
+        jnc    no_3dnow
+
+        movl   $0x80000000,%eax
+        cpuid
+        cmpl   $0x80000000,%eax
+        jbe    no_3dnow
+        movl   $0x80000001,%eax
+        cpuid
+        testl  $0x80000000,%edx
+        jz     no_3dnow
+        movl   $1,%eax
+        jmp    exit2
+no_3dnow:
+        xorl   %eax,%eax
+exit2:
+
+        popl   %ecx
+        popl   %edx
+        popl   %ebx
+        ret
diff --git a/mp3lib/dct12.c b/mp3lib/dct12.c
new file mode 100644
index 0000000000..61f2b29900
--- /dev/null
+++ b/mp3lib/dct12.c
@@ -0,0 +1,139 @@
+/*
+ * new DCT12
+ */
+static void dct12(real *in,real *rawout1,real *rawout2,register real *wi,register real *ts)
+{
+#define DCT12_PART1 \
+             in5 = in[5*3];  \
+     in5 += (in4 = in[4*3]); \
+     in4 += (in3 = in[3*3]); \
+     in3 += (in2 = in[2*3]); \
+     in2 += (in1 = in[1*3]); \
+     in1 += (in0 = in[0*3]); \
+                             \
+     in5 += in3; in3 += in1; \
+                             \
+     in2 *= COS6_1; \
+     in3 *= COS6_1; \
+
+#define DCT12_PART2 \
+     in0 += in4 * COS6_2; \
+                          \
+     in4 = in0 + in2;     \
+     in0 -= in2;          \
+                          \
+     in1 += in5 * COS6_2; \
+                          \
+     in5 = (in1 + in3) * tfcos12[0]; \
+     in1 = (in1 - in3) * tfcos12[2]; \
+                         \
+     in3 = in4 + in5;    \
+     in4 -= in5;         \
+                         \
+     in2 = in0 + in1;    \
+     in0 -= in1;
+
+
+   {
+     real in0,in1,in2,in3,in4,in5;
+     register real *out1 = rawout1;
+     ts[SBLIMIT*0] = out1[0]; ts[SBLIMIT*1] = out1[1]; ts[SBLIMIT*2] = out1[2];
+     ts[SBLIMIT*3] = out1[3]; ts[SBLIMIT*4] = out1[4]; ts[SBLIMIT*5] = out1[5];
+ 
+     DCT12_PART1
+
+     {
+       real tmp0,tmp1 = (in0 - in4);
+       {
+         real tmp2 = (in1 - in5) * tfcos12[1];
+         tmp0 = tmp1 + tmp2;
+         tmp1 -= tmp2;
+       }
+       ts[(17-1)*SBLIMIT] = out1[17-1] + tmp0 * wi[11-1];
+       ts[(12+1)*SBLIMIT] = out1[12+1] + tmp0 * wi[6+1];
+       ts[(6 +1)*SBLIMIT] = out1[6 +1] + tmp1 * wi[1];
+       ts[(11-1)*SBLIMIT] = out1[11-1] + tmp1 * wi[5-1];
+     }
+
+     DCT12_PART2
+
+     ts[(17-0)*SBLIMIT] = out1[17-0] + in2 * wi[11-0];
+     ts[(12+0)*SBLIMIT] = out1[12+0] + in2 * wi[6+0];
+     ts[(12+2)*SBLIMIT] = out1[12+2] + in3 * wi[6+2];
+     ts[(17-2)*SBLIMIT] = out1[17-2] + in3 * wi[11-2];
+
+     ts[(6+0)*SBLIMIT]  = out1[6+0] + in0 * wi[0];
+     ts[(11-0)*SBLIMIT] = out1[11-0] + in0 * wi[5-0];
+     ts[(6+2)*SBLIMIT]  = out1[6+2] + in4 * wi[2];
+     ts[(11-2)*SBLIMIT] = out1[11-2] + in4 * wi[5-2];
+  }
+
+  in++;
+
+  {
+     real in0,in1,in2,in3,in4,in5;
+     register real *out2 = rawout2;
+ 
+     DCT12_PART1
+
+     {
+       real tmp0,tmp1 = (in0 - in4);
+       {
+         real tmp2 = (in1 - in5) * tfcos12[1];
+         tmp0 = tmp1 + tmp2;
+         tmp1 -= tmp2;
+       }
+       out2[5-1] = tmp0 * wi[11-1];
+       out2[0+1] = tmp0 * wi[6+1];
+       ts[(12+1)*SBLIMIT] += tmp1 * wi[1];
+       ts[(17-1)*SBLIMIT] += tmp1 * wi[5-1];
+     }
+
+     DCT12_PART2
+
+     out2[5-0] = in2 * wi[11-0];
+     out2[0+0] = in2 * wi[6+0];
+     out2[0+2] = in3 * wi[6+2];
+     out2[5-2] = in3 * wi[11-2];
+
+     ts[(12+0)*SBLIMIT] += in0 * wi[0];
+     ts[(17-0)*SBLIMIT] += in0 * wi[5-0];
+     ts[(12+2)*SBLIMIT] += in4 * wi[2];
+     ts[(17-2)*SBLIMIT] += in4 * wi[5-2];
+  }
+
+  in++; 
+
+  {
+     real in0,in1,in2,in3,in4,in5;
+     register real *out2 = rawout2;
+     out2[12]=out2[13]=out2[14]=out2[15]=out2[16]=out2[17]=0.0;
+
+     DCT12_PART1
+
+     {
+       real tmp0,tmp1 = (in0 - in4);
+       {
+         real tmp2 = (in1 - in5) * tfcos12[1];
+         tmp0 = tmp1 + tmp2;
+         tmp1 -= tmp2;
+       }
+       out2[11-1] = tmp0 * wi[11-1];
+       out2[6 +1] = tmp0 * wi[6+1];
+       out2[0+1] += tmp1 * wi[1];
+       out2[5-1] += tmp1 * wi[5-1];
+     }
+
+     DCT12_PART2
+
+     out2[11-0] = in2 * wi[11-0];
+     out2[6 +0] = in2 * wi[6+0];
+     out2[6 +2] = in3 * wi[6+2];
+     out2[11-2] = in3 * wi[11-2];
+
+     out2[0+0] += in0 * wi[0];
+     out2[5-0] += in0 * wi[5-0];
+     out2[0+2] += in4 * wi[2];
+     out2[5-2] += in4 * wi[5-2];
+  }
+}
diff --git a/mp3lib/dct36.c b/mp3lib/dct36.c
new file mode 100644
index 0000000000..04992f09cc
--- /dev/null
+++ b/mp3lib/dct36.c
@@ -0,0 +1,264 @@
+/* 
+// This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
+// Saved one multiplication by doing the 'twiddle factor' stuff
+// together with the window mul. (MH)
+//
+// This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
+// 9 point IDCT needs to be reduced further. Unfortunately, I don't
+// know how to do that, because 9 is not an even number. - Jeff.
+//
+//////////////////////////////////////////////////////////////////
+//
+// 9 Point Inverse Discrete Cosine Transform
+//
+// This piece of code is Copyright 1997 Mikko Tommila and is freely usable
+// by anybody. The algorithm itself is of course in the public domain.
+//
+// Again derived heuristically from the 9-point WFTA.
+//
+// The algorithm is optimized (?) for speed, not for small rounding errors or
+// good readability.
+//
+// 36 additions, 11 multiplications
+//
+// Again this is very likely sub-optimal.
+//
+// The code is optimized to use a minimum number of temporary variables,
+// so it should compile quite well even on 8-register Intel x86 processors.
+// This makes the code quite obfuscated and very difficult to understand.
+//
+// References:
+// [1] S. Winograd: "On Computing the Discrete Fourier Transform",
+//     Mathematics of Computation, Volume 32, Number 141, January 1978,
+//     Pages 175-199
+*/
+
+/*------------------------------------------------------------------*/
+/*                                                                  */
+/*    Function: Calculation of the inverse MDCT                     */
+/*                                                                  */
+/*------------------------------------------------------------------*/
+
+static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
+{
+#ifdef NEW_DCT9
+  real tmp[18];
+#endif
+
+  {
+    register real *in = inbuf;
+
+    in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
+    in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
+    in[11]+=in[10]; in[10]+=in[9];  in[9] +=in[8];
+    in[8] +=in[7];  in[7] +=in[6];  in[6] +=in[5];
+    in[5] +=in[4];  in[4] +=in[3];  in[3] +=in[2];
+    in[2] +=in[1];  in[1] +=in[0];
+
+    in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
+    in[9] +=in[7];  in[7] +=in[5];  in[5] +=in[3];  in[3] +=in[1];
+
+
+#ifdef NEW_DCT9
+    {
+      real t0, t1, t2, t3, t4, t5, t6, t7;
+
+      t1 = COS6_2 * in[12];
+      t2 = COS6_2 * (in[8] + in[16] - in[4]);
+
+      t3 = in[0] + t1;
+      t4 = in[0] - t1 - t1;
+      t5 = t4 - t2;
+
+      t0 = cos9[0] * (in[4] + in[8]);
+      t1 = cos9[1] * (in[8] - in[16]);
+
+      tmp[4] = t4 + t2 + t2;
+      t2 = cos9[2] * (in[4] + in[16]);
+
+      t6 = t3 - t0 - t2;
+      t0 += t3 + t1;
+      t3 += t2 - t1;
+
+      t2 = cos18[0] * (in[2]  + in[10]);
+      t4 = cos18[1] * (in[10] - in[14]);
+      t7 = COS6_1 * in[6];
+
+      t1 = t2 + t4 + t7;
+      tmp[0] = t0 + t1;
+      tmp[8] = t0 - t1;
+      t1 = cos18[2] * (in[2] + in[14]);
+      t2 += t1 - t7;
+
+      tmp[3] = t3 + t2;
+      t0 = COS6_1 * (in[10] + in[14] - in[2]);
+      tmp[5] = t3 - t2;
+
+      t4 -= t1 + t7;
+
+      tmp[1] = t5 - t0;
+      tmp[7] = t5 + t0;
+      tmp[2] = t6 + t4;
+      tmp[6] = t6 - t4;
+    }
+
+    {
+      real t0, t1, t2, t3, t4, t5, t6, t7;
+
+      t1 = COS6_2 * in[13];
+      t2 = COS6_2 * (in[9] + in[17] - in[5]);
+
+      t3 = in[1] + t1;
+      t4 = in[1] - t1 - t1;
+      t5 = t4 - t2;
+
+      t0 = cos9[0] * (in[5] + in[9]);
+      t1 = cos9[1] * (in[9] - in[17]);
+
+      tmp[13] = (t4 + t2 + t2) * tfcos36[17-13];
+      t2 = cos9[2] * (in[5] + in[17]);
+
+      t6 = t3 - t0 - t2;
+      t0 += t3 + t1;
+      t3 += t2 - t1;
+
+      t2 = cos18[0] * (in[3]  + in[11]);
+      t4 = cos18[1] * (in[11] - in[15]);
+      t7 = COS6_1 * in[7];
+
+      t1 = t2 + t4 + t7;
+      tmp[17] = (t0 + t1) * tfcos36[17-17];
+      tmp[9]  = (t0 - t1) * tfcos36[17-9];
+      t1 = cos18[2] * (in[3] + in[15]);
+      t2 += t1 - t7;
+
+      tmp[14] = (t3 + t2) * tfcos36[17-14];
+      t0 = COS6_1 * (in[11] + in[15] - in[3]);
+      tmp[12] = (t3 - t2) * tfcos36[17-12];
+
+      t4 -= t1 + t7;
+
+      tmp[16] = (t5 - t0) * tfcos36[17-16];
+      tmp[10] = (t5 + t0) * tfcos36[17-10];
+      tmp[15] = (t6 + t4) * tfcos36[17-15];
+      tmp[11] = (t6 - t4) * tfcos36[17-11];
+   }
+
+#define MACRO(v) { \
+    real tmpval; \
+    real sum0 = tmp[(v)]; \
+    real sum1 = tmp[17-(v)]; \
+    out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \
+    out2[8-(v)] = tmpval * w[26-(v)]; \
+    sum0 -= sum1; \
+    ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
+    ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; }
+
+{
+   register real *out2 = o2;
+   register real *w = wintab;
+   register real *out1 = o1;
+   register real *ts = tsbuf;
+
+   MACRO(0);
+   MACRO(1);
+   MACRO(2);
+   MACRO(3);
+   MACRO(4);
+   MACRO(5);
+   MACRO(6);
+   MACRO(7);
+   MACRO(8);
+}
+
+#else
+
+  {
+
+#define MACRO0(v) { \
+    real tmp; \
+    out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \
+    out2[8-(v)] = tmp * w[26-(v)];  } \
+    sum0 -= sum1; \
+    ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
+    ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; 
+#define MACRO1(v) { \
+	real sum0,sum1; \
+    sum0 = tmp1a + tmp2a; \
+	sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
+	MACRO0(v); }
+#define MACRO2(v) { \
+    real sum0,sum1; \
+    sum0 = tmp2a - tmp1a; \
+    sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
+	MACRO0(v); }
+
+    register const real *c = nCOS9;
+    register real *out2 = o2;
+	register real *w = wintab;
+	register real *out1 = o1;
+	register real *ts = tsbuf;
+
+    real ta33,ta66,tb33,tb66;
+
+    ta33 = in[2*3+0] * c[3];
+    ta66 = in[2*6+0] * c[6];
+    tb33 = in[2*3+1] * c[3];
+    tb66 = in[2*6+1] * c[6];
+
+    { 
+      real tmp1a,tmp2a,tmp1b,tmp2b;
+      tmp1a =             in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7];
+      tmp1b =             in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7];
+      tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8];
+      tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8];
+
+      MACRO1(0);
+      MACRO2(8);
+    }
+
+    {
+      real tmp1a,tmp2a,tmp1b,tmp2b;
+      tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3];
+      tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3];
+      tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0];
+      tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1];
+
+      MACRO1(1);
+      MACRO2(7);
+    }
+
+    {
+      real tmp1a,tmp2a,tmp1b,tmp2b;
+      tmp1a =             in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1];
+      tmp1b =             in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1];
+      tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4];
+      tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4];
+
+      MACRO1(2);
+      MACRO2(6);
+    }
+
+    {
+      real tmp1a,tmp2a,tmp1b,tmp2b;
+      tmp1a =             in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5];
+      tmp1b =             in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5];
+      tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2];
+      tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2];
+
+      MACRO1(3);
+      MACRO2(5);
+    }
+
+	{
+		real sum0,sum1;
+    	sum0 =  in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
+    	sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
+		MACRO0(4);
+	}
+  }
+#endif
+
+  }
+}
+
diff --git a/mp3lib/dct36_3dnow.s b/mp3lib/dct36_3dnow.s
new file mode 100644
index 0000000000..a729bb4646
--- /dev/null
+++ b/mp3lib/dct36_3dnow.s
@@ -0,0 +1,499 @@
+/
+/ dct36_3dnow.s - 3DNow! optimized dct36()
+/
+/ This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
+/ <squash@mb.kcom.ne.jp>,only two types of changes have been made:
+/
+/ - remove PREFETCH instruction for speedup
+/ - change function name for support 3DNow! automatic detect
+/
+/ You can find Kashiyama's original 3dnow! support patch
+/ (for mpg123-0.59o) at
+/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
+/
+/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
+/                    <kim@comtec.co.jp>               - after  1.Apr.1999
+/
+	
+///
+/// Replacement of dct36() with AMD's 3DNow! SIMD operations support
+/// 
+/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
+/// 
+/// The author of this program disclaim whole expressed or implied
+/// warranties with regard to this program, and in no event shall the
+/// author of this program liable to whatever resulted from the use of
+/// this program. Use it at your own risk.
+/// 
+
+	.globl dct36_3dnow
+	.type	 dct36_3dnow,@function
+dct36_3dnow:
+	pushl %ebp
+	movl %esp,%ebp
+	subl $120,%esp
+	pushl %esi
+	pushl %ebx
+	movl 8(%ebp),%eax
+	movl 12(%ebp),%esi
+	movl 16(%ebp),%ecx
+	movl 20(%ebp),%edx
+	movl 24(%ebp),%ebx
+	leal -128(%ebp),%esp
+
+	femms
+	movq (%eax),%mm0
+	movq 4(%eax),%mm1
+	pfadd %mm1,%mm0
+	movq %mm0,4(%eax)
+	psrlq $32,%mm1
+	movq 12(%eax),%mm2
+	punpckldq %mm2,%mm1
+	pfadd %mm2,%mm1
+	movq %mm1,12(%eax)
+	psrlq $32,%mm2
+	movq 20(%eax),%mm3
+	punpckldq %mm3,%mm2
+	pfadd %mm3,%mm2
+	movq %mm2,20(%eax)
+	psrlq $32,%mm3
+	movq 28(%eax),%mm4
+	punpckldq %mm4,%mm3
+	pfadd %mm4,%mm3
+	movq %mm3,28(%eax)
+	psrlq $32,%mm4
+	movq 36(%eax),%mm5
+	punpckldq %mm5,%mm4
+	pfadd %mm5,%mm4
+	movq %mm4,36(%eax)
+	psrlq $32,%mm5
+	movq 44(%eax),%mm6
+	punpckldq %mm6,%mm5
+	pfadd %mm6,%mm5
+	movq %mm5,44(%eax)
+	psrlq $32,%mm6
+	movq 52(%eax),%mm7
+	punpckldq %mm7,%mm6
+	pfadd %mm7,%mm6
+	movq %mm6,52(%eax)
+	psrlq $32,%mm7
+	movq 60(%eax),%mm0
+	punpckldq %mm0,%mm7
+	pfadd %mm0,%mm7
+	movq %mm7,60(%eax)
+	psrlq $32,%mm0
+	movd 68(%eax),%mm1
+	pfadd %mm1,%mm0
+	movd %mm0,68(%eax)
+	movd 4(%eax),%mm0
+	movd 12(%eax),%mm1
+	punpckldq %mm1,%mm0
+	punpckldq 20(%eax),%mm1
+	pfadd %mm1,%mm0
+	movd %mm0,12(%eax)
+	psrlq $32,%mm0
+	movd %mm0,20(%eax)
+	psrlq $32,%mm1
+	movd 28(%eax),%mm2
+	punpckldq %mm2,%mm1
+	punpckldq 36(%eax),%mm2
+	pfadd %mm2,%mm1
+	movd %mm1,28(%eax)
+	psrlq $32,%mm1
+	movd %mm1,36(%eax)
+	psrlq $32,%mm2
+	movd 44(%eax),%mm3
+	punpckldq %mm3,%mm2
+	punpckldq 52(%eax),%mm3
+	pfadd %mm3,%mm2
+	movd %mm2,44(%eax)
+	psrlq $32,%mm2
+	movd %mm2,52(%eax)
+	psrlq $32,%mm3
+	movd 60(%eax),%mm4
+	punpckldq %mm4,%mm3
+	punpckldq 68(%eax),%mm4
+	pfadd %mm4,%mm3
+	movd %mm3,60(%eax)
+	psrlq $32,%mm3
+	movd %mm3,68(%eax)
+
+	movq 24(%eax),%mm0
+	movq 48(%eax),%mm1
+	movd COS9+12,%mm2
+	punpckldq %mm2,%mm2
+	movd COS9+24,%mm3
+	punpckldq %mm3,%mm3
+	pfmul %mm2,%mm0
+	pfmul %mm3,%mm1
+	pushl %eax
+	movl $1,%eax
+	movd %eax,%mm7
+	pi2fd %mm7,%mm7
+	popl %eax
+	movq 8(%eax),%mm2
+	movd COS9+4,%mm3
+	punpckldq %mm3,%mm3
+	pfmul %mm3,%mm2
+	pfadd %mm0,%mm2
+	movq 40(%eax),%mm3
+	movd COS9+20,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	pfadd %mm3,%mm2
+	movq 56(%eax),%mm3
+	movd COS9+28,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	pfadd %mm3,%mm2
+	movq (%eax),%mm3
+	movq 16(%eax),%mm4
+	movd COS9+8,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfadd %mm4,%mm3
+	movq 32(%eax),%mm4
+	movd COS9+16,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfadd %mm4,%mm3
+	pfadd %mm1,%mm3
+	movq 64(%eax),%mm4
+	movd COS9+32,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfadd %mm4,%mm3
+	movq %mm2,%mm4
+	pfadd %mm3,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+0,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 108(%edx),%mm6
+	punpckldq 104(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,36(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,32(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 32(%edx),%mm6
+	punpckldq 36(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 32(%esi),%mm6
+	punpckldq 36(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,1024(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,1152(%ebx)
+	movq %mm3,%mm4
+	pfsub %mm2,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+32,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 140(%edx),%mm6
+	punpckldq 72(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,68(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,0(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 0(%edx),%mm6
+	punpckldq 68(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 0(%esi),%mm6
+	punpckldq 68(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,0(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,2176(%ebx)
+	movq 8(%eax),%mm2
+	movq 40(%eax),%mm3
+	pfsub %mm3,%mm2
+	movq 56(%eax),%mm3
+	pfsub %mm3,%mm2
+	movd COS9+12,%mm3
+	punpckldq %mm3,%mm3
+	pfmul %mm3,%mm2
+	movq 16(%eax),%mm3
+	movq 32(%eax),%mm4
+	pfsub %mm4,%mm3
+	movq 64(%eax),%mm4
+	pfsub %mm4,%mm3
+	movd COS9+24,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	movq 48(%eax),%mm4
+	pfsub %mm4,%mm3
+	movq (%eax),%mm4
+	pfadd %mm4,%mm3
+	movq %mm2,%mm4
+	pfadd %mm3,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+4,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 112(%edx),%mm6
+	punpckldq 100(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,40(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,28(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 28(%edx),%mm6
+	punpckldq 40(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 28(%esi),%mm6
+	punpckldq 40(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,896(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,1280(%ebx)
+	movq %mm3,%mm4
+	pfsub %mm2,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+28,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 136(%edx),%mm6
+	punpckldq 76(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,64(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,4(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 4(%edx),%mm6
+	punpckldq 64(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 4(%esi),%mm6
+	punpckldq 64(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,128(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,2048(%ebx)
+
+	movq 8(%eax),%mm2
+	movd COS9+20,%mm3
+	punpckldq %mm3,%mm3
+	pfmul %mm3,%mm2
+	pfsub %mm0,%mm2
+	movq 40(%eax),%mm3
+	movd COS9+28,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	pfsub %mm3,%mm2
+	movq 56(%eax),%mm3
+	movd COS9+4,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	pfadd %mm3,%mm2
+	movq (%eax),%mm3
+	movq 16(%eax),%mm4
+	movd COS9+32,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfsub %mm4,%mm3
+	movq 32(%eax),%mm4
+	movd COS9+8,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfsub %mm4,%mm3
+	pfadd %mm1,%mm3
+	movq 64(%eax),%mm4
+	movd COS9+16,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfadd %mm4,%mm3
+	movq %mm2,%mm4
+	pfadd %mm3,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+8,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 116(%edx),%mm6
+	punpckldq 96(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,44(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,24(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 24(%edx),%mm6
+	punpckldq 44(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 24(%esi),%mm6
+	punpckldq 44(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,768(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,1408(%ebx)
+	movq %mm3,%mm4
+	pfsub %mm2,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+24,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 132(%edx),%mm6
+	punpckldq 80(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,60(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,8(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 8(%edx),%mm6
+	punpckldq 60(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 8(%esi),%mm6
+	punpckldq 60(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,256(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,1920(%ebx)
+	movq 8(%eax),%mm2
+	movd COS9+28,%mm3
+	punpckldq %mm3,%mm3
+	pfmul %mm3,%mm2
+	pfsub %mm0,%mm2
+	movq 40(%eax),%mm3
+	movd COS9+4,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	pfadd %mm3,%mm2
+	movq 56(%eax),%mm3
+	movd COS9+20,%mm4
+	punpckldq %mm4,%mm4
+	pfmul %mm4,%mm3
+	pfsub %mm3,%mm2
+	movq (%eax),%mm3
+	movq 16(%eax),%mm4
+	movd COS9+16,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfsub %mm4,%mm3
+	movq 32(%eax),%mm4
+	movd COS9+32,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfadd %mm4,%mm3
+	pfadd %mm1,%mm3
+	movq 64(%eax),%mm4
+	movd COS9+8,%mm5
+	punpckldq %mm5,%mm5
+	pfmul %mm5,%mm4
+	pfsub %mm4,%mm3
+	movq %mm2,%mm4
+	pfadd %mm3,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+12,%mm5
+	pfmul %mm5,%mm4
+	movq %mm4,%mm5
+	pfacc %mm5,%mm5
+	movd 120(%edx),%mm6
+	punpckldq 92(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd %mm5,48(%ecx)
+	psrlq $32,%mm5
+	movd %mm5,20(%ecx)
+	movq %mm4,%mm6
+	punpckldq %mm6,%mm5
+	pfsub %mm6,%mm5
+	punpckhdq %mm5,%mm5
+	movd 20(%edx),%mm6
+	punpckldq 48(%edx),%mm6
+	pfmul %mm6,%mm5
+	movd 20(%esi),%mm6
+	punpckldq 48(%esi),%mm6
+	pfadd %mm6,%mm5
+	movd %mm5,640(%ebx)
+	psrlq $32,%mm5
+	movd %mm5,1536(%ebx)
+	movq %mm3,%mm4
+	pfsub %mm2,%mm4
+	movq %mm7,%mm5
+	punpckldq tfcos36+20,%mm5
author	arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2>	2001-02-24 20:28:24 +0000
committer	arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2>	2001-02-24 20:28:24 +0000
commit	d34041569e71fc9bd772354e94dc9d16061072a5 (patch)
tree	8f481cae1c70f32d1756fbe5f39000577b73042d /mp3lib
parent	e95a95ece09bac96bdfd37322f96c6f57ef79ebc (diff)
download	mpv-d34041569e71fc9bd772354e94dc9d16061072a5.tar.bz2 mpv-d34041569e71fc9bd772354e94dc9d16061072a5.tar.xz