diff options
Diffstat (limited to 'mp3lib')
-rw-r--r-- | mp3lib/dct12.c | 139 | ||||
-rw-r--r-- | mp3lib/dct36.c | 269 | ||||
-rw-r--r-- | mp3lib/dct36_3dnow.c | 502 | ||||
-rw-r--r-- | mp3lib/dct36_k7.c | 34 | ||||
-rw-r--r-- | mp3lib/dct64.c | 323 | ||||
-rw-r--r-- | mp3lib/dct64_3dnow.c | 929 | ||||
-rw-r--r-- | mp3lib/dct64_altivec.c | 524 | ||||
-rw-r--r-- | mp3lib/dct64_i386.c | 319 | ||||
-rw-r--r-- | mp3lib/dct64_k7.c | 767 | ||||
-rw-r--r-- | mp3lib/dct64_mmx.c | 987 | ||||
-rw-r--r-- | mp3lib/dct64_sse.c | 423 | ||||
-rw-r--r-- | mp3lib/decod386.c | 253 | ||||
-rw-r--r-- | mp3lib/decode_i586.c | 318 | ||||
-rw-r--r-- | mp3lib/decode_mmx.c | 369 | ||||
-rw-r--r-- | mp3lib/equalizer.c | 78 | ||||
-rw-r--r-- | mp3lib/huffman.h | 335 | ||||
-rw-r--r-- | mp3lib/l2tables.h | 166 | ||||
-rw-r--r-- | mp3lib/layer1.c | 165 | ||||
-rw-r--r-- | mp3lib/layer2.c | 322 | ||||
-rw-r--r-- | mp3lib/layer3.c | 1349 | ||||
-rw-r--r-- | mp3lib/mp3.h | 39 | ||||
-rw-r--r-- | mp3lib/mpg123.h | 144 | ||||
-rw-r--r-- | mp3lib/sr1.c | 605 | ||||
-rw-r--r-- | mp3lib/tabinit.c | 75 | ||||
-rw-r--r-- | mp3lib/test.c | 89 | ||||
-rw-r--r-- | mp3lib/test2.c | 87 |
26 files changed, 0 insertions, 9610 deletions
diff --git a/mp3lib/dct12.c b/mp3lib/dct12.c deleted file mode 100644 index 5ba45af389..0000000000 --- a/mp3lib/dct12.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * new DCT12 - */ -static void dct12(real *in,real *rawout1,real *rawout2,register real *wi,register real *ts) -{ -#define DCT12_PART1 \ - in5 = in[5*3]; \ - in5 += (in4 = in[4*3]); \ - in4 += (in3 = in[3*3]); \ - in3 += (in2 = in[2*3]); \ - in2 += (in1 = in[1*3]); \ - in1 += (in0 = in[0*3]); \ - \ - in5 += in3; in3 += in1; \ - \ - in2 *= COS6_1; \ - in3 *= COS6_1; \ - -#define DCT12_PART2 \ - in0 += in4 * COS6_2; \ - \ - in4 = in0 + in2; \ - in0 -= in2; \ - \ - in1 += in5 * COS6_2; \ - \ - in5 = (in1 + in3) * tfcos12[0]; \ - in1 = (in1 - in3) * tfcos12[2]; \ - \ - in3 = in4 + in5; \ - in4 -= in5; \ - \ - in2 = in0 + in1; \ - in0 -= in1; - - - { - real in0,in1,in2,in3,in4,in5; - register real *out1 = rawout1; - ts[SBLIMIT*0] = out1[0]; ts[SBLIMIT*1] = out1[1]; ts[SBLIMIT*2] = out1[2]; - ts[SBLIMIT*3] = out1[3]; ts[SBLIMIT*4] = out1[4]; ts[SBLIMIT*5] = out1[5]; - - DCT12_PART1 - - { - real tmp0,tmp1 = (in0 - in4); - { - real tmp2 = (in1 - in5) * tfcos12[1]; - tmp0 = tmp1 + tmp2; - tmp1 -= tmp2; - } - ts[(17-1)*SBLIMIT] = out1[17-1] + tmp0 * wi[11-1]; - ts[(12+1)*SBLIMIT] = out1[12+1] + tmp0 * wi[6+1]; - ts[(6 +1)*SBLIMIT] = out1[6 +1] + tmp1 * wi[1]; - ts[(11-1)*SBLIMIT] = out1[11-1] + tmp1 * wi[5-1]; - } - - DCT12_PART2 - - ts[(17-0)*SBLIMIT] = out1[17-0] + in2 * wi[11-0]; - ts[(12+0)*SBLIMIT] = out1[12+0] + in2 * wi[6+0]; - ts[(12+2)*SBLIMIT] = out1[12+2] + in3 * wi[6+2]; - ts[(17-2)*SBLIMIT] = out1[17-2] + in3 * wi[11-2]; - - ts[(6+0)*SBLIMIT] = out1[6+0] + in0 * wi[0]; - ts[(11-0)*SBLIMIT] = out1[11-0] + in0 * wi[5-0]; - ts[(6+2)*SBLIMIT] = out1[6+2] + in4 * wi[2]; - ts[(11-2)*SBLIMIT] = out1[11-2] + in4 * wi[5-2]; - } - - in++; - - { - real in0,in1,in2,in3,in4,in5; - register real *out2 = rawout2; - - DCT12_PART1 - - { - real tmp0,tmp1 = (in0 - in4); - { - real tmp2 = (in1 - in5) * tfcos12[1]; - tmp0 = tmp1 + tmp2; - tmp1 -= tmp2; - } - out2[5-1] = tmp0 * wi[11-1]; - out2[0+1] = tmp0 * wi[6+1]; - ts[(12+1)*SBLIMIT] += tmp1 * wi[1]; - ts[(17-1)*SBLIMIT] += tmp1 * wi[5-1]; - } - - DCT12_PART2 - - out2[5-0] = in2 * wi[11-0]; - out2[0+0] = in2 * wi[6+0]; - out2[0+2] = in3 * wi[6+2]; - out2[5-2] = in3 * wi[11-2]; - - ts[(12+0)*SBLIMIT] += in0 * wi[0]; - ts[(17-0)*SBLIMIT] += in0 * wi[5-0]; - ts[(12+2)*SBLIMIT] += in4 * wi[2]; - ts[(17-2)*SBLIMIT] += in4 * wi[5-2]; - } - - in++; - - { - real in0,in1,in2,in3,in4,in5; - register real *out2 = rawout2; - out2[12]=out2[13]=out2[14]=out2[15]=out2[16]=out2[17]=0.0; - - DCT12_PART1 - - { - real tmp0,tmp1 = (in0 - in4); - { - real tmp2 = (in1 - in5) * tfcos12[1]; - tmp0 = tmp1 + tmp2; - tmp1 -= tmp2; - } - out2[11-1] = tmp0 * wi[11-1]; - out2[6 +1] = tmp0 * wi[6+1]; - out2[0+1] += tmp1 * wi[1]; - out2[5-1] += tmp1 * wi[5-1]; - } - - DCT12_PART2 - - out2[11-0] = in2 * wi[11-0]; - out2[6 +0] = in2 * wi[6+0]; - out2[6 +2] = in3 * wi[6+2]; - out2[11-2] = in3 * wi[11-2]; - - out2[0+0] += in0 * wi[0]; - out2[5-0] += in0 * wi[5-0]; - out2[0+2] += in4 * wi[2]; - out2[5-2] += in4 * wi[5-2]; - } -} diff --git a/mp3lib/dct36.c b/mp3lib/dct36.c deleted file mode 100644 index 22ec273277..0000000000 --- a/mp3lib/dct36.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Modified for use with MPlayer, for details see the changelog at - * http://svn.mplayerhq.hu/mplayer/trunk/ - * $Id$ - */ - -/* -// This is an optimized DCT from Jeff Tsay's maplay 1.2+ package. -// Saved one multiplication by doing the 'twiddle factor' stuff -// together with the window mul. (MH) -// -// This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the -// 9 point IDCT needs to be reduced further. Unfortunately, I don't -// know how to do that, because 9 is not an even number. - Jeff. -// -////////////////////////////////////////////////////////////////// -// -// 9 Point Inverse Discrete Cosine Transform -// -// This piece of code is Copyright 1997 Mikko Tommila and is freely usable -// by anybody. The algorithm itself is of course in the public domain. -// -// Again derived heuristically from the 9-point WFTA. -// -// The algorithm is optimized (?) for speed, not for small rounding errors or -// good readability. -// -// 36 additions, 11 multiplications -// -// Again this is very likely sub-optimal. -// -// The code is optimized to use a minimum number of temporary variables, -// so it should compile quite well even on 8-register Intel x86 processors. -// This makes the code quite obfuscated and very difficult to understand. -// -// References: -// [1] S. Winograd: "On Computing the Discrete Fourier Transform", -// Mathematics of Computation, Volume 32, Number 141, January 1978, -// Pages 175-199 -*/ - -/*------------------------------------------------------------------*/ -/* */ -/* Function: Calculation of the inverse MDCT */ -/* */ -/*------------------------------------------------------------------*/ - -static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf) -{ -#ifdef NEW_DCT9 - real tmp[18]; -#endif - - { - register real *in = inbuf; - - in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14]; - in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11]; - in[11]+=in[10]; in[10]+=in[9]; in[9] +=in[8]; - in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5]; - in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2]; - in[2] +=in[1]; in[1] +=in[0]; - - in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9]; - in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1]; - - -#ifdef NEW_DCT9 - { - real t0, t1, t2, t3, t4, t5, t6, t7; - - t1 = COS6_2 * in[12]; - t2 = COS6_2 * (in[8] + in[16] - in[4]); - - t3 = in[0] + t1; - t4 = in[0] - t1 - t1; - t5 = t4 - t2; - - t0 = cos9[0] * (in[4] + in[8]); - t1 = cos9[1] * (in[8] - in[16]); - - tmp[4] = t4 + t2 + t2; - t2 = cos9[2] * (in[4] + in[16]); - - t6 = t3 - t0 - t2; - t0 += t3 + t1; - t3 += t2 - t1; - - t2 = cos18[0] * (in[2] + in[10]); - t4 = cos18[1] * (in[10] - in[14]); - t7 = COS6_1 * in[6]; - - t1 = t2 + t4 + t7; - tmp[0] = t0 + t1; - tmp[8] = t0 - t1; - t1 = cos18[2] * (in[2] + in[14]); - t2 += t1 - t7; - - tmp[3] = t3 + t2; - t0 = COS6_1 * (in[10] + in[14] - in[2]); - tmp[5] = t3 - t2; - - t4 -= t1 + t7; - - tmp[1] = t5 - t0; - tmp[7] = t5 + t0; - tmp[2] = t6 + t4; - tmp[6] = t6 - t4; - } - - { - real t0, t1, t2, t3, t4, t5, t6, t7; - - t1 = COS6_2 * in[13]; - t2 = COS6_2 * (in[9] + in[17] - in[5]); - - t3 = in[1] + t1; - t4 = in[1] - t1 - t1; - t5 = t4 - t2; - - t0 = cos9[0] * (in[5] + in[9]); - t1 = cos9[1] * (in[9] - in[17]); - - tmp[13] = (t4 + t2 + t2) * tfcos36[17-13]; - t2 = cos9[2] * (in[5] + in[17]); - - t6 = t3 - t0 - t2; - t0 += t3 + t1; - t3 += t2 - t1; - - t2 = cos18[0] * (in[3] + in[11]); - t4 = cos18[1] * (in[11] - in[15]); - t7 = COS6_1 * in[7]; - - t1 = t2 + t4 + t7; - tmp[17] = (t0 + t1) * tfcos36[17-17]; - tmp[9] = (t0 - t1) * tfcos36[17-9]; - t1 = cos18[2] * (in[3] + in[15]); - t2 += t1 - t7; - - tmp[14] = (t3 + t2) * tfcos36[17-14]; - t0 = COS6_1 * (in[11] + in[15] - in[3]); - tmp[12] = (t3 - t2) * tfcos36[17-12]; - - t4 -= t1 + t7; - - tmp[16] = (t5 - t0) * tfcos36[17-16]; - tmp[10] = (t5 + t0) * tfcos36[17-10]; - tmp[15] = (t6 + t4) * tfcos36[17-15]; - tmp[11] = (t6 - t4) * tfcos36[17-11]; - } - -#define MACRO(v) { \ - real tmpval; \ - real sum0 = tmp[(v)]; \ - real sum1 = tmp[17-(v)]; \ - out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \ - out2[8-(v)] = tmpval * w[26-(v)]; \ - sum0 -= sum1; \ - ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \ - ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; } - -{ - register real *out2 = o2; - register real *w = wintab; - register real *out1 = o1; - register real *ts = tsbuf; - - MACRO(0); - MACRO(1); - MACRO(2); - MACRO(3); - MACRO(4); - MACRO(5); - MACRO(6); - MACRO(7); - MACRO(8); -} - -#else - - { - -#define MACRO0(v) { \ - real tmp; \ - out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \ - out2[8-(v)] = tmp * w[26-(v)]; } \ - sum0 -= sum1; \ - ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \ - ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; -#define MACRO1(v) { \ - real sum0, sum1; \ - sum0 = tmp1a + tmp2a; \ - sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \ - MACRO0(v); } -#define MACRO2(v) { \ - real sum0, sum1; \ - sum0 = tmp2a - tmp1a; \ - sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \ - MACRO0(v); } - - register const real *c = COS9; - register real *out2 = o2; - register real *w = wintab; - register real *out1 = o1; - register real *ts = tsbuf; - - real ta33,ta66,tb33,tb66; - - ta33 = in[2*3+0] * c[3]; - ta66 = in[2*6+0] * c[6]; - tb33 = in[2*3+1] * c[3]; - tb66 = in[2*6+1] * c[6]; - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7]; - tmp1b = in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7]; - tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8]; - tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8]; - - MACRO1(0); - MACRO2(8); - } - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3]; - tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3]; - tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0]; - tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1]; - - MACRO1(1); - MACRO2(7); - } - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1]; - tmp1b = in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1]; - tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4]; - tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4]; - - MACRO1(2); - MACRO2(6); - } - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5]; - tmp1b = in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5]; - tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2]; - tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2]; - - MACRO1(3); - MACRO2(5); - } - - { - real sum0,sum1; - sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0]; - sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4]; - MACRO0(4); - } - } -#endif - - } -} diff --git a/mp3lib/dct36_3dnow.c b/mp3lib/dct36_3dnow.c deleted file mode 100644 index 4362d0582b..0000000000 --- a/mp3lib/dct36_3dnow.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * dct36_3dnow.c - 3DNow! optimized dct36() - * - * This code based 'dct36_3dnow.s' by Syuuhei Kashiyama - * <squash@mb.kcom.ne.jp>, only two types of changes have been made: - * - * - removed PREFETCH instruction for speedup - * - changed function name for support 3DNow! automatic detection - * - * You can find Kashiyama's original 3dnow! support patch - * (for mpg123-0.59o) at - * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). - * - * by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999 - * <kim@comtec.co.jp> - after 1.Apr.1999 - * - * Modified for use with MPlayer, for details see the changelog at - * http://svn.mplayerhq.hu/mplayer/trunk/ - * $Id$ - * - * Original disclaimer: - * The author of this program disclaim whole expressed or implied - * warranties with regard to this program, and in no event shall the - * author of this program liable to whatever resulted from the use of - * this program. Use it at your own risk. - * - * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi - */ - -#include "config.h" -#include "mangle.h" -#include "mpg123.h" -#include "ffmpeg_files/x86_cpu.h" - -#ifdef DCT36_OPTIMIZE_FOR_K7 -void dct36_3dnowex(real *inbuf, real *o1, - real *o2, real *wintab, real *tsbuf) -#else -void dct36_3dnow(real *inbuf, real *o1, - real *o2, real *wintab, real *tsbuf) -#endif -{ - __asm__ volatile( - "movq (%%"REG_a"),%%mm0\n\t" - "movq 4(%%"REG_a"),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movq %%mm0,4(%%"REG_a")\n\t" - "psrlq $32,%%mm1\n\t" - "movq 12(%%"REG_a"),%%mm2\n\t" - "punpckldq %%mm2,%%mm1\n\t" - "pfadd %%mm2,%%mm1\n\t" - "movq %%mm1,12(%%"REG_a")\n\t" - "psrlq $32,%%mm2\n\t" - "movq 20(%%"REG_a"),%%mm3\n\t" - "punpckldq %%mm3,%%mm2\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq %%mm2,20(%%"REG_a")\n\t" - "psrlq $32,%%mm3\n\t" - "movq 28(%%"REG_a"),%%mm4\n\t" - "punpckldq %%mm4,%%mm3\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm3,28(%%"REG_a")\n\t" - "psrlq $32,%%mm4\n\t" - "movq 36(%%"REG_a"),%%mm5\n\t" - "punpckldq %%mm5,%%mm4\n\t" - "pfadd %%mm5,%%mm4\n\t" - "movq %%mm4,36(%%"REG_a")\n\t" - "psrlq $32,%%mm5\n\t" - "movq 44(%%"REG_a"),%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movq %%mm5,44(%%"REG_a")\n\t" - "psrlq $32,%%mm6\n\t" - "movq 52(%%"REG_a"),%%mm7\n\t" - "punpckldq %%mm7,%%mm6\n\t" - "pfadd %%mm7,%%mm6\n\t" - "movq %%mm6,52(%%"REG_a")\n\t" - "psrlq $32,%%mm7\n\t" - "movq 60(%%"REG_a"),%%mm0\n\t" - "punpckldq %%mm0,%%mm7\n\t" - "pfadd %%mm0,%%mm7\n\t" - "movq %%mm7,60(%%"REG_a")\n\t" - "psrlq $32,%%mm0\n\t" - "movd 68(%%"REG_a"),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movd %%mm0,68(%%"REG_a")\n\t" - "movd 4(%%"REG_a"),%%mm0\n\t" - "movd 12(%%"REG_a"),%%mm1\n\t" - "punpckldq %%mm1,%%mm0\n\t" - "punpckldq 20(%%"REG_a"),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movd %%mm0,12(%%"REG_a")\n\t" - "psrlq $32,%%mm0\n\t" - "movd %%mm0,20(%%"REG_a")\n\t" - "psrlq $32,%%mm1\n\t" - "movd 28(%%"REG_a"),%%mm2\n\t" - "punpckldq %%mm2,%%mm1\n\t" - "punpckldq 36(%%"REG_a"),%%mm2\n\t" - "pfadd %%mm2,%%mm1\n\t" - "movd %%mm1,28(%%"REG_a")\n\t" - "psrlq $32,%%mm1\n\t" - "movd %%mm1,36(%%"REG_a")\n\t" - "psrlq $32,%%mm2\n\t" - "movd 44(%%"REG_a"),%%mm3\n\t" - "punpckldq %%mm3,%%mm2\n\t" - "punpckldq 52(%%"REG_a"),%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movd %%mm2,44(%%"REG_a")\n\t" - "psrlq $32,%%mm2\n\t" - "movd %%mm2,52(%%"REG_a")\n\t" - "psrlq $32,%%mm3\n\t" - "movd 60(%%"REG_a"),%%mm4\n\t" - "punpckldq %%mm4,%%mm3\n\t" - "punpckldq 68(%%"REG_a"),%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movd %%mm3,60(%%"REG_a")\n\t" - "psrlq $32,%%mm3\n\t" - "movd %%mm3,68(%%"REG_a")\n\t" - - "movq 24(%%"REG_a"),%%mm0\n\t" - "movq 48(%%"REG_a"),%%mm1\n\t" - "movd "MANGLE(COS9)"+12,%%mm2\n\t" - "punpckldq %%mm2,%%mm2\n\t" - "movd "MANGLE(COS9)"+24,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm2,%%mm0\n\t" - "pfmul %%mm3,%%mm1\n\t" - "push %%"REG_a"\n\t" - "movl $1,%%eax\n\t" - "movd %%eax,%%mm7\n\t" - "pi2fd %%mm7,%%mm7\n\t" - "pop %%"REG_a"\n\t" - "movq 8(%%"REG_a"),%%mm2\n\t" - "movd "MANGLE(COS9)"+4,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfadd %%mm0,%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+20,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+28,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq (%%"REG_a"),%%mm3\n\t" - "movq 16(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 108(%%"REG_d"),%%mm6\n\t" - "punpckldq 104(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" -#ifdef DCT36_OPTIMIZE_FOR_K7 - "pswapd %%mm5,%%mm5\n\t" - "movq %%mm5,32(%%"REG_c")\n\t" -#else - "movd %%mm5,36(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,32(%%"REG_c")\n\t" -#endif - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 32(%%"REG_d"),%%mm6\n\t" - "punpckldq 36(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 32(%%"REG_S"),%%mm6\n\t" - "punpckldq 36(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,1024(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1152(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 140(%%"REG_d"),%%mm6\n\t" - "punpckldq 72(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,68(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,0(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 0(%%"REG_d"),%%mm6\n\t" - "punpckldq 68(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 0(%%"REG_S"),%%mm6\n\t" - "punpckldq 68(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,0(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,2176(%%"REG_D")\n\t" - "movq 8(%%"REG_a"),%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movd "MANGLE(COS9)"+12,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "movq 16(%%"REG_a"),%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movd "MANGLE(COS9)"+24,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "movq 48(%%"REG_a"),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq (%%"REG_a"),%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 112(%%"REG_d"),%%mm6\n\t" - "punpckldq 100(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,40(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,28(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 28(%%"REG_d"),%%mm6\n\t" - "punpckldq 40(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 28(%%"REG_S"),%%mm6\n\t" - "punpckldq 40(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,896(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1280(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 136(%%"REG_d"),%%mm6\n\t" - "punpckldq 76(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,64(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,4(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 4(%%"REG_d"),%%mm6\n\t" - "punpckldq 64(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 4(%%"REG_S"),%%mm6\n\t" - "punpckldq 64(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,128(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,2048(%%"REG_D")\n\t" - - "movq 8(%%"REG_a"),%%mm2\n\t" - "movd "MANGLE(COS9)"+20,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfsub %%mm0,%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+28,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+4,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq (%%"REG_a"),%%mm3\n\t" - "movq 16(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 116(%%"REG_d"),%%mm6\n\t" - "punpckldq 96(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,44(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,24(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 24(%%"REG_d"),%%mm6\n\t" - "punpckldq 44(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 24(%%"REG_S"),%%mm6\n\t" - "punpckldq 44(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,768(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1408(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 132(%%"REG_d"),%%mm6\n\t" - "punpckldq 80(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,60(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,8(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 8(%%"REG_d"),%%mm6\n\t" - "punpckldq 60(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 8(%%"REG_S"),%%mm6\n\t" - "punpckldq 60(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,256(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1920(%%"REG_D")\n\t" - "movq 8(%%"REG_a"),%%mm2\n\t" - "movd "MANGLE(COS9)"+28,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfsub %%mm0,%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+4,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+20,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq (%%"REG_a"),%%mm3\n\t" - "movq 16(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 120(%%"REG_d"),%%mm6\n\t" - "punpckldq 92(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,48(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,20(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 20(%%"REG_d"),%%mm6\n\t" - "punpckldq 48(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 20(%%"REG_S"),%%mm6\n\t" - "punpckldq 48(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,640(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1536(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 128(%%"REG_d"),%%mm6\n\t" - "punpckldq 84(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,56(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,12(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 12(%%"REG_d"),%%mm6\n\t" - "punpckldq 56(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 12(%%"REG_S"),%%mm6\n\t" - "punpckldq 56(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,384(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1792(%%"REG_D")\n\t" - - "movq (%%"REG_a |