From f9b5f2870cd7ebb8fe70eeb65e22d11bb88d5202 Mon Sep 17 00:00:00 2001 From: Uoti Urpala Date: Sat, 2 Apr 2011 07:02:43 +0300 Subject: mp3lib: drop internal mp3lib tree Delete mp3lib which has been the default mp3 decoder until now. In addition to being an unnecessary embedded library it now fails to compile correctly with the new gcc-4.6, producing noise. After the deletion the default decoder priority for mp3 will be first libmpg123 (a newer version of the code that mp3lib was based on) if available, then ffmp3float which should be available in all normal compiles. I think that some tweaking may be required as these decoder alternatives get wider testing, but any problems should be solvable and there should be no need for mp3lib. --- mp3lib/dct12.c | 139 ----- mp3lib/dct36.c | 269 ---------- mp3lib/dct36_3dnow.c | 502 ------------------ mp3lib/dct36_k7.c | 34 -- mp3lib/dct64.c | 323 ------------ mp3lib/dct64_3dnow.c | 929 --------------------------------- mp3lib/dct64_altivec.c | 524 ------------------- mp3lib/dct64_i386.c | 319 ------------ mp3lib/dct64_k7.c | 767 --------------------------- mp3lib/dct64_mmx.c | 987 ----------------------------------- mp3lib/dct64_sse.c | 423 --------------- mp3lib/decod386.c | 253 --------- mp3lib/decode_i586.c | 318 ------------ mp3lib/decode_mmx.c | 369 ------------- mp3lib/equalizer.c | 78 --- mp3lib/huffman.h | 335 ------------ mp3lib/l2tables.h | 166 ------ mp3lib/layer1.c | 165 ------ mp3lib/layer2.c | 322 ------------ mp3lib/layer3.c | 1349 ------------------------------------------------ mp3lib/mp3.h | 39 -- mp3lib/mpg123.h | 144 ------ mp3lib/sr1.c | 605 ---------------------- mp3lib/tabinit.c | 75 --- mp3lib/test.c | 89 ---- mp3lib/test2.c | 87 ---- 26 files changed, 9610 deletions(-) delete mode 100644 mp3lib/dct12.c delete mode 100644 mp3lib/dct36.c delete mode 100644 mp3lib/dct36_3dnow.c delete mode 100644 mp3lib/dct36_k7.c delete mode 100644 mp3lib/dct64.c delete mode 100644 mp3lib/dct64_3dnow.c delete mode 100644 mp3lib/dct64_altivec.c delete mode 100644 mp3lib/dct64_i386.c delete mode 100644 mp3lib/dct64_k7.c delete mode 100644 mp3lib/dct64_mmx.c delete mode 100644 mp3lib/dct64_sse.c delete mode 100644 mp3lib/decod386.c delete mode 100644 mp3lib/decode_i586.c delete mode 100644 mp3lib/decode_mmx.c delete mode 100644 mp3lib/equalizer.c delete mode 100644 mp3lib/huffman.h delete mode 100644 mp3lib/l2tables.h delete mode 100644 mp3lib/layer1.c delete mode 100644 mp3lib/layer2.c delete mode 100644 mp3lib/layer3.c delete mode 100644 mp3lib/mp3.h delete mode 100644 mp3lib/mpg123.h delete mode 100644 mp3lib/sr1.c delete mode 100644 mp3lib/tabinit.c delete mode 100644 mp3lib/test.c delete mode 100644 mp3lib/test2.c (limited to 'mp3lib') diff --git a/mp3lib/dct12.c b/mp3lib/dct12.c deleted file mode 100644 index 5ba45af389..0000000000 --- a/mp3lib/dct12.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * new DCT12 - */ -static void dct12(real *in,real *rawout1,real *rawout2,register real *wi,register real *ts) -{ -#define DCT12_PART1 \ - in5 = in[5*3]; \ - in5 += (in4 = in[4*3]); \ - in4 += (in3 = in[3*3]); \ - in3 += (in2 = in[2*3]); \ - in2 += (in1 = in[1*3]); \ - in1 += (in0 = in[0*3]); \ - \ - in5 += in3; in3 += in1; \ - \ - in2 *= COS6_1; \ - in3 *= COS6_1; \ - -#define DCT12_PART2 \ - in0 += in4 * COS6_2; \ - \ - in4 = in0 + in2; \ - in0 -= in2; \ - \ - in1 += in5 * COS6_2; \ - \ - in5 = (in1 + in3) * tfcos12[0]; \ - in1 = (in1 - in3) * tfcos12[2]; \ - \ - in3 = in4 + in5; \ - in4 -= in5; \ - \ - in2 = in0 + in1; \ - in0 -= in1; - - - { - real in0,in1,in2,in3,in4,in5; - register real *out1 = rawout1; - ts[SBLIMIT*0] = out1[0]; ts[SBLIMIT*1] = out1[1]; ts[SBLIMIT*2] = out1[2]; - ts[SBLIMIT*3] = out1[3]; ts[SBLIMIT*4] = out1[4]; ts[SBLIMIT*5] = out1[5]; - - DCT12_PART1 - - { - real tmp0,tmp1 = (in0 - in4); - { - real tmp2 = (in1 - in5) * tfcos12[1]; - tmp0 = tmp1 + tmp2; - tmp1 -= tmp2; - } - ts[(17-1)*SBLIMIT] = out1[17-1] + tmp0 * wi[11-1]; - ts[(12+1)*SBLIMIT] = out1[12+1] + tmp0 * wi[6+1]; - ts[(6 +1)*SBLIMIT] = out1[6 +1] + tmp1 * wi[1]; - ts[(11-1)*SBLIMIT] = out1[11-1] + tmp1 * wi[5-1]; - } - - DCT12_PART2 - - ts[(17-0)*SBLIMIT] = out1[17-0] + in2 * wi[11-0]; - ts[(12+0)*SBLIMIT] = out1[12+0] + in2 * wi[6+0]; - ts[(12+2)*SBLIMIT] = out1[12+2] + in3 * wi[6+2]; - ts[(17-2)*SBLIMIT] = out1[17-2] + in3 * wi[11-2]; - - ts[(6+0)*SBLIMIT] = out1[6+0] + in0 * wi[0]; - ts[(11-0)*SBLIMIT] = out1[11-0] + in0 * wi[5-0]; - ts[(6+2)*SBLIMIT] = out1[6+2] + in4 * wi[2]; - ts[(11-2)*SBLIMIT] = out1[11-2] + in4 * wi[5-2]; - } - - in++; - - { - real in0,in1,in2,in3,in4,in5; - register real *out2 = rawout2; - - DCT12_PART1 - - { - real tmp0,tmp1 = (in0 - in4); - { - real tmp2 = (in1 - in5) * tfcos12[1]; - tmp0 = tmp1 + tmp2; - tmp1 -= tmp2; - } - out2[5-1] = tmp0 * wi[11-1]; - out2[0+1] = tmp0 * wi[6+1]; - ts[(12+1)*SBLIMIT] += tmp1 * wi[1]; - ts[(17-1)*SBLIMIT] += tmp1 * wi[5-1]; - } - - DCT12_PART2 - - out2[5-0] = in2 * wi[11-0]; - out2[0+0] = in2 * wi[6+0]; - out2[0+2] = in3 * wi[6+2]; - out2[5-2] = in3 * wi[11-2]; - - ts[(12+0)*SBLIMIT] += in0 * wi[0]; - ts[(17-0)*SBLIMIT] += in0 * wi[5-0]; - ts[(12+2)*SBLIMIT] += in4 * wi[2]; - ts[(17-2)*SBLIMIT] += in4 * wi[5-2]; - } - - in++; - - { - real in0,in1,in2,in3,in4,in5; - register real *out2 = rawout2; - out2[12]=out2[13]=out2[14]=out2[15]=out2[16]=out2[17]=0.0; - - DCT12_PART1 - - { - real tmp0,tmp1 = (in0 - in4); - { - real tmp2 = (in1 - in5) * tfcos12[1]; - tmp0 = tmp1 + tmp2; - tmp1 -= tmp2; - } - out2[11-1] = tmp0 * wi[11-1]; - out2[6 +1] = tmp0 * wi[6+1]; - out2[0+1] += tmp1 * wi[1]; - out2[5-1] += tmp1 * wi[5-1]; - } - - DCT12_PART2 - - out2[11-0] = in2 * wi[11-0]; - out2[6 +0] = in2 * wi[6+0]; - out2[6 +2] = in3 * wi[6+2]; - out2[11-2] = in3 * wi[11-2]; - - out2[0+0] += in0 * wi[0]; - out2[5-0] += in0 * wi[5-0]; - out2[0+2] += in4 * wi[2]; - out2[5-2] += in4 * wi[5-2]; - } -} diff --git a/mp3lib/dct36.c b/mp3lib/dct36.c deleted file mode 100644 index 22ec273277..0000000000 --- a/mp3lib/dct36.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Modified for use with MPlayer, for details see the changelog at - * http://svn.mplayerhq.hu/mplayer/trunk/ - * $Id$ - */ - -/* -// This is an optimized DCT from Jeff Tsay's maplay 1.2+ package. -// Saved one multiplication by doing the 'twiddle factor' stuff -// together with the window mul. (MH) -// -// This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the -// 9 point IDCT needs to be reduced further. Unfortunately, I don't -// know how to do that, because 9 is not an even number. - Jeff. -// -////////////////////////////////////////////////////////////////// -// -// 9 Point Inverse Discrete Cosine Transform -// -// This piece of code is Copyright 1997 Mikko Tommila and is freely usable -// by anybody. The algorithm itself is of course in the public domain. -// -// Again derived heuristically from the 9-point WFTA. -// -// The algorithm is optimized (?) for speed, not for small rounding errors or -// good readability. -// -// 36 additions, 11 multiplications -// -// Again this is very likely sub-optimal. -// -// The code is optimized to use a minimum number of temporary variables, -// so it should compile quite well even on 8-register Intel x86 processors. -// This makes the code quite obfuscated and very difficult to understand. -// -// References: -// [1] S. Winograd: "On Computing the Discrete Fourier Transform", -// Mathematics of Computation, Volume 32, Number 141, January 1978, -// Pages 175-199 -*/ - -/*------------------------------------------------------------------*/ -/* */ -/* Function: Calculation of the inverse MDCT */ -/* */ -/*------------------------------------------------------------------*/ - -static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf) -{ -#ifdef NEW_DCT9 - real tmp[18]; -#endif - - { - register real *in = inbuf; - - in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14]; - in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11]; - in[11]+=in[10]; in[10]+=in[9]; in[9] +=in[8]; - in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5]; - in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2]; - in[2] +=in[1]; in[1] +=in[0]; - - in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9]; - in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1]; - - -#ifdef NEW_DCT9 - { - real t0, t1, t2, t3, t4, t5, t6, t7; - - t1 = COS6_2 * in[12]; - t2 = COS6_2 * (in[8] + in[16] - in[4]); - - t3 = in[0] + t1; - t4 = in[0] - t1 - t1; - t5 = t4 - t2; - - t0 = cos9[0] * (in[4] + in[8]); - t1 = cos9[1] * (in[8] - in[16]); - - tmp[4] = t4 + t2 + t2; - t2 = cos9[2] * (in[4] + in[16]); - - t6 = t3 - t0 - t2; - t0 += t3 + t1; - t3 += t2 - t1; - - t2 = cos18[0] * (in[2] + in[10]); - t4 = cos18[1] * (in[10] - in[14]); - t7 = COS6_1 * in[6]; - - t1 = t2 + t4 + t7; - tmp[0] = t0 + t1; - tmp[8] = t0 - t1; - t1 = cos18[2] * (in[2] + in[14]); - t2 += t1 - t7; - - tmp[3] = t3 + t2; - t0 = COS6_1 * (in[10] + in[14] - in[2]); - tmp[5] = t3 - t2; - - t4 -= t1 + t7; - - tmp[1] = t5 - t0; - tmp[7] = t5 + t0; - tmp[2] = t6 + t4; - tmp[6] = t6 - t4; - } - - { - real t0, t1, t2, t3, t4, t5, t6, t7; - - t1 = COS6_2 * in[13]; - t2 = COS6_2 * (in[9] + in[17] - in[5]); - - t3 = in[1] + t1; - t4 = in[1] - t1 - t1; - t5 = t4 - t2; - - t0 = cos9[0] * (in[5] + in[9]); - t1 = cos9[1] * (in[9] - in[17]); - - tmp[13] = (t4 + t2 + t2) * tfcos36[17-13]; - t2 = cos9[2] * (in[5] + in[17]); - - t6 = t3 - t0 - t2; - t0 += t3 + t1; - t3 += t2 - t1; - - t2 = cos18[0] * (in[3] + in[11]); - t4 = cos18[1] * (in[11] - in[15]); - t7 = COS6_1 * in[7]; - - t1 = t2 + t4 + t7; - tmp[17] = (t0 + t1) * tfcos36[17-17]; - tmp[9] = (t0 - t1) * tfcos36[17-9]; - t1 = cos18[2] * (in[3] + in[15]); - t2 += t1 - t7; - - tmp[14] = (t3 + t2) * tfcos36[17-14]; - t0 = COS6_1 * (in[11] + in[15] - in[3]); - tmp[12] = (t3 - t2) * tfcos36[17-12]; - - t4 -= t1 + t7; - - tmp[16] = (t5 - t0) * tfcos36[17-16]; - tmp[10] = (t5 + t0) * tfcos36[17-10]; - tmp[15] = (t6 + t4) * tfcos36[17-15]; - tmp[11] = (t6 - t4) * tfcos36[17-11]; - } - -#define MACRO(v) { \ - real tmpval; \ - real sum0 = tmp[(v)]; \ - real sum1 = tmp[17-(v)]; \ - out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \ - out2[8-(v)] = tmpval * w[26-(v)]; \ - sum0 -= sum1; \ - ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \ - ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; } - -{ - register real *out2 = o2; - register real *w = wintab; - register real *out1 = o1; - register real *ts = tsbuf; - - MACRO(0); - MACRO(1); - MACRO(2); - MACRO(3); - MACRO(4); - MACRO(5); - MACRO(6); - MACRO(7); - MACRO(8); -} - -#else - - { - -#define MACRO0(v) { \ - real tmp; \ - out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \ - out2[8-(v)] = tmp * w[26-(v)]; } \ - sum0 -= sum1; \ - ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \ - ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; -#define MACRO1(v) { \ - real sum0, sum1; \ - sum0 = tmp1a + tmp2a; \ - sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \ - MACRO0(v); } -#define MACRO2(v) { \ - real sum0, sum1; \ - sum0 = tmp2a - tmp1a; \ - sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \ - MACRO0(v); } - - register const real *c = COS9; - register real *out2 = o2; - register real *w = wintab; - register real *out1 = o1; - register real *ts = tsbuf; - - real ta33,ta66,tb33,tb66; - - ta33 = in[2*3+0] * c[3]; - ta66 = in[2*6+0] * c[6]; - tb33 = in[2*3+1] * c[3]; - tb66 = in[2*6+1] * c[6]; - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7]; - tmp1b = in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7]; - tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8]; - tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8]; - - MACRO1(0); - MACRO2(8); - } - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3]; - tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3]; - tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0]; - tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1]; - - MACRO1(1); - MACRO2(7); - } - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1]; - tmp1b = in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1]; - tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4]; - tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4]; - - MACRO1(2); - MACRO2(6); - } - - { - real tmp1a,tmp2a,tmp1b,tmp2b; - tmp1a = in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5]; - tmp1b = in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5]; - tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2]; - tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2]; - - MACRO1(3); - MACRO2(5); - } - - { - real sum0,sum1; - sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0]; - sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4]; - MACRO0(4); - } - } -#endif - - } -} diff --git a/mp3lib/dct36_3dnow.c b/mp3lib/dct36_3dnow.c deleted file mode 100644 index 4362d0582b..0000000000 --- a/mp3lib/dct36_3dnow.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * dct36_3dnow.c - 3DNow! optimized dct36() - * - * This code based 'dct36_3dnow.s' by Syuuhei Kashiyama - * , only two types of changes have been made: - * - * - removed PREFETCH instruction for speedup - * - changed function name for support 3DNow! automatic detection - * - * You can find Kashiyama's original 3dnow! support patch - * (for mpg123-0.59o) at - * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). - * - * by KIMURA Takuhiro - until 31.Mar.1999 - * - after 1.Apr.1999 - * - * Modified for use with MPlayer, for details see the changelog at - * http://svn.mplayerhq.hu/mplayer/trunk/ - * $Id$ - * - * Original disclaimer: - * The author of this program disclaim whole expressed or implied - * warranties with regard to this program, and in no event shall the - * author of this program liable to whatever resulted from the use of - * this program. Use it at your own risk. - * - * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi - */ - -#include "config.h" -#include "mangle.h" -#include "mpg123.h" -#include "ffmpeg_files/x86_cpu.h" - -#ifdef DCT36_OPTIMIZE_FOR_K7 -void dct36_3dnowex(real *inbuf, real *o1, - real *o2, real *wintab, real *tsbuf) -#else -void dct36_3dnow(real *inbuf, real *o1, - real *o2, real *wintab, real *tsbuf) -#endif -{ - __asm__ volatile( - "movq (%%"REG_a"),%%mm0\n\t" - "movq 4(%%"REG_a"),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movq %%mm0,4(%%"REG_a")\n\t" - "psrlq $32,%%mm1\n\t" - "movq 12(%%"REG_a"),%%mm2\n\t" - "punpckldq %%mm2,%%mm1\n\t" - "pfadd %%mm2,%%mm1\n\t" - "movq %%mm1,12(%%"REG_a")\n\t" - "psrlq $32,%%mm2\n\t" - "movq 20(%%"REG_a"),%%mm3\n\t" - "punpckldq %%mm3,%%mm2\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq %%mm2,20(%%"REG_a")\n\t" - "psrlq $32,%%mm3\n\t" - "movq 28(%%"REG_a"),%%mm4\n\t" - "punpckldq %%mm4,%%mm3\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm3,28(%%"REG_a")\n\t" - "psrlq $32,%%mm4\n\t" - "movq 36(%%"REG_a"),%%mm5\n\t" - "punpckldq %%mm5,%%mm4\n\t" - "pfadd %%mm5,%%mm4\n\t" - "movq %%mm4,36(%%"REG_a")\n\t" - "psrlq $32,%%mm5\n\t" - "movq 44(%%"REG_a"),%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movq %%mm5,44(%%"REG_a")\n\t" - "psrlq $32,%%mm6\n\t" - "movq 52(%%"REG_a"),%%mm7\n\t" - "punpckldq %%mm7,%%mm6\n\t" - "pfadd %%mm7,%%mm6\n\t" - "movq %%mm6,52(%%"REG_a")\n\t" - "psrlq $32,%%mm7\n\t" - "movq 60(%%"REG_a"),%%mm0\n\t" - "punpckldq %%mm0,%%mm7\n\t" - "pfadd %%mm0,%%mm7\n\t" - "movq %%mm7,60(%%"REG_a")\n\t" - "psrlq $32,%%mm0\n\t" - "movd 68(%%"REG_a"),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movd %%mm0,68(%%"REG_a")\n\t" - "movd 4(%%"REG_a"),%%mm0\n\t" - "movd 12(%%"REG_a"),%%mm1\n\t" - "punpckldq %%mm1,%%mm0\n\t" - "punpckldq 20(%%"REG_a"),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movd %%mm0,12(%%"REG_a")\n\t" - "psrlq $32,%%mm0\n\t" - "movd %%mm0,20(%%"REG_a")\n\t" - "psrlq $32,%%mm1\n\t" - "movd 28(%%"REG_a"),%%mm2\n\t" - "punpckldq %%mm2,%%mm1\n\t" - "punpckldq 36(%%"REG_a"),%%mm2\n\t" - "pfadd %%mm2,%%mm1\n\t" - "movd %%mm1,28(%%"REG_a")\n\t" - "psrlq $32,%%mm1\n\t" - "movd %%mm1,36(%%"REG_a")\n\t" - "psrlq $32,%%mm2\n\t" - "movd 44(%%"REG_a"),%%mm3\n\t" - "punpckldq %%mm3,%%mm2\n\t" - "punpckldq 52(%%"REG_a"),%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movd %%mm2,44(%%"REG_a")\n\t" - "psrlq $32,%%mm2\n\t" - "movd %%mm2,52(%%"REG_a")\n\t" - "psrlq $32,%%mm3\n\t" - "movd 60(%%"REG_a"),%%mm4\n\t" - "punpckldq %%mm4,%%mm3\n\t" - "punpckldq 68(%%"REG_a"),%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movd %%mm3,60(%%"REG_a")\n\t" - "psrlq $32,%%mm3\n\t" - "movd %%mm3,68(%%"REG_a")\n\t" - - "movq 24(%%"REG_a"),%%mm0\n\t" - "movq 48(%%"REG_a"),%%mm1\n\t" - "movd "MANGLE(COS9)"+12,%%mm2\n\t" - "punpckldq %%mm2,%%mm2\n\t" - "movd "MANGLE(COS9)"+24,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm2,%%mm0\n\t" - "pfmul %%mm3,%%mm1\n\t" - "push %%"REG_a"\n\t" - "movl $1,%%eax\n\t" - "movd %%eax,%%mm7\n\t" - "pi2fd %%mm7,%%mm7\n\t" - "pop %%"REG_a"\n\t" - "movq 8(%%"REG_a"),%%mm2\n\t" - "movd "MANGLE(COS9)"+4,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfadd %%mm0,%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+20,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+28,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq (%%"REG_a"),%%mm3\n\t" - "movq 16(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 108(%%"REG_d"),%%mm6\n\t" - "punpckldq 104(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" -#ifdef DCT36_OPTIMIZE_FOR_K7 - "pswapd %%mm5,%%mm5\n\t" - "movq %%mm5,32(%%"REG_c")\n\t" -#else - "movd %%mm5,36(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,32(%%"REG_c")\n\t" -#endif - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 32(%%"REG_d"),%%mm6\n\t" - "punpckldq 36(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 32(%%"REG_S"),%%mm6\n\t" - "punpckldq 36(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,1024(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1152(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 140(%%"REG_d"),%%mm6\n\t" - "punpckldq 72(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,68(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,0(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 0(%%"REG_d"),%%mm6\n\t" - "punpckldq 68(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 0(%%"REG_S"),%%mm6\n\t" - "punpckldq 68(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,0(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,2176(%%"REG_D")\n\t" - "movq 8(%%"REG_a"),%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movd "MANGLE(COS9)"+12,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "movq 16(%%"REG_a"),%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movd "MANGLE(COS9)"+24,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "movq 48(%%"REG_a"),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq (%%"REG_a"),%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 112(%%"REG_d"),%%mm6\n\t" - "punpckldq 100(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,40(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,28(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 28(%%"REG_d"),%%mm6\n\t" - "punpckldq 40(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 28(%%"REG_S"),%%mm6\n\t" - "punpckldq 40(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,896(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1280(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 136(%%"REG_d"),%%mm6\n\t" - "punpckldq 76(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,64(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,4(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 4(%%"REG_d"),%%mm6\n\t" - "punpckldq 64(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 4(%%"REG_S"),%%mm6\n\t" - "punpckldq 64(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,128(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,2048(%%"REG_D")\n\t" - - "movq 8(%%"REG_a"),%%mm2\n\t" - "movd "MANGLE(COS9)"+20,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfsub %%mm0,%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+28,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+4,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq (%%"REG_a"),%%mm3\n\t" - "movq 16(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 116(%%"REG_d"),%%mm6\n\t" - "punpckldq 96(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,44(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,24(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 24(%%"REG_d"),%%mm6\n\t" - "punpckldq 44(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 24(%%"REG_S"),%%mm6\n\t" - "punpckldq 44(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,768(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1408(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 132(%%"REG_d"),%%mm6\n\t" - "punpckldq 80(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,60(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,8(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 8(%%"REG_d"),%%mm6\n\t" - "punpckldq 60(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 8(%%"REG_S"),%%mm6\n\t" - "punpckldq 60(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,256(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1920(%%"REG_D")\n\t" - "movq 8(%%"REG_a"),%%mm2\n\t" - "movd "MANGLE(COS9)"+28,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfsub %%mm0,%%mm2\n\t" - "movq 40(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+4,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq 56(%%"REG_a"),%%mm3\n\t" - "movd "MANGLE(COS9)"+20,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq (%%"REG_a"),%%mm3\n\t" - "movq 16(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 32(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%"REG_a"),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 120(%%"REG_d"),%%mm6\n\t" - "punpckldq 92(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,48(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,20(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 20(%%"REG_d"),%%mm6\n\t" - "punpckldq 48(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 20(%%"REG_S"),%%mm6\n\t" - "punpckldq 48(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,640(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1536(%%"REG_D")\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 128(%%"REG_d"),%%mm6\n\t" - "punpckldq 84(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,56(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,12(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 12(%%"REG_d"),%%mm6\n\t" - "punpckldq 56(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 12(%%"REG_S"),%%mm6\n\t" - "punpckldq 56(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,384(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1792(%%"REG_D")\n\t" - - "movq (%%"REG_a"),%%mm4\n\t" - "movq 16(%%"REG_a"),%%mm3\n\t" - "pfsub %%mm3,%%mm4\n\t" - "movq 32(%%"REG_a"),%%mm3\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq 48(%%"REG_a"),%%mm3\n\t" - "pfsub %%mm3,%%mm4\n\t" - "movq 64(%%"REG_a"),%%mm3\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 124(%%"REG_d"),%%mm6\n\t" - "punpckldq 88(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,52(%%"REG_c")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,16(%%"REG_c")\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 16(%%"REG_d"),%%mm6\n\t" - "punpckldq 52(%%"REG_d"),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 16(%%"REG_S"),%%mm6\n\t" - "punpckldq 52(%%"REG_S"),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,512(%%"REG_D")\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1664(%%"REG_D")\n\t" - - "femms\n\t" - : - : "a" (inbuf), "S" (o1), "c" (o2), "d" (wintab), "D" (tsbuf) - : "memory"); -} diff --git a/mp3lib/dct36_k7.c b/mp3lib/dct36_k7.c deleted file mode 100644 index f9da0f2e90..0000000000 --- a/mp3lib/dct36_k7.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * dct36_k7.c - 3DNowEx(DSP)! optimized dct36() - * - * This code based 'dct36_3dnow.s' by Syuuhei Kashiyama - * , only two types of changes have been made: - * - * - added new opcode PSWAPD - * - removed PREFETCH instruction for speedup - * - changed function name for support 3DNowEx! automatic detection - * - * note: because K7 processors are an aggresive out-of-order three-way - * superscalar ones instruction order is not significand for them. - * - * You can find Kashiyama's original 3dnow! support patch - * (for mpg123-0.59o) at - * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). - * - * by KIMURA Takuhiro - until 31.Mar.1999 - * - after 1.Apr.1999 - * - * Original disclaimer: - * The author of this program disclaim whole expressed or implied - * warranties with regard to this program, and in no event shall the - * author of this program liable to whatever resulted from the use of - * this program. Use it at your own risk. - * - * Modified by Nick Kurshev - * - * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi - */ - -#define DCT36_OPTIMIZE_FOR_K7 - -#include "dct36_3dnow.c" diff --git a/mp3lib/dct64.c b/mp3lib/dct64.c deleted file mode 100644 index 514aa4fc65..0000000000 --- a/mp3lib/dct64.c +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Modified for use with MPlayer, for details see the changelog at - * http://svn.mplayerhq.hu/mplayer/trunk/ - * $Id$ - */ - -/* - * Discrete Cosine Tansform (DCT) for subband synthesis - * optimized for machines with no auto-increment. - * The performance is highly compiler dependend. Maybe - * the dct64.c version for 'normal' processor may be faster - * even for Intel processors. - */ - -static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples) -{ - - { - register real *costab = mp3lib_pnts[0]; - - b1[0x00] = samples[0x00] + samples[0x1F]; - b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; - - b1[0x01] = samples[0x01] + samples[0x1E]; - b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; - - b1[0x02] = samples[0x02] + samples[0x1D]; - b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; - - b1[0x03] = samples[0x03] + samples[0x1C]; - b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; - - b1[0x04] = samples[0x04] + samples[0x1B]; - b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; - - b1[0x05] = samples[0x05] + samples[0x1A]; - b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; - - b1[0x06] = samples[0x06] + samples[0x19]; - b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; - - b1[0x07] = samples[0x07] + samples[0x18]; - b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; - - b1[0x08] = samples[0x08] + samples[0x17]; - b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; - - b1[0x09] = samples[0x09] + samples[0x16]; - b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; - - b1[0x0A] = samples[0x0A] + samples[0x15]; - b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; - - b1[0x0B] = samples[0x0B] + samples[0x14]; - b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; - - b1[0x0C] = samples[0x0C] + samples[0x13]; - b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; - - b1[0x0D] = samples[0x0D] + samples[0x12]; - b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; - - b1[0x0E] = samples[0x0E] + samples[0x11]; - b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; - - b1[0x0F] = samples[0x0F] + samples[0x10]; - b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; - } - - - { - register real *costab = mp3lib_pnts[1]; - - b2[0x00] = b1[0x00] + b1[0x0F]; - b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; - b2[0x01] = b1[0x01] + b1[0x0E]; - b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; - b2[0x02] = b1[0x02] + b1[0x0D]; - b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; - b2[0x03] = b1[0x03] + b1[0x0C]; - b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; - b2[0x04] = b1[0x04] + b1[0x0B]; - b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; - b2[0x05] = b1[0x05] + b1[0x0A]; - b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; - b2[0x06] = b1[0x06] + b1[0x09]; - b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; - b2[0x07] = b1[0x07] + b1[0x08]; - b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; - - b2[0x10] = b1[0x10] + b1[0x1F]; - b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; - b2[0x11] = b1[0x11] + b1[0x1E]; - b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; - b2[0x12] = b1[0x12] + b1[0x1D]; - b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; - b2[0x13] = b1[0x13] + b1[0x1C]; - b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; - b2[0x14] = b1[0x14] + b1[0x1B]; - b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; - b2[0x15] = b1[0x15] + b1[0x1A]; - b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; - b2[0x16] = b1[0x16] + b1[0x19]; - b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; - b2[0x17] = b1[0x17] + b1[0x18]; - b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; - } - - { - register real *costab = mp3lib_pnts[2]; - - b1[0x00] = b2[0x00] + b2[0x07]; - b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; - b1[0x01] = b2[0x01] + b2[0x06]; - b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; - b1[0x02] = b2[0x02] + b2[0x05]; - b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; - b1[0x03] = b2[0x03] + b2[0x04]; - b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; - - b1[0x08] = b2[0x08] + b2[0x0F]; - b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; - b1[0x09] = b2[0x09] + b2[0x0E]; - b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; - b1[0x0A] = b2[0x0A] + b2[0x0D]; - b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; - b1[0x0B] = b2[0x0B] + b2[0x0C]; - b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; - - b1[0x10] = b2[0x10] + b2[0x17]; - b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; - b1[0x11] = b2[0x11] + b2[0x16]; - b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; - b1[0x12] = b2[0x12] + b2[0x15]; - b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; - b1[0x13] = b2[0x13] + b2[0x14]; - b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; - - b1[0x18] = b2[0x18] + b2[0x1F]; - b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; - b1[0x19] = b2[0x19] + b2[0x1E]; - b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; - b1[0x1A] = b2[0x1A] + b2[0x1D]; - b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; - b1[0x1B] = b2[0x1B] + b2[0x1C]; - b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; - } - - { - register real const cos0 = mp3lib_pnts[3][0]; - register real const cos1 = mp3lib_pnts[3][1]; - - b2[0x00] = b1[0x00] + b1[0x03]; - b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; - b2[0x01] = b1[0x01] + b1[0x02]; - b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; - - b2[0x04] = b1[0x04] + b1[0x07]; - b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; - b2[0x05] = b1[0x05] + b1[0x06]; - b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; - - b2[0x08] = b1[0x08] + b1[0x0B]; - b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; - b2[0x09] = b1[0x09] + b1[0x0A]; - b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; - - b2[0x0C] = b1[0x0C] + b1[0x0F]; - b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; - b2[0x0D] = b1[0x0D] + b1[0x0E]; - b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; - - b2[0x10] = b1[0x10] + b1[0x13]; - b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; - b2[0x11] = b1[0x11] + b1[0x12]; - b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; - - b2[0x14] = b1[0x14] + b1[0x17]; - b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; - b2[0x15] = b1[0x15] + b1[0x16]; - b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; - - b2[0x18] = b1[0x18] + b1[0x1B]; - b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; - b2[0x19] = b1[0x19] + b1[0x1A]; - b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; - - b2[0x1C] = b1[0x1C] + b1[0x1F]; - b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; - b2[0x1D] = b1[0x1D] + b1[0x1E]; - b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; - } - - { - register real const cos0 = mp3lib_pnts[4][0]; - - b1[0x00] = b2[0x00] + b2[0x01]; - b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; - b1[0x02] = b2[0x02] + b2[0x03]; - b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; - b1[0x02] += b1[0x03]; - - b1[0x04] = b2[0x04] + b2[0x05]; - b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; - b1[0x06] = b2[0x06] + b2[0x07]; - b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; - b1[0x06] += b1[0x07]; - b1[0x04] += b1[0x06]; - b1[0x06] += b1[0x05]; - b1[0x05] += b1[0x07]; - - b1[0x08] = b2[0x08] + b2[0x09]; - b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; - b1[0x0A] = b2[0x0A] + b2[0x0B]; - b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; - b1[0x0A] += b1[0x0B]; - - b1[0x0C] = b2[0x0C] + b2[0x0D]; - b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; - b1[0x0E] = b2[0x0E] + b2[0x0F]; - b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; - b1[0x0E] += b1[0x0F]; - b1[0x0C] += b1[0x0E]; - b1[0x0E] += b1[0x0D]; - b1[0x0D] += b1[0x0F]; - - b1[0x10] = b2[0x10] + b2[0x11]; - b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; - b1[0x12] = b2[0x12] + b2[0x13]; - b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; - b1[0x12] += b1[0x13]; - - b1[0x14] = b2[0x14] + b2[0x15]; - b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; - b1[0x16] = b2[0x16] + b2[0x17]; - b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; - b1[0x16] += b1[0x17]; - b1[0x14] += b1[0x16]; - b1[0x16] += b1[0x15]; - b1[0x15] += b1[0x17]; - - b1[0x18] = b2[0x18] + b2[0x19]; - b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; - b1[0x1A] = b2[0x1A] + b2[0x1B]; - b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; - b1[0x1A] += b1[0x1B]; - - b1[0x1C] = b2[0x1C] + b2[0x1D]; - b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; - b1[0x1E] = b2[0x1E] + b2[0x1F]; - b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; - b1[0x1E] += b1[0x1F]; - b1[0x1C] += b1[0x1E]; - b1[0x1E] += b1[0x1D]; - b1[0x1D] += b1[0x1F]; - } - - out0[0x10*16] = b1[0x00]; - out0[0x10*12] = b1[0x04]; - out0[0x10* 8] = b1[0x02]; - out0[0x10* 4] = b1[0x06]; - out0[0x10* 0] = b1[0x01]; - out1[0x10* 0] = b1[0x01]; - out1[0x10* 4] = b1[0x05]; - out1[0x10* 8] = b1[0x03]; - out1[0x10*12] = b1[0x07]; - - b1[0x08] += b1[0x0C]; - out0[0x10*14] = b1[0x08]; - b1[0x0C] += b1[0x0a]; - out0[0x10*10] = b1[0x0C]; - b1[0x0A] += b1[0x0E]; - out0[0x10* 6] = b1[0x0A]; - b1[0x0E] += b1[0x09]; - out0[0x10* 2] = b1[0x0E]; - b1[0x09] += b1[0x0D]; - out1[0x10* 2] = b1[0x09]; - b1[0x0D] += b1[0x0B]; - out1[0x10* 6] = b1[0x0D]; - b1[0x0B] += b1[0x0F]; - out1[0x10*10] = b1[0x0B]; - out1[0x10*14] = b1[0x0F]; - - b1[0x18] += b1[0x1C]; - out0[0x10*15] = b1[0x10] + b1[0x18]; - out0[0x10*13] = b1[0x18] + b1[0x14]; - b1[0x1C] += b1[0x1a]; - out0[0x10*11] = b1[0x14] + b1[0x1C]; - out0[0x10* 9] = b1[0x1C] + b1[0x12]; - b1[0x1A] += b1[0x1E]; - out0[0x10* 7] = b1[0x12] + b1[0x1A]; - out0[0x10* 5] = b1[0x1A] + b1[0x16]; - b1[0x1E] += b1[0x19]; - out0[0x10* 3] = b1[0x16] + b1[0x1E]; - out0[0x10* 1] = b1[0x1E] + b1[0x11]; - b1[0x19] += b1[0x1D]; - out1[0x10* 1] = b1[0x11] + b1[0x19]; - out1[0x10* 3] = b1[0x19] + b1[0x15]; - b1[0x1D] += b1[0x1B]; - out1[0x10* 5] = b1[0x15] + b1[0x1D]; - out1[0x10* 7] = b1[0x1D] + b1[0x13]; - b1[0x1B] += b1[0x1F]; - out1[0x10* 9] = b1[0x13] + b1[0x1B]; - out1[0x10*11] = b1[0x1B] + b1[0x17]; - out1[0x10*13] = b1[0x17] + b1[0x1F]; - out1[0x10*15] = b1[0x1F]; -} - -/* - * the call via dct64 is a trick to force GCC to use - * (new) registers for the b1,b2 pointer to the bufs[xx] field - */ -static void dct64(real *a,real *b,real *c) -{ - real bufs[0x40]; - dct64_1(a,b,bufs,bufs+0x20,c); -} - -void mp3lib_dct64(real *a,real *b,real *c) -{ - real bufs[0x40]; - dct64_1(a,b,bufs,bufs+0x20,c); -} diff --git a/mp3lib/dct64_3dnow.c b/mp3lib/dct64_3dnow.c deleted file mode 100644 index 7cd8603941..0000000000 --- a/mp3lib/dct64_3dnow.c +++ /dev/null @@ -1,929 +0,0 @@ -/* -* This code was taken from http://www.mpg123.org -* See ChangeLog of mpg123-0.59s-pre.1 for detail -* Applied to mplayer by Nick Kurshev -* Partial 3dnow! optimization by Nick Kurshev -* -* TODO: optimize scalar 3dnow! code -* Warning: Phases 7 & 8 are not tested -*/ - -#include "config.h" -#include "mangle.h" -#include "mpg123.h" -#include "ffmpeg_files/x86_cpu.h" - -static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; -static float attribute_used plus_1f = 1.0; - -void dct64_MMX_3dnow(short *a,short *b,real *c) -{ - char tmp[256]; - __asm__ volatile( -" mov %2,%%"REG_a"\n\t" - -" lea 128+%3,%%"REG_d"\n\t" -" mov %0,%%"REG_S"\n\t" -" mov %1,%%"REG_D"\n\t" -" mov $"MANGLE(costab_mmx)",%%"REG_b"\n\t" -" lea %3,%%"REG_c"\n\t" - -/* Phase 1*/ -" movq (%%"REG_a"), %%mm0\n\t" -" movq 8(%%"REG_a"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 120(%%"REG_a"), %%mm1\n\t" -" movq 112(%%"REG_a"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%"REG_d")\n\t" -" movq %%mm4, 8(%%"REG_d")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul (%%"REG_b"), %%mm3\n\t" -" pfmul 8(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 124(%%"REG_d")\n\t" -" movd %%mm7, 116(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 120(%%"REG_d")\n\t" -" movd %%mm7, 112(%%"REG_d")\n\t" - -" movq 16(%%"REG_a"), %%mm0\n\t" -" movq 24(%%"REG_a"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 104(%%"REG_a"), %%mm1\n\t" -" movq 96(%%"REG_a"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 16(%%"REG_d")\n\t" -" movq %%mm4, 24(%%"REG_d")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 16(%%"REG_b"), %%mm3\n\t" -" pfmul 24(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 108(%%"REG_d")\n\t" -" movd %%mm7, 100(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 104(%%"REG_d")\n\t" -" movd %%mm7, 96(%%"REG_d")\n\t" - -" movq 32(%%"REG_a"), %%mm0\n\t" -" movq 40(%%"REG_a"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 88(%%"REG_a"), %%mm1\n\t" -" movq 80(%%"REG_a"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 32(%%"REG_d")\n\t" -" movq %%mm4, 40(%%"REG_d")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 32(%%"REG_b"), %%mm3\n\t" -" pfmul 40(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 92(%%"REG_d")\n\t" -" movd %%mm7, 84(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 88(%%"REG_d")\n\t" -" movd %%mm7, 80(%%"REG_d")\n\t" - -" movq 48(%%"REG_a"), %%mm0\n\t" -" movq 56(%%"REG_a"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 72(%%"REG_a"), %%mm1\n\t" -" movq 64(%%"REG_a"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 48(%%"REG_d")\n\t" -" movq %%mm4, 56(%%"REG_d")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 48(%%"REG_b"), %%mm3\n\t" -" pfmul 56(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 76(%%"REG_d")\n\t" -" movd %%mm7, 68(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 72(%%"REG_d")\n\t" -" movd %%mm7, 64(%%"REG_d")\n\t" - -/* Phase 2*/ - -" movq (%%"REG_d"), %%mm0\n\t" -" movq 8(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 56(%%"REG_d"), %%mm1\n\t" -" movq 48(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%"REG_c")\n\t" -" movq %%mm4, 8(%%"REG_c")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 64(%%"REG_b"), %%mm3\n\t" -" pfmul 72(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 60(%%"REG_c")\n\t" -" movd %%mm7, 52(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 56(%%"REG_c")\n\t" -" movd %%mm7, 48(%%"REG_c")\n\t" - -" movq 16(%%"REG_d"), %%mm0\n\t" -" movq 24(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 40(%%"REG_d"), %%mm1\n\t" -" movq 32(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 16(%%"REG_c")\n\t" -" movq %%mm4, 24(%%"REG_c")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 80(%%"REG_b"), %%mm3\n\t" -" pfmul 88(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 44(%%"REG_c")\n\t" -" movd %%mm7, 36(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 40(%%"REG_c")\n\t" -" movd %%mm7, 32(%%"REG_c")\n\t" - -/* Phase 3*/ - -" movq 64(%%"REG_d"), %%mm0\n\t" -" movq 72(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 120(%%"REG_d"), %%mm1\n\t" -" movq 112(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 64(%%"REG_c")\n\t" -" movq %%mm4, 72(%%"REG_c")\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 64(%%"REG_b"), %%mm3\n\t" -" pfmul 72(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 124(%%"REG_c")\n\t" -" movd %%mm7, 116(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 120(%%"REG_c")\n\t" -" movd %%mm7, 112(%%"REG_c")\n\t" - -" movq 80(%%"REG_d"), %%mm0\n\t" -" movq 88(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 104(%%"REG_d"), %%mm1\n\t" -" movq 96(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 80(%%"REG_c")\n\t" -" movq %%mm4, 88(%%"REG_c")\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 80(%%"REG_b"), %%mm3\n\t" -" pfmul 88(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 108(%%"REG_c")\n\t" -" movd %%mm7, 100(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 104(%%"REG_c")\n\t" -" movd %%mm7, 96(%%"REG_c")\n\t" - -/* Phase 4*/ - -" movq (%%"REG_c"), %%mm0\n\t" -" movq 8(%%"REG_c"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 24(%%"REG_c"), %%mm1\n\t" -" movq 16(%%"REG_c"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%"REG_d")\n\t" -" movq %%mm4, 8(%%"REG_d")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 96(%%"REG_b"), %%mm3\n\t" -" pfmul 104(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 28(%%"REG_d")\n\t" -" movd %%mm7, 20(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 24(%%"REG_d")\n\t" -" movd %%mm7, 16(%%"REG_d")\n\t" - -" movq 32(%%"REG_c"), %%mm0\n\t" -" movq 40(%%"REG_c"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 56(%%"REG_c"), %%mm1\n\t" -" movq 48(%%"REG_c"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 32(%%"REG_d")\n\t" -" movq %%mm4, 40(%%"REG_d")\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 96(%%"REG_b"), %%mm3\n\t" -" pfmul 104(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 60(%%"REG_d")\n\t" -" movd %%mm7, 52(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 56(%%"REG_d")\n\t" -" movd %%mm7, 48(%%"REG_d")\n\t" - -" movq 64(%%"REG_c"), %%mm0\n\t" -" movq 72(%%"REG_c"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 88(%%"REG_c"), %%mm1\n\t" -" movq 80(%%"REG_c"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 64(%%"REG_d")\n\t" -" movq %%mm4, 72(%%"REG_d")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 96(%%"REG_b"), %%mm3\n\t" -" pfmul 104(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 92(%%"REG_d")\n\t" -" movd %%mm7, 84(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 88(%%"REG_d")\n\t" -" movd %%mm7, 80(%%"REG_d")\n\t" - -" movq 96(%%"REG_c"), %%mm0\n\t" -" movq 104(%%"REG_c"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 120(%%"REG_c"), %%mm1\n\t" -" movq 112(%%"REG_c"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 96(%%"REG_d")\n\t" -" movq %%mm4, 104(%%"REG_d")\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 96(%%"REG_b"), %%mm3\n\t" -" pfmul 104(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 124(%%"REG_d")\n\t" -" movd %%mm7, 116(%%"REG_d")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 120(%%"REG_d")\n\t" -" movd %%mm7, 112(%%"REG_d")\n\t" - -/* Phase 5 */ - -" movq (%%"REG_d"), %%mm0\n\t" -" movq 16(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 8(%%"REG_d"), %%mm1\n\t" -" movq 24(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%"REG_c")\n\t" -" movq %%mm4, 16(%%"REG_c")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%"REG_b"), %%mm3\n\t" -" pfmul 112(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 12(%%"REG_c")\n\t" -" movd %%mm7, 28(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 8(%%"REG_c")\n\t" -" movd %%mm7, 24(%%"REG_c")\n\t" - -" movq 32(%%"REG_d"), %%mm0\n\t" -" movq 48(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 40(%%"REG_d"), %%mm1\n\t" -" movq 56(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 32(%%"REG_c")\n\t" -" movq %%mm4, 48(%%"REG_c")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%"REG_b"), %%mm3\n\t" -" pfmul 112(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 44(%%"REG_c")\n\t" -" movd %%mm7, 60(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 40(%%"REG_c")\n\t" -" movd %%mm7, 56(%%"REG_c")\n\t" - -" movq 64(%%"REG_d"), %%mm0\n\t" -" movq 80(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 72(%%"REG_d"), %%mm1\n\t" -" movq 88(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 64(%%"REG_c")\n\t" -" movq %%mm4, 80(%%"REG_c")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%"REG_b"), %%mm3\n\t" -" pfmul 112(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 76(%%"REG_c")\n\t" -" movd %%mm7, 92(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 72(%%"REG_c")\n\t" -" movd %%mm7, 88(%%"REG_c")\n\t" - -" movq 96(%%"REG_d"), %%mm0\n\t" -" movq 112(%%"REG_d"), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 104(%%"REG_d"), %%mm1\n\t" -" movq 120(%%"REG_d"), %%mm5\n\t" -/* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" -/**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 96(%%"REG_c")\n\t" -" movq %%mm4, 112(%%"REG_c")\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%"REG_b"), %%mm3\n\t" -" pfmul 112(%%"REG_b"), %%mm7\n\t" -" movd %%mm3, 108(%%"REG_c")\n\t" -" movd %%mm7, 124(%%"REG_c")\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 104(%%"REG_c")\n\t" -" movd %%mm7, 120(%%"REG_c")\n\t" - -/* Phase 6. This is the end of easy road. */ -/* Code below is coded in scalar mode. Should be optimized */ - -" movd "MANGLE(plus_1f)", %%mm6\n\t" -" punpckldq 120(%%"REG_b"), %%mm6\n\t" /* mm6 = 1.0 | 120(%%"REG_b")*/ -" movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ - -" movq 32(%%"REG_c"), %%mm0\n\t" -" movq 64(%%"REG_c"), %%mm2\n\t" -" movq %%mm0, %%mm1\n\t" -" movq %%mm2, %%mm3\n\t" -" pxor %%mm7, %%mm1\n\t" -" pxor %%mm7, %%mm3\n\t" -" pfacc %%mm1, %%mm0\n\t" -" pfacc %%mm3, %%mm2\n\t" -" pfmul %%mm6, %%mm0\n\t" -" pfmul %%mm6, %%mm2\n\t" -" movq %%mm0, 32(%%"REG_d")\n\t" -" movq %%mm2, 64(%%"REG_d")\n\t" - -" movd 44(%%"REG_c"), %%mm0\n\t" -" movd 40(%%"REG_c"), %%mm2\n\t" -" movd 120(%%"REG_b"), %%mm3\n\t" -" punpckldq 76(%%"REG_c"), %%mm0\n\t" -" punpckldq 72(%%"REG_c"), %%mm2\n\t" -" punpckldq %%mm3, %%mm3\n\t" -" movq %%mm0, %%mm4\n\t" -" movq %%mm2, %%mm5\n\t" -" pfsub %%mm2, %%mm0\n\t" -" pfmul %%mm3, %%mm0\n\t" -" movq %%mm0, %%mm1\n\t" -" pfadd %%mm5, %%mm0\n\t" -" pfadd %%mm4, %%mm0\n\t" -" movq %%mm0, %%mm2\n\t" -" punpckldq %%mm1, %%mm0\n\t" -" punpckhdq %%mm1, %%mm2\n\t" -" movq %%mm0, 40(%%"REG_d")\n\t" -" movq %%mm2, 72(%%"REG_d")\n\t" - -" movd 48(%%"REG_c"), %%mm3\n\t" -" movd 60(%%"REG_c"), %%mm2\n\t" -" pfsub 52(%%"REG_c"), %%mm3\n\t" -" pfsub 56(%%"REG_c"), %%mm2\n\t" -" pfmul 120(%%"REG_b"), %%mm3\n\t" -" pfmul 120(%%"REG_b"), %%mm2\n\t" -" movq %%mm2, %%mm1\n\t" - -" pfadd 56(%%"REG_c"), %%mm1\n\t" -" pfadd 60(%%"REG_c"), %%mm1\n\t" -" movq %%mm1, %%mm0\n\t" - -" pfadd 48(%%"REG_c"), %%mm0\n\t" -" pfadd 52(%%"REG_c"), %%mm0\n\t" -" pfadd %%mm3, %%mm1\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" pfadd %%mm3, %%mm2\n\t" -" punpckldq %%mm2, %%mm0\n\t" -" movq %%mm1, 56(%%"REG_d")\n\t" -" movq %%mm0, 48(%%"REG_d")\n\t" - -/*---*/ - -" movd 92(%%"REG_c"), %%mm1\n\t" -" pfsub 88(%%"REG_c"), %%mm1\n\t" -" pfmul 120(%%"REG_b"), %%mm1\n\t" -" movd %%mm1, 92(%%"REG_d")\n\t" -" pfadd 92(%%"REG_c"), %%mm1\n\t" -" pfadd 88(%%"REG_c"), %%mm1\n\t" -" movq %%mm1, %%mm0\n\t" - -" pfadd 80(%%"REG_c"), %%mm0\n\t" -" pfadd 84(%%"REG_c"), %%mm0\n\t" -" movd %%mm0, 80(%%"REG_d")\n\t" - -" movd 80(%%"REG_c"), %%mm0\n\t" -" pfsub 84(%%"REG_c"), %%mm0\n\t" -" pfmul 120(%%"REG_b"), %%mm0\n\t" -"