From 7573c29480850d715e2f06cae70f252573098123 Mon Sep 17 00:00:00 2001 From: diego Date: Mon, 12 Apr 2010 10:56:17 +0000 Subject: the great MPlayer tab removal: part I git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@31032 b3059339-0415-0410-9bf9-f77b7e298cf2 --- mp3lib/dct36.c | 28 +- mp3lib/dct36_3dnow.c | 902 ++++++++++++------------ mp3lib/dct64_3dnow.c | 1666 ++++++++++++++++++++++---------------------- mp3lib/dct64_k7.c | 1404 ++++++++++++++++++------------------- mp3lib/dct64_mmx.c | 1892 +++++++++++++++++++++++++------------------------- mp3lib/decod386.c | 10 +- mp3lib/decode_i586.c | 6 +- mp3lib/decode_mmx.c | 382 +++++----- mp3lib/equalizer.c | 119 ++-- mp3lib/l2tables.h | 258 +++---- mp3lib/layer1.c | 2 +- mp3lib/layer2.c | 6 +- mp3lib/layer3.c | 28 +- mp3lib/mpg123.h | 4 +- mp3lib/sr1.c | 36 +- 15 files changed, 3371 insertions(+), 3372 deletions(-) (limited to 'mp3lib') diff --git a/mp3lib/dct36.c b/mp3lib/dct36.c index b884bb949d..22ec273277 100644 --- a/mp3lib/dct36.c +++ b/mp3lib/dct36.c @@ -189,21 +189,21 @@ static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf) ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \ ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; #define MACRO1(v) { \ - real sum0,sum1; \ + real sum0, sum1; \ sum0 = tmp1a + tmp2a; \ - sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \ - MACRO0(v); } + sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \ + MACRO0(v); } #define MACRO2(v) { \ - real sum0,sum1; \ + real sum0, sum1; \ sum0 = tmp2a - tmp1a; \ sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \ - MACRO0(v); } + MACRO0(v); } register const real *c = COS9; register real *out2 = o2; - register real *w = wintab; - register real *out1 = o1; - register real *ts = tsbuf; + register real *w = wintab; + register real *out1 = o1; + register real *ts = tsbuf; real ta33,ta66,tb33,tb66; @@ -256,12 +256,12 @@ static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf) MACRO2(5); } - { - real sum0,sum1; - sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0]; - sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4]; - MACRO0(4); - } + { + real sum0,sum1; + sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0]; + sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4]; + MACRO0(4); + } } #endif diff --git a/mp3lib/dct36_3dnow.c b/mp3lib/dct36_3dnow.c index 8263639149..68cc33014b 100644 --- a/mp3lib/dct36_3dnow.c +++ b/mp3lib/dct36_3dnow.c @@ -40,462 +40,462 @@ void dct36_3dnow(real *inbuf, real *o1, #endif { __asm__ volatile( - "movq (%%eax),%%mm0\n\t" - "movq 4(%%eax),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movq %%mm0,4(%%eax)\n\t" - "psrlq $32,%%mm1\n\t" - "movq 12(%%eax),%%mm2\n\t" - "punpckldq %%mm2,%%mm1\n\t" - "pfadd %%mm2,%%mm1\n\t" - "movq %%mm1,12(%%eax)\n\t" - "psrlq $32,%%mm2\n\t" - "movq 20(%%eax),%%mm3\n\t" - "punpckldq %%mm3,%%mm2\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq %%mm2,20(%%eax)\n\t" - "psrlq $32,%%mm3\n\t" - "movq 28(%%eax),%%mm4\n\t" - "punpckldq %%mm4,%%mm3\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm3,28(%%eax)\n\t" - "psrlq $32,%%mm4\n\t" - "movq 36(%%eax),%%mm5\n\t" - "punpckldq %%mm5,%%mm4\n\t" - "pfadd %%mm5,%%mm4\n\t" - "movq %%mm4,36(%%eax)\n\t" - "psrlq $32,%%mm5\n\t" - "movq 44(%%eax),%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movq %%mm5,44(%%eax)\n\t" - "psrlq $32,%%mm6\n\t" - "movq 52(%%eax),%%mm7\n\t" - "punpckldq %%mm7,%%mm6\n\t" - "pfadd %%mm7,%%mm6\n\t" - "movq %%mm6,52(%%eax)\n\t" - "psrlq $32,%%mm7\n\t" - "movq 60(%%eax),%%mm0\n\t" - "punpckldq %%mm0,%%mm7\n\t" - "pfadd %%mm0,%%mm7\n\t" - "movq %%mm7,60(%%eax)\n\t" - "psrlq $32,%%mm0\n\t" - "movd 68(%%eax),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movd %%mm0,68(%%eax)\n\t" - "movd 4(%%eax),%%mm0\n\t" - "movd 12(%%eax),%%mm1\n\t" - "punpckldq %%mm1,%%mm0\n\t" - "punpckldq 20(%%eax),%%mm1\n\t" - "pfadd %%mm1,%%mm0\n\t" - "movd %%mm0,12(%%eax)\n\t" - "psrlq $32,%%mm0\n\t" - "movd %%mm0,20(%%eax)\n\t" - "psrlq $32,%%mm1\n\t" - "movd 28(%%eax),%%mm2\n\t" - "punpckldq %%mm2,%%mm1\n\t" - "punpckldq 36(%%eax),%%mm2\n\t" - "pfadd %%mm2,%%mm1\n\t" - "movd %%mm1,28(%%eax)\n\t" - "psrlq $32,%%mm1\n\t" - "movd %%mm1,36(%%eax)\n\t" - "psrlq $32,%%mm2\n\t" - "movd 44(%%eax),%%mm3\n\t" - "punpckldq %%mm3,%%mm2\n\t" - "punpckldq 52(%%eax),%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movd %%mm2,44(%%eax)\n\t" - "psrlq $32,%%mm2\n\t" - "movd %%mm2,52(%%eax)\n\t" - "psrlq $32,%%mm3\n\t" - "movd 60(%%eax),%%mm4\n\t" - "punpckldq %%mm4,%%mm3\n\t" - "punpckldq 68(%%eax),%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movd %%mm3,60(%%eax)\n\t" - "psrlq $32,%%mm3\n\t" - "movd %%mm3,68(%%eax)\n\t" + "movq (%%eax),%%mm0\n\t" + "movq 4(%%eax),%%mm1\n\t" + "pfadd %%mm1,%%mm0\n\t" + "movq %%mm0,4(%%eax)\n\t" + "psrlq $32,%%mm1\n\t" + "movq 12(%%eax),%%mm2\n\t" + "punpckldq %%mm2,%%mm1\n\t" + "pfadd %%mm2,%%mm1\n\t" + "movq %%mm1,12(%%eax)\n\t" + "psrlq $32,%%mm2\n\t" + "movq 20(%%eax),%%mm3\n\t" + "punpckldq %%mm3,%%mm2\n\t" + "pfadd %%mm3,%%mm2\n\t" + "movq %%mm2,20(%%eax)\n\t" + "psrlq $32,%%mm3\n\t" + "movq 28(%%eax),%%mm4\n\t" + "punpckldq %%mm4,%%mm3\n\t" + "pfadd %%mm4,%%mm3\n\t" + "movq %%mm3,28(%%eax)\n\t" + "psrlq $32,%%mm4\n\t" + "movq 36(%%eax),%%mm5\n\t" + "punpckldq %%mm5,%%mm4\n\t" + "pfadd %%mm5,%%mm4\n\t" + "movq %%mm4,36(%%eax)\n\t" + "psrlq $32,%%mm5\n\t" + "movq 44(%%eax),%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movq %%mm5,44(%%eax)\n\t" + "psrlq $32,%%mm6\n\t" + "movq 52(%%eax),%%mm7\n\t" + "punpckldq %%mm7,%%mm6\n\t" + "pfadd %%mm7,%%mm6\n\t" + "movq %%mm6,52(%%eax)\n\t" + "psrlq $32,%%mm7\n\t" + "movq 60(%%eax),%%mm0\n\t" + "punpckldq %%mm0,%%mm7\n\t" + "pfadd %%mm0,%%mm7\n\t" + "movq %%mm7,60(%%eax)\n\t" + "psrlq $32,%%mm0\n\t" + "movd 68(%%eax),%%mm1\n\t" + "pfadd %%mm1,%%mm0\n\t" + "movd %%mm0,68(%%eax)\n\t" + "movd 4(%%eax),%%mm0\n\t" + "movd 12(%%eax),%%mm1\n\t" + "punpckldq %%mm1,%%mm0\n\t" + "punpckldq 20(%%eax),%%mm1\n\t" + "pfadd %%mm1,%%mm0\n\t" + "movd %%mm0,12(%%eax)\n\t" + "psrlq $32,%%mm0\n\t" + "movd %%mm0,20(%%eax)\n\t" + "psrlq $32,%%mm1\n\t" + "movd 28(%%eax),%%mm2\n\t" + "punpckldq %%mm2,%%mm1\n\t" + "punpckldq 36(%%eax),%%mm2\n\t" + "pfadd %%mm2,%%mm1\n\t" + "movd %%mm1,28(%%eax)\n\t" + "psrlq $32,%%mm1\n\t" + "movd %%mm1,36(%%eax)\n\t" + "psrlq $32,%%mm2\n\t" + "movd 44(%%eax),%%mm3\n\t" + "punpckldq %%mm3,%%mm2\n\t" + "punpckldq 52(%%eax),%%mm3\n\t" + "pfadd %%mm3,%%mm2\n\t" + "movd %%mm2,44(%%eax)\n\t" + "psrlq $32,%%mm2\n\t" + "movd %%mm2,52(%%eax)\n\t" + "psrlq $32,%%mm3\n\t" + "movd 60(%%eax),%%mm4\n\t" + "punpckldq %%mm4,%%mm3\n\t" + "punpckldq 68(%%eax),%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "movd %%mm3,60(%%eax)\n\t" + "psrlq $32,%%mm3\n\t" + "movd %%mm3,68(%%eax)\n\t" - "movq 24(%%eax),%%mm0\n\t" - "movq 48(%%eax),%%mm1\n\t" - "movd "MANGLE(COS9)"+12,%%mm2\n\t" - "punpckldq %%mm2,%%mm2\n\t" - "movd "MANGLE(COS9)"+24,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm2,%%mm0\n\t" - "pfmul %%mm3,%%mm1\n\t" - "pushl %%eax\n\t" - "movl $1,%%eax\n\t" - "movd %%eax,%%mm7\n\t" - "pi2fd %%mm7,%%mm7\n\t" - "popl %%eax\n\t" - "movq 8(%%eax),%%mm2\n\t" - "movd "MANGLE(COS9)"+4,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfadd %%mm0,%%mm2\n\t" - "movq 40(%%eax),%%mm3\n\t" - "movd "MANGLE(COS9)"+20,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq 56(%%eax),%%mm3\n\t" - "movd "MANGLE(COS9)"+28,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq (%%eax),%%mm3\n\t" - "movq 16(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq 32(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 108(%%edx),%%mm6\n\t" - "punpckldq 104(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" + "movq 24(%%eax),%%mm0\n\t" + "movq 48(%%eax),%%mm1\n\t" + "movd "MANGLE(COS9)"+12,%%mm2\n\t" + "punpckldq %%mm2,%%mm2\n\t" + "movd "MANGLE(COS9)"+24,%%mm3\n\t" + "punpckldq %%mm3,%%mm3\n\t" + "pfmul %%mm2,%%mm0\n\t" + "pfmul %%mm3,%%mm1\n\t" + "pushl %%eax\n\t" + "movl $1,%%eax\n\t" + "movd %%eax,%%mm7\n\t" + "pi2fd %%mm7,%%mm7\n\t" + "popl %%eax\n\t" + "movq 8(%%eax),%%mm2\n\t" + "movd "MANGLE(COS9)"+4,%%mm3\n\t" + "punpckldq %%mm3,%%mm3\n\t" + "pfmul %%mm3,%%mm2\n\t" + "pfadd %%mm0,%%mm2\n\t" + "movq 40(%%eax),%%mm3\n\t" + "movd "MANGLE(COS9)"+20,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "pfadd %%mm3,%%mm2\n\t" + "movq 56(%%eax),%%mm3\n\t" + "movd "MANGLE(COS9)"+28,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "pfadd %%mm3,%%mm2\n\t" + "movq (%%eax),%%mm3\n\t" + "movq 16(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+8,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "movq 32(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+16,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "pfadd %%mm1,%%mm3\n\t" + "movq 64(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+32,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "movq %%mm2,%%mm4\n\t" + "pfadd %%mm3,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 108(%%edx),%%mm6\n\t" + "punpckldq 104(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" #ifdef DCT36_OPTIMIZE_FOR_K7 - "pswapd %%mm5,%%mm5\n\t" - "movq %%mm5,32(%%ecx)\n\t" + "pswapd %%mm5,%%mm5\n\t" + "movq %%mm5,32(%%ecx)\n\t" #else - "movd %%mm5,36(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,32(%%ecx)\n\t" + "movd %%mm5,36(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,32(%%ecx)\n\t" #endif - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 32(%%edx),%%mm6\n\t" - "punpckldq 36(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 32(%%esi),%%mm6\n\t" - "punpckldq 36(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,1024(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1152(%%ebx)\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 140(%%edx),%%mm6\n\t" - "punpckldq 72(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,68(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,0(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 0(%%edx),%%mm6\n\t" - "punpckldq 68(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 0(%%esi),%%mm6\n\t" - "punpckldq 68(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,0(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,2176(%%ebx)\n\t" - "movq 8(%%eax),%%mm2\n\t" - "movq 40(%%eax),%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq 56(%%eax),%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movd "MANGLE(COS9)"+12,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "movq 16(%%eax),%%mm3\n\t" - "movq 32(%%eax),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 64(%%eax),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movd "MANGLE(COS9)"+24,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "movq 48(%%eax),%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq (%%eax),%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 112(%%edx),%%mm6\n\t" - "punpckldq 100(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,40(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,28(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 28(%%edx),%%mm6\n\t" - "punpckldq 40(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 28(%%esi),%%mm6\n\t" - "punpckldq 40(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,896(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1280(%%ebx)\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 136(%%edx),%%mm6\n\t" - "punpckldq 76(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,64(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,4(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 4(%%edx),%%mm6\n\t" - "punpckldq 64(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 4(%%esi),%%mm6\n\t" - "punpckldq 64(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,128(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,2048(%%ebx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 32(%%edx),%%mm6\n\t" + "punpckldq 36(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 32(%%esi),%%mm6\n\t" + "punpckldq 36(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,1024(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1152(%%ebx)\n\t" + "movq %%mm3,%%mm4\n\t" + "pfsub %%mm2,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 140(%%edx),%%mm6\n\t" + "punpckldq 72(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,68(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,0(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 0(%%edx),%%mm6\n\t" + "punpckldq 68(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 0(%%esi),%%mm6\n\t" + "punpckldq 68(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,0(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,2176(%%ebx)\n\t" + "movq 8(%%eax),%%mm2\n\t" + "movq 40(%%eax),%%mm3\n\t" + "pfsub %%mm3,%%mm2\n\t" + "movq 56(%%eax),%%mm3\n\t" + "pfsub %%mm3,%%mm2\n\t" + "movd "MANGLE(COS9)"+12,%%mm3\n\t" + "punpckldq %%mm3,%%mm3\n\t" + "pfmul %%mm3,%%mm2\n\t" + "movq 16(%%eax),%%mm3\n\t" + "movq 32(%%eax),%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "movq 64(%%eax),%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "movd "MANGLE(COS9)"+24,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "movq 48(%%eax),%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "movq (%%eax),%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "movq %%mm2,%%mm4\n\t" + "pfadd %%mm3,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 112(%%edx),%%mm6\n\t" + "punpckldq 100(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,40(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,28(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 28(%%edx),%%mm6\n\t" + "punpckldq 40(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 28(%%esi),%%mm6\n\t" + "punpckldq 40(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,896(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1280(%%ebx)\n\t" + "movq %%mm3,%%mm4\n\t" + "pfsub %%mm2,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 136(%%edx),%%mm6\n\t" + "punpckldq 76(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,64(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,4(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 4(%%edx),%%mm6\n\t" + "punpckldq 64(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 4(%%esi),%%mm6\n\t" + "punpckldq 64(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,128(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,2048(%%ebx)\n\t" - "movq 8(%%eax),%%mm2\n\t" - "movd "MANGLE(COS9)"+20,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfsub %%mm0,%%mm2\n\t" - "movq 40(%%eax),%%mm3\n\t" - "movd "MANGLE(COS9)"+28,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq 56(%%eax),%%mm3\n\t" - "movd "MANGLE(COS9)"+4,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq (%%eax),%%mm3\n\t" - "movq 16(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 32(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 116(%%edx),%%mm6\n\t" - "punpckldq 96(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,44(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,24(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 24(%%edx),%%mm6\n\t" - "punpckldq 44(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 24(%%esi),%%mm6\n\t" - "punpckldq 44(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,768(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1408(%%ebx)\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 132(%%edx),%%mm6\n\t" - "punpckldq 80(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,60(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,8(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 8(%%edx),%%mm6\n\t" - "punpckldq 60(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 8(%%esi),%%mm6\n\t" - "punpckldq 60(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,256(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1920(%%ebx)\n\t" - "movq 8(%%eax),%%mm2\n\t" - "movd "MANGLE(COS9)"+28,%%mm3\n\t" - "punpckldq %%mm3,%%mm3\n\t" - "pfmul %%mm3,%%mm2\n\t" - "pfsub %%mm0,%%mm2\n\t" - "movq 40(%%eax),%%mm3\n\t" - "movd "MANGLE(COS9)"+4,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfadd %%mm3,%%mm2\n\t" - "movq 56(%%eax),%%mm3\n\t" - "movd "MANGLE(COS9)"+20,%%mm4\n\t" - "punpckldq %%mm4,%%mm4\n\t" - "pfmul %%mm4,%%mm3\n\t" - "pfsub %%mm3,%%mm2\n\t" - "movq (%%eax),%%mm3\n\t" - "movq 16(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+16,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq 32(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+32,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfadd %%mm4,%%mm3\n\t" - "pfadd %%mm1,%%mm3\n\t" - "movq 64(%%eax),%%mm4\n\t" - "movd "MANGLE(COS9)"+8,%%mm5\n\t" - "punpckldq %%mm5,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "pfsub %%mm4,%%mm3\n\t" - "movq %%mm2,%%mm4\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 120(%%edx),%%mm6\n\t" - "punpckldq 92(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,48(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,20(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 20(%%edx),%%mm6\n\t" - "punpckldq 48(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 20(%%esi),%%mm6\n\t" - "punpckldq 48(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,640(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1536(%%ebx)\n\t" - "movq %%mm3,%%mm4\n\t" - "pfsub %%mm2,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 128(%%edx),%%mm6\n\t" - "punpckldq 84(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,56(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,12(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 12(%%edx),%%mm6\n\t" - "punpckldq 56(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 12(%%esi),%%mm6\n\t" - "punpckldq 56(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,384(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1792(%%ebx)\n\t" + "movq 8(%%eax),%%mm2\n\t" + "movd "MANGLE(COS9)"+20,%%mm3\n\t" + "punpckldq %%mm3,%%mm3\n\t" + "pfmul %%mm3,%%mm2\n\t" + "pfsub %%mm0,%%mm2\n\t" + "movq 40(%%eax),%%mm3\n\t" + "movd "MANGLE(COS9)"+28,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "pfsub %%mm3,%%mm2\n\t" + "movq 56(%%eax),%%mm3\n\t" + "movd "MANGLE(COS9)"+4,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "pfadd %%mm3,%%mm2\n\t" + "movq (%%eax),%%mm3\n\t" + "movq 16(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+32,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "movq 32(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+8,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "pfadd %%mm1,%%mm3\n\t" + "movq 64(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+16,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "movq %%mm2,%%mm4\n\t" + "pfadd %%mm3,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 116(%%edx),%%mm6\n\t" + "punpckldq 96(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,44(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,24(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 24(%%edx),%%mm6\n\t" + "punpckldq 44(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 24(%%esi),%%mm6\n\t" + "punpckldq 44(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,768(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1408(%%ebx)\n\t" + "movq %%mm3,%%mm4\n\t" + "pfsub %%mm2,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 132(%%edx),%%mm6\n\t" + "punpckldq 80(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,60(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,8(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 8(%%edx),%%mm6\n\t" + "punpckldq 60(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 8(%%esi),%%mm6\n\t" + "punpckldq 60(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,256(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1920(%%ebx)\n\t" + "movq 8(%%eax),%%mm2\n\t" + "movd "MANGLE(COS9)"+28,%%mm3\n\t" + "punpckldq %%mm3,%%mm3\n\t" + "pfmul %%mm3,%%mm2\n\t" + "pfsub %%mm0,%%mm2\n\t" + "movq 40(%%eax),%%mm3\n\t" + "movd "MANGLE(COS9)"+4,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "pfadd %%mm3,%%mm2\n\t" + "movq 56(%%eax),%%mm3\n\t" + "movd "MANGLE(COS9)"+20,%%mm4\n\t" + "punpckldq %%mm4,%%mm4\n\t" + "pfmul %%mm4,%%mm3\n\t" + "pfsub %%mm3,%%mm2\n\t" + "movq (%%eax),%%mm3\n\t" + "movq 16(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+16,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "movq 32(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+32,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfadd %%mm4,%%mm3\n\t" + "pfadd %%mm1,%%mm3\n\t" + "movq 64(%%eax),%%mm4\n\t" + "movd "MANGLE(COS9)"+8,%%mm5\n\t" + "punpckldq %%mm5,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "pfsub %%mm4,%%mm3\n\t" + "movq %%mm2,%%mm4\n\t" + "pfadd %%mm3,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 120(%%edx),%%mm6\n\t" + "punpckldq 92(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,48(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,20(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 20(%%edx),%%mm6\n\t" + "punpckldq 48(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 20(%%esi),%%mm6\n\t" + "punpckldq 48(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,640(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1536(%%ebx)\n\t" + "movq %%mm3,%%mm4\n\t" + "pfsub %%mm2,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 128(%%edx),%%mm6\n\t" + "punpckldq 84(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,56(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,12(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 12(%%edx),%%mm6\n\t" + "punpckldq 56(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 12(%%esi),%%mm6\n\t" + "punpckldq 56(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,384(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1792(%%ebx)\n\t" - "movq (%%eax),%%mm4\n\t" - "movq 16(%%eax),%%mm3\n\t" - "pfsub %%mm3,%%mm4\n\t" - "movq 32(%%eax),%%mm3\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq 48(%%eax),%%mm3\n\t" - "pfsub %%mm3,%%mm4\n\t" - "movq 64(%%eax),%%mm3\n\t" - "pfadd %%mm3,%%mm4\n\t" - "movq %%mm7,%%mm5\n\t" - "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t" - "pfmul %%mm5,%%mm4\n\t" - "movq %%mm4,%%mm5\n\t" - "pfacc %%mm5,%%mm5\n\t" - "movd 124(%%edx),%%mm6\n\t" - "punpckldq 88(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd %%mm5,52(%%ecx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,16(%%ecx)\n\t" - "movq %%mm4,%%mm6\n\t" - "punpckldq %%mm6,%%mm5\n\t" - "pfsub %%mm6,%%mm5\n\t" - "punpckhdq %%mm5,%%mm5\n\t" - "movd 16(%%edx),%%mm6\n\t" - "punpckldq 52(%%edx),%%mm6\n\t" - "pfmul %%mm6,%%mm5\n\t" - "movd 16(%%esi),%%mm6\n\t" - "punpckldq 52(%%esi),%%mm6\n\t" - "pfadd %%mm6,%%mm5\n\t" - "movd %%mm5,512(%%ebx)\n\t" - "psrlq $32,%%mm5\n\t" - "movd %%mm5,1664(%%ebx)\n\t" + "movq (%%eax),%%mm4\n\t" + "movq 16(%%eax),%%mm3\n\t" + "pfsub %%mm3,%%mm4\n\t" + "movq 32(%%eax),%%mm3\n\t" + "pfadd %%mm3,%%mm4\n\t" + "movq 48(%%eax),%%mm3\n\t" + "pfsub %%mm3,%%mm4\n\t" + "movq 64(%%eax),%%mm3\n\t" + "pfadd %%mm3,%%mm4\n\t" + "movq %%mm7,%%mm5\n\t" + "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t" + "pfmul %%mm5,%%mm4\n\t" + "movq %%mm4,%%mm5\n\t" + "pfacc %%mm5,%%mm5\n\t" + "movd 124(%%edx),%%mm6\n\t" + "punpckldq 88(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd %%mm5,52(%%ecx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,16(%%ecx)\n\t" + "movq %%mm4,%%mm6\n\t" + "punpckldq %%mm6,%%mm5\n\t" + "pfsub %%mm6,%%mm5\n\t" + "punpckhdq %%mm5,%%mm5\n\t" + "movd 16(%%edx),%%mm6\n\t" + "punpckldq 52(%%edx),%%mm6\n\t" + "pfmul %%mm6,%%mm5\n\t" + "movd 16(%%esi),%%mm6\n\t" + "punpckldq 52(%%esi),%%mm6\n\t" + "pfadd %%mm6,%%mm5\n\t" + "movd %%mm5,512(%%ebx)\n\t" + "psrlq $32,%%mm5\n\t" + "movd %%mm5,1664(%%ebx)\n\t" - "femms\n\t" - : - : "a" (inbuf), "S" (o1), "c" (o2), "d" (wintab), "b" (tsbuf) - : "memory"); + "femms\n\t" + : + : "a" (inbuf), "S" (o1), "c" (o2), "d" (wintab), "b" (tsbuf) + : "memory"); } diff --git a/mp3lib/dct64_3dnow.c b/mp3lib/dct64_3dnow.c index 55340b0512..d528654207 100644 --- a/mp3lib/dct64_3dnow.c +++ b/mp3lib/dct64_3dnow.c @@ -19,910 +19,910 @@ void dct64_MMX_3dnow(short *a,short *b,real *c) { char tmp[256]; __asm__ volatile( -" movl %2,%%eax\n\t" +" movl %2,%%eax\n\t" -" leal 128+%3,%%edx\n\t" -" movl %0,%%esi\n\t" -" movl %1,%%edi\n\t" -" movl $"MANGLE(costab_mmx)",%%ebx\n\t" -" leal %3,%%ecx\n\t" +" leal 128+%3,%%edx\n\t" +" movl %0,%%esi\n\t" +" movl %1,%%edi\n\t" +" movl $"MANGLE(costab_mmx)",%%ebx\n\t" +" leal %3,%%ecx\n\t" /* Phase 1*/ -" movq (%%eax), %%mm0\n\t" -" movq 8(%%eax), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 120(%%eax), %%mm1\n\t" -" movq 112(%%eax), %%mm5\n\t" +" movq (%%eax), %%mm0\n\t" +" movq 8(%%eax), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 120(%%eax), %%mm1\n\t" +" movq 112(%%eax), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%edx)\n\t" -" movq %%mm4, 8(%%edx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul (%%ebx), %%mm3\n\t" -" pfmul 8(%%ebx), %%mm7\n\t" -" movd %%mm3, 124(%%edx)\n\t" -" movd %%mm7, 116(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 120(%%edx)\n\t" -" movd %%mm7, 112(%%edx)\n\t" - -" movq 16(%%eax), %%mm0\n\t" -" movq 24(%%eax), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 104(%%eax), %%mm1\n\t" -" movq 96(%%eax), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, (%%edx)\n\t" +" movq %%mm4, 8(%%edx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul (%%ebx), %%mm3\n\t" +" pfmul 8(%%ebx), %%mm7\n\t" +" movd %%mm3, 124(%%edx)\n\t" +" movd %%mm7, 116(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 120(%%edx)\n\t" +" movd %%mm7, 112(%%edx)\n\t" + +" movq 16(%%eax), %%mm0\n\t" +" movq 24(%%eax), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 104(%%eax), %%mm1\n\t" +" movq 96(%%eax), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 16(%%edx)\n\t" -" movq %%mm4, 24(%%edx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 16(%%ebx), %%mm3\n\t" -" pfmul 24(%%ebx), %%mm7\n\t" -" movd %%mm3, 108(%%edx)\n\t" -" movd %%mm7, 100(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 104(%%edx)\n\t" -" movd %%mm7, 96(%%edx)\n\t" - -" movq 32(%%eax), %%mm0\n\t" -" movq 40(%%eax), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 88(%%eax), %%mm1\n\t" -" movq 80(%%eax), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 16(%%edx)\n\t" +" movq %%mm4, 24(%%edx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 16(%%ebx), %%mm3\n\t" +" pfmul 24(%%ebx), %%mm7\n\t" +" movd %%mm3, 108(%%edx)\n\t" +" movd %%mm7, 100(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 104(%%edx)\n\t" +" movd %%mm7, 96(%%edx)\n\t" + +" movq 32(%%eax), %%mm0\n\t" +" movq 40(%%eax), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 88(%%eax), %%mm1\n\t" +" movq 80(%%eax), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 32(%%edx)\n\t" -" movq %%mm4, 40(%%edx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 32(%%ebx), %%mm3\n\t" -" pfmul 40(%%ebx), %%mm7\n\t" -" movd %%mm3, 92(%%edx)\n\t" -" movd %%mm7, 84(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 88(%%edx)\n\t" -" movd %%mm7, 80(%%edx)\n\t" - -" movq 48(%%eax), %%mm0\n\t" -" movq 56(%%eax), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 72(%%eax), %%mm1\n\t" -" movq 64(%%eax), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 32(%%edx)\n\t" +" movq %%mm4, 40(%%edx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 32(%%ebx), %%mm3\n\t" +" pfmul 40(%%ebx), %%mm7\n\t" +" movd %%mm3, 92(%%edx)\n\t" +" movd %%mm7, 84(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 88(%%edx)\n\t" +" movd %%mm7, 80(%%edx)\n\t" + +" movq 48(%%eax), %%mm0\n\t" +" movq 56(%%eax), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 72(%%eax), %%mm1\n\t" +" movq 64(%%eax), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 48(%%edx)\n\t" -" movq %%mm4, 56(%%edx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 48(%%ebx), %%mm3\n\t" -" pfmul 56(%%ebx), %%mm7\n\t" -" movd %%mm3, 76(%%edx)\n\t" -" movd %%mm7, 68(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 72(%%edx)\n\t" -" movd %%mm7, 64(%%edx)\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 48(%%edx)\n\t" +" movq %%mm4, 56(%%edx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 48(%%ebx), %%mm3\n\t" +" pfmul 56(%%ebx), %%mm7\n\t" +" movd %%mm3, 76(%%edx)\n\t" +" movd %%mm7, 68(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 72(%%edx)\n\t" +" movd %%mm7, 64(%%edx)\n\t" /* Phase 2*/ -" movq (%%edx), %%mm0\n\t" -" movq 8(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 56(%%edx), %%mm1\n\t" -" movq 48(%%edx), %%mm5\n\t" +" movq (%%edx), %%mm0\n\t" +" movq 8(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 56(%%edx), %%mm1\n\t" +" movq 48(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%ecx)\n\t" -" movq %%mm4, 8(%%ecx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 64(%%ebx), %%mm3\n\t" -" pfmul 72(%%ebx), %%mm7\n\t" -" movd %%mm3, 60(%%ecx)\n\t" -" movd %%mm7, 52(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 56(%%ecx)\n\t" -" movd %%mm7, 48(%%ecx)\n\t" - -" movq 16(%%edx), %%mm0\n\t" -" movq 24(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 40(%%edx), %%mm1\n\t" -" movq 32(%%edx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, (%%ecx)\n\t" +" movq %%mm4, 8(%%ecx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 64(%%ebx), %%mm3\n\t" +" pfmul 72(%%ebx), %%mm7\n\t" +" movd %%mm3, 60(%%ecx)\n\t" +" movd %%mm7, 52(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 56(%%ecx)\n\t" +" movd %%mm7, 48(%%ecx)\n\t" + +" movq 16(%%edx), %%mm0\n\t" +" movq 24(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 40(%%edx), %%mm1\n\t" +" movq 32(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 16(%%ecx)\n\t" -" movq %%mm4, 24(%%ecx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 80(%%ebx), %%mm3\n\t" -" pfmul 88(%%ebx), %%mm7\n\t" -" movd %%mm3, 44(%%ecx)\n\t" -" movd %%mm7, 36(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 40(%%ecx)\n\t" -" movd %%mm7, 32(%%ecx)\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 16(%%ecx)\n\t" +" movq %%mm4, 24(%%ecx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 80(%%ebx), %%mm3\n\t" +" pfmul 88(%%ebx), %%mm7\n\t" +" movd %%mm3, 44(%%ecx)\n\t" +" movd %%mm7, 36(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 40(%%ecx)\n\t" +" movd %%mm7, 32(%%ecx)\n\t" /* Phase 3*/ -" movq 64(%%edx), %%mm0\n\t" -" movq 72(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 120(%%edx), %%mm1\n\t" -" movq 112(%%edx), %%mm5\n\t" +" movq 64(%%edx), %%mm0\n\t" +" movq 72(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 120(%%edx), %%mm1\n\t" +" movq 112(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 64(%%ecx)\n\t" -" movq %%mm4, 72(%%ecx)\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 64(%%ebx), %%mm3\n\t" -" pfmul 72(%%ebx), %%mm7\n\t" -" movd %%mm3, 124(%%ecx)\n\t" -" movd %%mm7, 116(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 120(%%ecx)\n\t" -" movd %%mm7, 112(%%ecx)\n\t" - -" movq 80(%%edx), %%mm0\n\t" -" movq 88(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 104(%%edx), %%mm1\n\t" -" movq 96(%%edx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 64(%%ecx)\n\t" +" movq %%mm4, 72(%%ecx)\n\t" +" pfsubr %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 64(%%ebx), %%mm3\n\t" +" pfmul 72(%%ebx), %%mm7\n\t" +" movd %%mm3, 124(%%ecx)\n\t" +" movd %%mm7, 116(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 120(%%ecx)\n\t" +" movd %%mm7, 112(%%ecx)\n\t" + +" movq 80(%%edx), %%mm0\n\t" +" movq 88(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 104(%%edx), %%mm1\n\t" +" movq 96(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 80(%%ecx)\n\t" -" movq %%mm4, 88(%%ecx)\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 80(%%ebx), %%mm3\n\t" -" pfmul 88(%%ebx), %%mm7\n\t" -" movd %%mm3, 108(%%ecx)\n\t" -" movd %%mm7, 100(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 104(%%ecx)\n\t" -" movd %%mm7, 96(%%ecx)\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 80(%%ecx)\n\t" +" movq %%mm4, 88(%%ecx)\n\t" +" pfsubr %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 80(%%ebx), %%mm3\n\t" +" pfmul 88(%%ebx), %%mm7\n\t" +" movd %%mm3, 108(%%ecx)\n\t" +" movd %%mm7, 100(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 104(%%ecx)\n\t" +" movd %%mm7, 96(%%ecx)\n\t" /* Phase 4*/ -" movq (%%ecx), %%mm0\n\t" -" movq 8(%%ecx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 24(%%ecx), %%mm1\n\t" -" movq 16(%%ecx), %%mm5\n\t" +" movq (%%ecx), %%mm0\n\t" +" movq 8(%%ecx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 24(%%ecx), %%mm1\n\t" +" movq 16(%%ecx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%edx)\n\t" -" movq %%mm4, 8(%%edx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 96(%%ebx), %%mm3\n\t" -" pfmul 104(%%ebx), %%mm7\n\t" -" movd %%mm3, 28(%%edx)\n\t" -" movd %%mm7, 20(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 24(%%edx)\n\t" -" movd %%mm7, 16(%%edx)\n\t" - -" movq 32(%%ecx), %%mm0\n\t" -" movq 40(%%ecx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 56(%%ecx), %%mm1\n\t" -" movq 48(%%ecx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, (%%edx)\n\t" +" movq %%mm4, 8(%%edx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 96(%%ebx), %%mm3\n\t" +" pfmul 104(%%ebx), %%mm7\n\t" +" movd %%mm3, 28(%%edx)\n\t" +" movd %%mm7, 20(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 24(%%edx)\n\t" +" movd %%mm7, 16(%%edx)\n\t" + +" movq 32(%%ecx), %%mm0\n\t" +" movq 40(%%ecx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 56(%%ecx), %%mm1\n\t" +" movq 48(%%ecx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 32(%%edx)\n\t" -" movq %%mm4, 40(%%edx)\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 96(%%ebx), %%mm3\n\t" -" pfmul 104(%%ebx), %%mm7\n\t" -" movd %%mm3, 60(%%edx)\n\t" -" movd %%mm7, 52(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 56(%%edx)\n\t" -" movd %%mm7, 48(%%edx)\n\t" - -" movq 64(%%ecx), %%mm0\n\t" -" movq 72(%%ecx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 88(%%ecx), %%mm1\n\t" -" movq 80(%%ecx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 32(%%edx)\n\t" +" movq %%mm4, 40(%%edx)\n\t" +" pfsubr %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 96(%%ebx), %%mm3\n\t" +" pfmul 104(%%ebx), %%mm7\n\t" +" movd %%mm3, 60(%%edx)\n\t" +" movd %%mm7, 52(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 56(%%edx)\n\t" +" movd %%mm7, 48(%%edx)\n\t" + +" movq 64(%%ecx), %%mm0\n\t" +" movq 72(%%ecx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 88(%%ecx), %%mm1\n\t" +" movq 80(%%ecx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 64(%%edx)\n\t" -" movq %%mm4, 72(%%edx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsub %%mm5, %%mm7\n\t" -" pfmul 96(%%ebx), %%mm3\n\t" -" pfmul 104(%%ebx), %%mm7\n\t" -" movd %%mm3, 92(%%edx)\n\t" -" movd %%mm7, 84(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 88(%%edx)\n\t" -" movd %%mm7, 80(%%edx)\n\t" - -" movq 96(%%ecx), %%mm0\n\t" -" movq 104(%%ecx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 120(%%ecx), %%mm1\n\t" -" movq 112(%%ecx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 64(%%edx)\n\t" +" movq %%mm4, 72(%%edx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsub %%mm5, %%mm7\n\t" +" pfmul 96(%%ebx), %%mm3\n\t" +" pfmul 104(%%ebx), %%mm7\n\t" +" movd %%mm3, 92(%%edx)\n\t" +" movd %%mm7, 84(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 88(%%edx)\n\t" +" movd %%mm7, 80(%%edx)\n\t" + +" movq 96(%%ecx), %%mm0\n\t" +" movq 104(%%ecx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 120(%%ecx), %%mm1\n\t" +" movq 112(%%ecx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 96(%%edx)\n\t" -" movq %%mm4, 104(%%edx)\n\t" -" pfsubr %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 96(%%ebx), %%mm3\n\t" -" pfmul 104(%%ebx), %%mm7\n\t" -" movd %%mm3, 124(%%edx)\n\t" -" movd %%mm7, 116(%%edx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 120(%%edx)\n\t" -" movd %%mm7, 112(%%edx)\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 96(%%edx)\n\t" +" movq %%mm4, 104(%%edx)\n\t" +" pfsubr %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 96(%%ebx), %%mm3\n\t" +" pfmul 104(%%ebx), %%mm7\n\t" +" movd %%mm3, 124(%%edx)\n\t" +" movd %%mm7, 116(%%edx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 120(%%edx)\n\t" +" movd %%mm7, 112(%%edx)\n\t" /* Phase 5 */ -" movq (%%edx), %%mm0\n\t" -" movq 16(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 8(%%edx), %%mm1\n\t" -" movq 24(%%edx), %%mm5\n\t" +" movq (%%edx), %%mm0\n\t" +" movq 16(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 8(%%edx), %%mm1\n\t" +" movq 24(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, (%%ecx)\n\t" -" movq %%mm4, 16(%%ecx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%ebx), %%mm3\n\t" -" pfmul 112(%%ebx), %%mm7\n\t" -" movd %%mm3, 12(%%ecx)\n\t" -" movd %%mm7, 28(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 8(%%ecx)\n\t" -" movd %%mm7, 24(%%ecx)\n\t" - -" movq 32(%%edx), %%mm0\n\t" -" movq 48(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 40(%%edx), %%mm1\n\t" -" movq 56(%%edx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, (%%ecx)\n\t" +" movq %%mm4, 16(%%ecx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 112(%%ebx), %%mm3\n\t" +" pfmul 112(%%ebx), %%mm7\n\t" +" movd %%mm3, 12(%%ecx)\n\t" +" movd %%mm7, 28(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 8(%%ecx)\n\t" +" movd %%mm7, 24(%%ecx)\n\t" + +" movq 32(%%edx), %%mm0\n\t" +" movq 48(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 40(%%edx), %%mm1\n\t" +" movq 56(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 32(%%ecx)\n\t" -" movq %%mm4, 48(%%ecx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%ebx), %%mm3\n\t" -" pfmul 112(%%ebx), %%mm7\n\t" -" movd %%mm3, 44(%%ecx)\n\t" -" movd %%mm7, 60(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 40(%%ecx)\n\t" -" movd %%mm7, 56(%%ecx)\n\t" - -" movq 64(%%edx), %%mm0\n\t" -" movq 80(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 72(%%edx), %%mm1\n\t" -" movq 88(%%edx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 32(%%ecx)\n\t" +" movq %%mm4, 48(%%ecx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 112(%%ebx), %%mm3\n\t" +" pfmul 112(%%ebx), %%mm7\n\t" +" movd %%mm3, 44(%%ecx)\n\t" +" movd %%mm7, 60(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 40(%%ecx)\n\t" +" movd %%mm7, 56(%%ecx)\n\t" + +" movq 64(%%edx), %%mm0\n\t" +" movq 80(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 72(%%edx), %%mm1\n\t" +" movq 88(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 64(%%ecx)\n\t" -" movq %%mm4, 80(%%ecx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%ebx), %%mm3\n\t" -" pfmul 112(%%ebx), %%mm7\n\t" -" movd %%mm3, 76(%%ecx)\n\t" -" movd %%mm7, 92(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 72(%%ecx)\n\t" -" movd %%mm7, 88(%%ecx)\n\t" - -" movq 96(%%edx), %%mm0\n\t" -" movq 112(%%edx), %%mm4\n\t" -" movq %%mm0, %%mm3\n\t" -" movq %%mm4, %%mm7\n\t" -" movq 104(%%edx), %%mm1\n\t" -" movq 120(%%edx), %%mm5\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 64(%%ecx)\n\t" +" movq %%mm4, 80(%%ecx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 112(%%ebx), %%mm3\n\t" +" pfmul 112(%%ebx), %%mm7\n\t" +" movd %%mm3, 76(%%ecx)\n\t" +" movd %%mm7, 92(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 72(%%ecx)\n\t" +" movd %%mm7, 88(%%ecx)\n\t" + +" movq 96(%%edx), %%mm0\n\t" +" movq 112(%%edx), %%mm4\n\t" +" movq %%mm0, %%mm3\n\t" +" movq %%mm4, %%mm7\n\t" +" movq 104(%%edx), %%mm1\n\t" +" movq 120(%%edx), %%mm5\n\t" /* n.b.: pswapd*/ -" movq %%mm1, %%mm2\n\t" -" movq %%mm5, %%mm6\n\t" -" psrlq $32, %%mm1\n\t" -" psrlq $32, %%mm5\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" punpckldq %%mm6, %%mm5\n\t" +" movq %%mm1, %%mm2\n\t" +" movq %%mm5, %%mm6\n\t" +" psrlq $32, %%mm1\n\t" +" psrlq $32, %%mm5\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" punpckldq %%mm6, %%mm5\n\t" /**/ -" pfadd %%mm1, %%mm0\n\t" -" pfadd %%mm5, %%mm4\n\t" -" movq %%mm0, 96(%%ecx)\n\t" -" movq %%mm4, 112(%%ecx)\n\t" -" pfsub %%mm1, %%mm3\n\t" -" pfsubr %%mm5, %%mm7\n\t" -" pfmul 112(%%ebx), %%mm3\n\t" -" pfmul 112(%%ebx), %%mm7\n\t" -" movd %%mm3, 108(%%ecx)\n\t" -" movd %%mm7, 124(%%ecx)\n\t" -" psrlq $32, %%mm3\n\t" -" psrlq $32, %%mm7\n\t" -" movd %%mm3, 104(%%ecx)\n\t" -" movd %%mm7, 120(%%ecx)\n\t" +" pfadd %%mm1, %%mm0\n\t" +" pfadd %%mm5, %%mm4\n\t" +" movq %%mm0, 96(%%ecx)\n\t" +" movq %%mm4, 112(%%ecx)\n\t" +" pfsub %%mm1, %%mm3\n\t" +" pfsubr %%mm5, %%mm7\n\t" +" pfmul 112(%%ebx), %%mm3\n\t" +" pfmul 112(%%ebx), %%mm7\n\t" +" movd %%mm3, 108(%%ecx)\n\t" +" movd %%mm7, 124(%%ecx)\n\t" +" psrlq $32, %%mm3\n\t" +" psrlq $32, %%mm7\n\t" +" movd %%mm3, 104(%%ecx)\n\t" +" movd %%mm7, 120(%%ecx)\n\t" /* Phase 6. This is the end of easy road. */ /* Code below is coded in scalar mode. Should be optimized */ -" movd "MANGLE(plus_1f)", %%mm6\n\t" -" punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ -" movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ - -" movq 32(%%ecx), %%mm0\n\t" -" movq 64(%%ecx), %%mm2\n\t" -" movq %%mm0, %%mm1\n\t" -" movq %%mm2, %%mm3\n\t" -" pxor %%mm7, %%mm1\n\t" -" pxor %%mm7, %%mm3\n\t" -" pfacc %%mm1, %%mm0\n\t" -" pfacc %%mm3, %%mm2\n\t" -" pfmul %%mm6, %%mm0\n\t" -" pfmul %%mm6, %%mm2\n\t" -" movq %%mm0, 32(%%edx)\n\t" -" movq %%mm2, 64(%%edx)\n\t" - -" movd 44(%%ecx), %%mm0\n\t" -" movd 40(%%ecx), %%mm2\n\t" -" movd 120(%%ebx), %%mm3\n\t" -" punpckldq 76(%%ecx), %%mm0\n\t" -" punpckldq 72(%%ecx), %%mm2\n\t" -" punpckldq %%mm3, %%mm3\n\t" -" movq %%mm0, %%mm4\n\t" -" movq %%mm2, %%mm5\n\t" -" pfsub %%mm2, %%mm0\n\t" -" pfmul %%mm3, %%mm0\n\t" -" movq %%mm0, %%mm1\n\t" -" pfadd %%mm5, %%mm0\n\t" -" pfadd %%mm4, %%mm0\n\t" -" movq %%mm0, %%mm2\n\t" -" punpckldq %%mm1, %%mm0\n\t" -" punpckhdq %%mm1, %%mm2\n\t" -" movq %%mm0, 40(%%edx)\n\t" -" movq %%mm2, 72(%%edx)\n\t" - -" movd 48(%%ecx), %%mm3\n\t" -" movd 60(%%ecx), %%mm2\n\t" -" pfsub 52(%%ecx), %%mm3\n\t" -" pfsub 56(%%ecx), %%mm2\n\t" -" pfmul 120(%%ebx), %%mm3\n\t" -" pfmul 120(%%ebx), %%mm2\n\t" -" movq %%mm2, %%mm1\n\t" - -" pfadd 56(%%ecx), %%mm1\n\t" -" pfadd 60(%%ecx), %%mm1\n\t" -" movq %%mm1, %%mm0\n\t" - -" pfadd 48(%%ecx), %%mm0\n\t" -" pfadd 52(%%ecx), %%mm0\n\t" -" pfadd %%mm3, %%mm1\n\t" -" punpckldq %%mm2, %%mm1\n\t" -" pfadd %%mm3, %%mm2\n\t" -" punpckldq %%mm2, %%mm0\n\t" -" movq %%mm1, 56(%%edx)\n\t" -" movq %%mm0, 48(%%edx)\n\t" +" movd "MANGLE(plus_1f)", %%mm6\n\t" +" punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ +" movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ + +" movq 32(%%ecx), %%mm0\n\t" +" movq 64(%%ecx), %%mm2\n\t" +" movq %%mm0, %%mm1\n\t" +" movq %%mm2, %%mm3\n\t" +" pxor %%mm7, %%mm1\n\t" +" pxor %%mm7, %%mm3\n\t" +" pfacc %%mm1, %%mm0\n\t" +" pfacc %%mm3, %%mm2\n\t" +" pfmul %%mm6, %%mm0\n\t" +" pfmul %%mm6, %%mm2\n\t" +" movq %%mm0, 32(%%edx)\n\t" +" movq %%mm2, 64(%%edx)\n\t" + +" movd 44(%%ecx), %%mm0\n\t" +" movd 40(%%ecx), %%mm2\n\t" +" movd 120(%%ebx), %%mm3\n\t" +" punpckldq 76(%%ecx), %%mm0\n\t" +" punpckldq 72(%%ecx), %%mm2\n\t" +" punpckldq %%mm3, %%mm3\n\t" +" movq %%mm0, %%mm4\n\t" +" movq %%mm2, %%mm5\n\t" +" pfsub %%mm2, %%mm0\n\t" +" pfmul %%mm3, %%mm0\n\t" +" movq %%mm0, %%mm1\n\t" +" pfadd %%mm5, %%mm0\n\t" +" pfadd %%mm4, %%mm0\n\t" +" movq %%mm0, %%mm2\n\t" +" punpckldq %%mm1, %%mm0\n\t" +" punpckhdq %%mm1, %%mm2\n\t" +" movq %%mm0, 40(%%edx)\n\t" +" movq %%mm2, 72(%%edx)\n\t" + +" movd 48(%%ecx), %%mm3\n\t" +" movd 60(%%ecx), %%mm2\n\t" +" pfsub 52(%%ecx), %%mm3\n\t" +" pfsub 56(%%ecx), %%mm2\n\t" +" pfmul 120(%%ebx), %%mm3\n\t" +" pfmul 120(%%ebx), %%mm2\n\t" +" movq %%mm2, %%mm1\n\t" + +" pfadd 56(%%ecx), %%mm1\n\t" +" pfadd 60(%%ecx), %%mm1\n\t" +" movq %%mm1, %%mm0\n\t" + +" pfadd 48(%%ecx), %%mm0\n\t" +" pfadd 52(%%ecx), %%mm0\n\t" +" pfadd %%mm3, %%mm1\n\t" +" punpckldq %%mm2, %%mm1\n\t" +" pfadd %%mm3, %%mm2\n\t" +" punpckldq %%mm2, %%mm0\n\t" +" movq %%mm1, 56(%%edx)\n\t" +" movq %%mm0, 48(%%edx)\n\t" /*---*/ -" movd 92(%%ecx), %%mm1\n\t" -" pfsub 88(%%ecx), %%mm1\n\t" -" pfmul 120(%%ebx), %%mm1\n\t" -" movd %%mm1, 92(%%edx)\n\t" -" pfadd 92(%%ecx), %%mm1\n\t" -" pfadd 88(%%ecx), %%mm1\n\t" -" movq %%mm1, %%mm0\n\t" - -" pfadd 80(%%ecx), %%mm0\n\t" -" pfadd 84(%%ecx), %%mm0\n\t" -" movd %%mm0, 80(%%edx)\n\t" - -" movd 80(%%ecx), %%mm0\n\t" -" pfsub 84(%%ecx), %%mm0\n\t" -" pfmul 120(%%ebx), %%mm0\n\t" -" pfadd %%mm0, %%mm1\n\t" -" pfadd 92(%%edx), %%mm0\n\t" -" punpckldq %%mm1, %%mm0\n\t" -" movq %%mm0, 84(%%edx)\n\t" - -" movq 96(%%ecx), %%mm0\n\t" -" movq %%mm0, %%mm1\n\t" -" pxor %%mm7, %%mm1\n\t" -" pfacc %%mm1, %%mm0\n\t" -" pfmul %%mm6, %%mm0\n\t" -" movq %%mm0, 96(%%edx)\n\t" - -" movd 108(%%ecx), %%mm0\n\t" -" pfsub 104(%%ecx), %%mm0\n\t" -" pfmul 120(%%ebx), %%mm0\n\t" -" movd %%mm0, 108(%%edx)\n\t" -" pfadd 104(%%ecx), %%mm0\n\t" -" pfadd 108(%%ecx), %%mm0\n\t" -" movd %%mm0, 104(%%edx)\n\t" - -" movd 124(%%ecx), %%mm1\n\t" -" pfsub 120(%%ecx), %%mm1\n\t" -" pfmul 120(%%ebx), %%mm1\n\t" -" movd %%mm1, 124(%%edx)\n\t" -" pfadd 120(%%ecx), %%mm1\n\t" -" pfadd 124(%%ecx), %%mm1\n\t" -" movq %%mm1, %%mm0\n\t" - -" pfadd 112(%%ecx), %%mm0\n\t" -" pfadd 116(%%ecx), %%mm0\n\t" -" movd %%mm0, 112(%%edx)\n\t" - -" movd 112(%%ecx), %%mm0\n\t" -" pfsub 116(%%ecx), %%mm0\n\t" -" pfmul 120(%%ebx), %%mm0\n\t" -" pfadd %%mm0,%%mm1\n\t" -" pfadd 124(%%edx), %%mm0\n\t" -" punpckldq %%mm1, %%mm0\n\t" -" movq %%mm0, 116(%%edx)\n\t" +" movd 9