summary refs log tree commit diff stats
path: root/mp3lib
diff options
context:
space:
mode:
author Uoti Urpala <uau@glyph.nonexistent.invalid> 2010-04-26 18:25:34 +0300
committer Uoti Urpala <uau@glyph.nonexistent.invalid> 2010-04-26 18:25:34 +0300
commit 7795726e0f8c70edd6ecde7fd2137214af302f4f (patch)
tree 87a087e69a0e2912183736de409676f824fb2248 /mp3lib
parent ba3b65b92f3f822fa75b0210b841557f5b20f6d1 (diff)
parent e16f02fe4001f3056b8efd1a099a563569b73f5d (diff)
download mpv-7795726e0f8c70edd6ecde7fd2137214af302f4f.tar.bz2
mpv-7795726e0f8c70edd6ecde7fd2137214af302f4f.tar.xz
Merge svn changes up to r31033
Diffstat (limited to 'mp3lib')
-rw-r--r-- mp3lib/dct36.c 28
-rw-r--r-- mp3lib/dct36_3dnow.c 902
-rw-r--r-- mp3lib/dct64_3dnow.c 1666
-rw-r--r-- mp3lib/dct64_k7.c 1404
-rw-r--r-- mp3lib/dct64_mmx.c 1892
-rw-r--r-- mp3lib/decod386.c 10
-rw-r--r-- mp3lib/decode_i586.c 6
-rw-r--r-- mp3lib/decode_mmx.c 382
-rw-r--r-- mp3lib/equalizer.c 119
-rw-r--r-- mp3lib/l2tables.h 258
-rw-r--r-- mp3lib/layer1.c 2
-rw-r--r-- mp3lib/layer2.c 6
-rw-r--r-- mp3lib/layer3.c 28
-rw-r--r-- mp3lib/mpg123.h 4
-rw-r--r-- mp3lib/sr1.c 36
15 files changed, 3371 insertions, 3372 deletions
diff --git a/mp3lib/dct36.c b/mp3lib/dct36.c
index b884bb949d..22ec273277 100644
--- a/mp3lib/dct36.c
+++ b/mp3lib/dct36.c
@@ -189,21 +189,21 @@ static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)];
#define MACRO1(v) { \
- real sum0,sum1; \
+ real sum0, sum1; \
sum0 = tmp1a + tmp2a; \
- sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
- MACRO0(v); }
+ sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
+ MACRO0(v); }
#define MACRO2(v) { \
- real sum0,sum1; \
+ real sum0, sum1; \
sum0 = tmp2a - tmp1a; \
sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
- MACRO0(v); }
+ MACRO0(v); }
register const real *c = COS9;
register real *out2 = o2;
- register real *w = wintab;
- register real *out1 = o1;
- register real *ts = tsbuf;
+ register real *w = wintab;
+ register real *out1 = o1;
+ register real *ts = tsbuf;
real ta33,ta66,tb33,tb66;
@@ -256,12 +256,12 @@ static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
MACRO2(5);
}
- {
- real sum0,sum1;
- sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
- sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
- MACRO0(4);
- }
+ {
+ real sum0,sum1;
+ sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
+ sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
+ MACRO0(4);
+ }
}
#endif
diff --git a/mp3lib/dct36_3dnow.c b/mp3lib/dct36_3dnow.c
index 8263639149..68cc33014b 100644
--- a/mp3lib/dct36_3dnow.c
+++ b/mp3lib/dct36_3dnow.c
@@ -40,462 +40,462 @@ void dct36_3dnow(real *inbuf, real *o1,
#endif
{
__asm__ volatile(
- "movq (%%eax),%%mm0\n\t"
- "movq 4(%%eax),%%mm1\n\t"
- "pfadd %%mm1,%%mm0\n\t"
- "movq %%mm0,4(%%eax)\n\t"
- "psrlq $32,%%mm1\n\t"
- "movq 12(%%eax),%%mm2\n\t"
- "punpckldq %%mm2,%%mm1\n\t"
- "pfadd %%mm2,%%mm1\n\t"
- "movq %%mm1,12(%%eax)\n\t"
- "psrlq $32,%%mm2\n\t"
- "movq 20(%%eax),%%mm3\n\t"
- "punpckldq %%mm3,%%mm2\n\t"
- "pfadd %%mm3,%%mm2\n\t"
- "movq %%mm2,20(%%eax)\n\t"
- "psrlq $32,%%mm3\n\t"
- "movq 28(%%eax),%%mm4\n\t"
- "punpckldq %%mm4,%%mm3\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "movq %%mm3,28(%%eax)\n\t"
- "psrlq $32,%%mm4\n\t"
- "movq 36(%%eax),%%mm5\n\t"
- "punpckldq %%mm5,%%mm4\n\t"
- "pfadd %%mm5,%%mm4\n\t"
- "movq %%mm4,36(%%eax)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movq 44(%%eax),%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movq %%mm5,44(%%eax)\n\t"
- "psrlq $32,%%mm6\n\t"
- "movq 52(%%eax),%%mm7\n\t"
- "punpckldq %%mm7,%%mm6\n\t"
- "pfadd %%mm7,%%mm6\n\t"
- "movq %%mm6,52(%%eax)\n\t"
- "psrlq $32,%%mm7\n\t"
- "movq 60(%%eax),%%mm0\n\t"
- "punpckldq %%mm0,%%mm7\n\t"
- "pfadd %%mm0,%%mm7\n\t"
- "movq %%mm7,60(%%eax)\n\t"
- "psrlq $32,%%mm0\n\t"
- "movd 68(%%eax),%%mm1\n\t"
- "pfadd %%mm1,%%mm0\n\t"
- "movd %%mm0,68(%%eax)\n\t"
- "movd 4(%%eax),%%mm0\n\t"
- "movd 12(%%eax),%%mm1\n\t"
- "punpckldq %%mm1,%%mm0\n\t"
- "punpckldq 20(%%eax),%%mm1\n\t"
- "pfadd %%mm1,%%mm0\n\t"
- "movd %%mm0,12(%%eax)\n\t"
- "psrlq $32,%%mm0\n\t"
- "movd %%mm0,20(%%eax)\n\t"
- "psrlq $32,%%mm1\n\t"
- "movd 28(%%eax),%%mm2\n\t"
- "punpckldq %%mm2,%%mm1\n\t"
- "punpckldq 36(%%eax),%%mm2\n\t"
- "pfadd %%mm2,%%mm1\n\t"
- "movd %%mm1,28(%%eax)\n\t"
- "psrlq $32,%%mm1\n\t"
- "movd %%mm1,36(%%eax)\n\t"
- "psrlq $32,%%mm2\n\t"
- "movd 44(%%eax),%%mm3\n\t"
- "punpckldq %%mm3,%%mm2\n\t"
- "punpckldq 52(%%eax),%%mm3\n\t"
- "pfadd %%mm3,%%mm2\n\t"
- "movd %%mm2,44(%%eax)\n\t"
- "psrlq $32,%%mm2\n\t"
- "movd %%mm2,52(%%eax)\n\t"
- "psrlq $32,%%mm3\n\t"
- "movd 60(%%eax),%%mm4\n\t"
- "punpckldq %%mm4,%%mm3\n\t"
- "punpckldq 68(%%eax),%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "movd %%mm3,60(%%eax)\n\t"
- "psrlq $32,%%mm3\n\t"
- "movd %%mm3,68(%%eax)\n\t"
+ "movq (%%eax),%%mm0\n\t"
+ "movq 4(%%eax),%%mm1\n\t"
+ "pfadd %%mm1,%%mm0\n\t"
+ "movq %%mm0,4(%%eax)\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "movq 12(%%eax),%%mm2\n\t"
+ "punpckldq %%mm2,%%mm1\n\t"
+ "pfadd %%mm2,%%mm1\n\t"
+ "movq %%mm1,12(%%eax)\n\t"
+ "psrlq $32,%%mm2\n\t"
+ "movq 20(%%eax),%%mm3\n\t"
+ "punpckldq %%mm3,%%mm2\n\t"
+ "pfadd %%mm3,%%mm2\n\t"
+ "movq %%mm2,20(%%eax)\n\t"
+ "psrlq $32,%%mm3\n\t"
+ "movq 28(%%eax),%%mm4\n\t"
+ "punpckldq %%mm4,%%mm3\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "movq %%mm3,28(%%eax)\n\t"
+ "psrlq $32,%%mm4\n\t"
+ "movq 36(%%eax),%%mm5\n\t"
+ "punpckldq %%mm5,%%mm4\n\t"
+ "pfadd %%mm5,%%mm4\n\t"
+ "movq %%mm4,36(%%eax)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movq 44(%%eax),%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movq %%mm5,44(%%eax)\n\t"
+ "psrlq $32,%%mm6\n\t"
+ "movq 52(%%eax),%%mm7\n\t"
+ "punpckldq %%mm7,%%mm6\n\t"
+ "pfadd %%mm7,%%mm6\n\t"
+ "movq %%mm6,52(%%eax)\n\t"
+ "psrlq $32,%%mm7\n\t"
+ "movq 60(%%eax),%%mm0\n\t"
+ "punpckldq %%mm0,%%mm7\n\t"
+ "pfadd %%mm0,%%mm7\n\t"
+ "movq %%mm7,60(%%eax)\n\t"
+ "psrlq $32,%%mm0\n\t"
+ "movd 68(%%eax),%%mm1\n\t"
+ "pfadd %%mm1,%%mm0\n\t"
+ "movd %%mm0,68(%%eax)\n\t"
+ "movd 4(%%eax),%%mm0\n\t"
+ "movd 12(%%eax),%%mm1\n\t"
+ "punpckldq %%mm1,%%mm0\n\t"
+ "punpckldq 20(%%eax),%%mm1\n\t"
+ "pfadd %%mm1,%%mm0\n\t"
+ "movd %%mm0,12(%%eax)\n\t"
+ "psrlq $32,%%mm0\n\t"
+ "movd %%mm0,20(%%eax)\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "movd 28(%%eax),%%mm2\n\t"
+ "punpckldq %%mm2,%%mm1\n\t"
+ "punpckldq 36(%%eax),%%mm2\n\t"
+ "pfadd %%mm2,%%mm1\n\t"
+ "movd %%mm1,28(%%eax)\n\t"
+ "psrlq $32,%%mm1\n\t"
+ "movd %%mm1,36(%%eax)\n\t"
+ "psrlq $32,%%mm2\n\t"
+ "movd 44(%%eax),%%mm3\n\t"
+ "punpckldq %%mm3,%%mm2\n\t"
+ "punpckldq 52(%%eax),%%mm3\n\t"
+ "pfadd %%mm3,%%mm2\n\t"
+ "movd %%mm2,44(%%eax)\n\t"
+ "psrlq $32,%%mm2\n\t"
+ "movd %%mm2,52(%%eax)\n\t"
+ "psrlq $32,%%mm3\n\t"
+ "movd 60(%%eax),%%mm4\n\t"
+ "punpckldq %%mm4,%%mm3\n\t"
+ "punpckldq 68(%%eax),%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "movd %%mm3,60(%%eax)\n\t"
+ "psrlq $32,%%mm3\n\t"
+ "movd %%mm3,68(%%eax)\n\t"
- "movq 24(%%eax),%%mm0\n\t"
- "movq 48(%%eax),%%mm1\n\t"
- "movd "MANGLE(COS9)"+12,%%mm2\n\t"
- "punpckldq %%mm2,%%mm2\n\t"
- "movd "MANGLE(COS9)"+24,%%mm3\n\t"
- "punpckldq %%mm3,%%mm3\n\t"
- "pfmul %%mm2,%%mm0\n\t"
- "pfmul %%mm3,%%mm1\n\t"
- "pushl %%eax\n\t"
- "movl $1,%%eax\n\t"
- "movd %%eax,%%mm7\n\t"
- "pi2fd %%mm7,%%mm7\n\t"
- "popl %%eax\n\t"
- "movq 8(%%eax),%%mm2\n\t"
- "movd "MANGLE(COS9)"+4,%%mm3\n\t"
- "punpckldq %%mm3,%%mm3\n\t"
- "pfmul %%mm3,%%mm2\n\t"
- "pfadd %%mm0,%%mm2\n\t"
- "movq 40(%%eax),%%mm3\n\t"
- "movd "MANGLE(COS9)"+20,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "pfadd %%mm3,%%mm2\n\t"
- "movq 56(%%eax),%%mm3\n\t"
- "movd "MANGLE(COS9)"+28,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "pfadd %%mm3,%%mm2\n\t"
- "movq (%%eax),%%mm3\n\t"
- "movq 16(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+8,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "movq 32(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+16,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "pfadd %%mm1,%%mm3\n\t"
- "movq 64(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+32,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "movq %%mm2,%%mm4\n\t"
- "pfadd %%mm3,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 108(%%edx),%%mm6\n\t"
- "punpckldq 104(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
+ "movq 24(%%eax),%%mm0\n\t"
+ "movq 48(%%eax),%%mm1\n\t"
+ "movd "MANGLE(COS9)"+12,%%mm2\n\t"
+ "punpckldq %%mm2,%%mm2\n\t"
+ "movd "MANGLE(COS9)"+24,%%mm3\n\t"
+ "punpckldq %%mm3,%%mm3\n\t"
+ "pfmul %%mm2,%%mm0\n\t"
+ "pfmul %%mm3,%%mm1\n\t"
+ "pushl %%eax\n\t"
+ "movl $1,%%eax\n\t"
+ "movd %%eax,%%mm7\n\t"
+ "pi2fd %%mm7,%%mm7\n\t"
+ "popl %%eax\n\t"
+ "movq 8(%%eax),%%mm2\n\t"
+ "movd "MANGLE(COS9)"+4,%%mm3\n\t"
+ "punpckldq %%mm3,%%mm3\n\t"
+ "pfmul %%mm3,%%mm2\n\t"
+ "pfadd %%mm0,%%mm2\n\t"
+ "movq 40(%%eax),%%mm3\n\t"
+ "movd "MANGLE(COS9)"+20,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "pfadd %%mm3,%%mm2\n\t"
+ "movq 56(%%eax),%%mm3\n\t"
+ "movd "MANGLE(COS9)"+28,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "pfadd %%mm3,%%mm2\n\t"
+ "movq (%%eax),%%mm3\n\t"
+ "movq 16(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+8,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "movq 32(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+16,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "pfadd %%mm1,%%mm3\n\t"
+ "movq 64(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+32,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "movq %%mm2,%%mm4\n\t"
+ "pfadd %%mm3,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 108(%%edx),%%mm6\n\t"
+ "punpckldq 104(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
#ifdef DCT36_OPTIMIZE_FOR_K7
- "pswapd %%mm5,%%mm5\n\t"
- "movq %%mm5,32(%%ecx)\n\t"
+ "pswapd %%mm5,%%mm5\n\t"
+ "movq %%mm5,32(%%ecx)\n\t"
#else
- "movd %%mm5,36(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,32(%%ecx)\n\t"
+ "movd %%mm5,36(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,32(%%ecx)\n\t"
#endif
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 32(%%edx),%%mm6\n\t"
- "punpckldq 36(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 32(%%esi),%%mm6\n\t"
- "punpckldq 36(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,1024(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1152(%%ebx)\n\t"
- "movq %%mm3,%%mm4\n\t"
- "pfsub %%mm2,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 140(%%edx),%%mm6\n\t"
- "punpckldq 72(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,68(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,0(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 0(%%edx),%%mm6\n\t"
- "punpckldq 68(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 0(%%esi),%%mm6\n\t"
- "punpckldq 68(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,0(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,2176(%%ebx)\n\t"
- "movq 8(%%eax),%%mm2\n\t"
- "movq 40(%%eax),%%mm3\n\t"
- "pfsub %%mm3,%%mm2\n\t"
- "movq 56(%%eax),%%mm3\n\t"
- "pfsub %%mm3,%%mm2\n\t"
- "movd "MANGLE(COS9)"+12,%%mm3\n\t"
- "punpckldq %%mm3,%%mm3\n\t"
- "pfmul %%mm3,%%mm2\n\t"
- "movq 16(%%eax),%%mm3\n\t"
- "movq 32(%%eax),%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "movq 64(%%eax),%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "movd "MANGLE(COS9)"+24,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "movq 48(%%eax),%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "movq (%%eax),%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "movq %%mm2,%%mm4\n\t"
- "pfadd %%mm3,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 112(%%edx),%%mm6\n\t"
- "punpckldq 100(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,40(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,28(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 28(%%edx),%%mm6\n\t"
- "punpckldq 40(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 28(%%esi),%%mm6\n\t"
- "punpckldq 40(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,896(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1280(%%ebx)\n\t"
- "movq %%mm3,%%mm4\n\t"
- "pfsub %%mm2,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 136(%%edx),%%mm6\n\t"
- "punpckldq 76(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,64(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,4(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 4(%%edx),%%mm6\n\t"
- "punpckldq 64(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 4(%%esi),%%mm6\n\t"
- "punpckldq 64(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,128(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,2048(%%ebx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 32(%%edx),%%mm6\n\t"
+ "punpckldq 36(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 32(%%esi),%%mm6\n\t"
+ "punpckldq 36(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,1024(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,1152(%%ebx)\n\t"
+ "movq %%mm3,%%mm4\n\t"
+ "pfsub %%mm2,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 140(%%edx),%%mm6\n\t"
+ "punpckldq 72(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,68(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,0(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 0(%%edx),%%mm6\n\t"
+ "punpckldq 68(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 0(%%esi),%%mm6\n\t"
+ "punpckldq 68(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,0(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,2176(%%ebx)\n\t"
+ "movq 8(%%eax),%%mm2\n\t"
+ "movq 40(%%eax),%%mm3\n\t"
+ "pfsub %%mm3,%%mm2\n\t"
+ "movq 56(%%eax),%%mm3\n\t"
+ "pfsub %%mm3,%%mm2\n\t"
+ "movd "MANGLE(COS9)"+12,%%mm3\n\t"
+ "punpckldq %%mm3,%%mm3\n\t"
+ "pfmul %%mm3,%%mm2\n\t"
+ "movq 16(%%eax),%%mm3\n\t"
+ "movq 32(%%eax),%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "movq 64(%%eax),%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "movd "MANGLE(COS9)"+24,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "movq 48(%%eax),%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "movq (%%eax),%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "movq %%mm2,%%mm4\n\t"
+ "pfadd %%mm3,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 112(%%edx),%%mm6\n\t"
+ "punpckldq 100(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,40(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,28(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 28(%%edx),%%mm6\n\t"
+ "punpckldq 40(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 28(%%esi),%%mm6\n\t"
+ "punpckldq 40(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,896(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,1280(%%ebx)\n\t"
+ "movq %%mm3,%%mm4\n\t"
+ "pfsub %%mm2,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 136(%%edx),%%mm6\n\t"
+ "punpckldq 76(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,64(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,4(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 4(%%edx),%%mm6\n\t"
+ "punpckldq 64(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 4(%%esi),%%mm6\n\t"
+ "punpckldq 64(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,128(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,2048(%%ebx)\n\t"
- "movq 8(%%eax),%%mm2\n\t"
- "movd "MANGLE(COS9)"+20,%%mm3\n\t"
- "punpckldq %%mm3,%%mm3\n\t"
- "pfmul %%mm3,%%mm2\n\t"
- "pfsub %%mm0,%%mm2\n\t"
- "movq 40(%%eax),%%mm3\n\t"
- "movd "MANGLE(COS9)"+28,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "pfsub %%mm3,%%mm2\n\t"
- "movq 56(%%eax),%%mm3\n\t"
- "movd "MANGLE(COS9)"+4,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "pfadd %%mm3,%%mm2\n\t"
- "movq (%%eax),%%mm3\n\t"
- "movq 16(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+32,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "movq 32(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+8,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "pfadd %%mm1,%%mm3\n\t"
- "movq 64(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+16,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "movq %%mm2,%%mm4\n\t"
- "pfadd %%mm3,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 116(%%edx),%%mm6\n\t"
- "punpckldq 96(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,44(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,24(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 24(%%edx),%%mm6\n\t"
- "punpckldq 44(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 24(%%esi),%%mm6\n\t"
- "punpckldq 44(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,768(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1408(%%ebx)\n\t"
- "movq %%mm3,%%mm4\n\t"
- "pfsub %%mm2,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 132(%%edx),%%mm6\n\t"
- "punpckldq 80(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,60(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,8(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 8(%%edx),%%mm6\n\t"
- "punpckldq 60(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 8(%%esi),%%mm6\n\t"
- "punpckldq 60(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,256(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1920(%%ebx)\n\t"
- "movq 8(%%eax),%%mm2\n\t"
- "movd "MANGLE(COS9)"+28,%%mm3\n\t"
- "punpckldq %%mm3,%%mm3\n\t"
- "pfmul %%mm3,%%mm2\n\t"
- "pfsub %%mm0,%%mm2\n\t"
- "movq 40(%%eax),%%mm3\n\t"
- "movd "MANGLE(COS9)"+4,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "pfadd %%mm3,%%mm2\n\t"
- "movq 56(%%eax),%%mm3\n\t"
- "movd "MANGLE(COS9)"+20,%%mm4\n\t"
- "punpckldq %%mm4,%%mm4\n\t"
- "pfmul %%mm4,%%mm3\n\t"
- "pfsub %%mm3,%%mm2\n\t"
- "movq (%%eax),%%mm3\n\t"
- "movq 16(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+16,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "movq 32(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+32,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfadd %%mm4,%%mm3\n\t"
- "pfadd %%mm1,%%mm3\n\t"
- "movq 64(%%eax),%%mm4\n\t"
- "movd "MANGLE(COS9)"+8,%%mm5\n\t"
- "punpckldq %%mm5,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "pfsub %%mm4,%%mm3\n\t"
- "movq %%mm2,%%mm4\n\t"
- "pfadd %%mm3,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 120(%%edx),%%mm6\n\t"
- "punpckldq 92(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,48(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,20(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 20(%%edx),%%mm6\n\t"
- "punpckldq 48(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 20(%%esi),%%mm6\n\t"
- "punpckldq 48(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,640(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1536(%%ebx)\n\t"
- "movq %%mm3,%%mm4\n\t"
- "pfsub %%mm2,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 128(%%edx),%%mm6\n\t"
- "punpckldq 84(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,56(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,12(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 12(%%edx),%%mm6\n\t"
- "punpckldq 56(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 12(%%esi),%%mm6\n\t"
- "punpckldq 56(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,384(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1792(%%ebx)\n\t"
+ "movq 8(%%eax),%%mm2\n\t"
+ "movd "MANGLE(COS9)"+20,%%mm3\n\t"
+ "punpckldq %%mm3,%%mm3\n\t"
+ "pfmul %%mm3,%%mm2\n\t"
+ "pfsub %%mm0,%%mm2\n\t"
+ "movq 40(%%eax),%%mm3\n\t"
+ "movd "MANGLE(COS9)"+28,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "pfsub %%mm3,%%mm2\n\t"
+ "movq 56(%%eax),%%mm3\n\t"
+ "movd "MANGLE(COS9)"+4,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "pfadd %%mm3,%%mm2\n\t"
+ "movq (%%eax),%%mm3\n\t"
+ "movq 16(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+32,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "movq 32(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+8,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "pfadd %%mm1,%%mm3\n\t"
+ "movq 64(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+16,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "movq %%mm2,%%mm4\n\t"
+ "pfadd %%mm3,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 116(%%edx),%%mm6\n\t"
+ "punpckldq 96(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,44(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,24(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 24(%%edx),%%mm6\n\t"
+ "punpckldq 44(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 24(%%esi),%%mm6\n\t"
+ "punpckldq 44(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,768(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,1408(%%ebx)\n\t"
+ "movq %%mm3,%%mm4\n\t"
+ "pfsub %%mm2,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 132(%%edx),%%mm6\n\t"
+ "punpckldq 80(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,60(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,8(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 8(%%edx),%%mm6\n\t"
+ "punpckldq 60(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 8(%%esi),%%mm6\n\t"
+ "punpckldq 60(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,256(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,1920(%%ebx)\n\t"
+ "movq 8(%%eax),%%mm2\n\t"
+ "movd "MANGLE(COS9)"+28,%%mm3\n\t"
+ "punpckldq %%mm3,%%mm3\n\t"
+ "pfmul %%mm3,%%mm2\n\t"
+ "pfsub %%mm0,%%mm2\n\t"
+ "movq 40(%%eax),%%mm3\n\t"
+ "movd "MANGLE(COS9)"+4,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "pfadd %%mm3,%%mm2\n\t"
+ "movq 56(%%eax),%%mm3\n\t"
+ "movd "MANGLE(COS9)"+20,%%mm4\n\t"
+ "punpckldq %%mm4,%%mm4\n\t"
+ "pfmul %%mm4,%%mm3\n\t"
+ "pfsub %%mm3,%%mm2\n\t"
+ "movq (%%eax),%%mm3\n\t"
+ "movq 16(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+16,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "movq 32(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+32,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfadd %%mm4,%%mm3\n\t"
+ "pfadd %%mm1,%%mm3\n\t"
+ "movq 64(%%eax),%%mm4\n\t"
+ "movd "MANGLE(COS9)"+8,%%mm5\n\t"
+ "punpckldq %%mm5,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "pfsub %%mm4,%%mm3\n\t"
+ "movq %%mm2,%%mm4\n\t"
+ "pfadd %%mm3,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 120(%%edx),%%mm6\n\t"
+ "punpckldq 92(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,48(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,20(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 20(%%edx),%%mm6\n\t"
+ "punpckldq 48(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 20(%%esi),%%mm6\n\t"
+ "punpckldq 48(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,640(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,1536(%%ebx)\n\t"
+ "movq %%mm3,%%mm4\n\t"
+ "pfsub %%mm2,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 128(%%edx),%%mm6\n\t"
+ "punpckldq 84(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,56(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,12(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 12(%%edx),%%mm6\n\t"
+ "punpckldq 56(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 12(%%esi),%%mm6\n\t"
+ "punpckldq 56(%%esi),%%mm6\n\t"
+ "pfadd %%mm6,%%mm5\n\t"
+ "movd %%mm5,384(%%ebx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,1792(%%ebx)\n\t"
- "movq (%%eax),%%mm4\n\t"
- "movq 16(%%eax),%%mm3\n\t"
- "pfsub %%mm3,%%mm4\n\t"
- "movq 32(%%eax),%%mm3\n\t"
- "pfadd %%mm3,%%mm4\n\t"
- "movq 48(%%eax),%%mm3\n\t"
- "pfsub %%mm3,%%mm4\n\t"
- "movq 64(%%eax),%%mm3\n\t"
- "pfadd %%mm3,%%mm4\n\t"
- "movq %%mm7,%%mm5\n\t"
- "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t"
- "pfmul %%mm5,%%mm4\n\t"
- "movq %%mm4,%%mm5\n\t"
- "pfacc %%mm5,%%mm5\n\t"
- "movd 124(%%edx),%%mm6\n\t"
- "punpckldq 88(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd %%mm5,52(%%ecx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,16(%%ecx)\n\t"
- "movq %%mm4,%%mm6\n\t"
- "punpckldq %%mm6,%%mm5\n\t"
- "pfsub %%mm6,%%mm5\n\t"
- "punpckhdq %%mm5,%%mm5\n\t"
- "movd 16(%%edx),%%mm6\n\t"
- "punpckldq 52(%%edx),%%mm6\n\t"
- "pfmul %%mm6,%%mm5\n\t"
- "movd 16(%%esi),%%mm6\n\t"
- "punpckldq 52(%%esi),%%mm6\n\t"
- "pfadd %%mm6,%%mm5\n\t"
- "movd %%mm5,512(%%ebx)\n\t"
- "psrlq $32,%%mm5\n\t"
- "movd %%mm5,1664(%%ebx)\n\t"
+ "movq (%%eax),%%mm4\n\t"
+ "movq 16(%%eax),%%mm3\n\t"
+ "pfsub %%mm3,%%mm4\n\t"
+ "movq 32(%%eax),%%mm3\n\t"
+ "pfadd %%mm3,%%mm4\n\t"
+ "movq 48(%%eax),%%mm3\n\t"
+ "pfsub %%mm3,%%mm4\n\t"
+ "movq 64(%%eax),%%mm3\n\t"
+ "pfadd %%mm3,%%mm4\n\t"
+ "movq %%mm7,%%mm5\n\t"
+ "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t"
+ "pfmul %%mm5,%%mm4\n\t"
+ "movq %%mm4,%%mm5\n\t"
+ "pfacc %%mm5,%%mm5\n\t"
+ "movd 124(%%edx),%%mm6\n\t"
+ "punpckldq 88(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd %%mm5,52(%%ecx)\n\t"
+ "psrlq $32,%%mm5\n\t"
+ "movd %%mm5,16(%%ecx)\n\t"
+ "movq %%mm4,%%mm6\n\t"
+ "punpckldq %%mm6,%%mm5\n\t"
+ "pfsub %%mm6,%%mm5\n\t"
+ "punpckhdq %%mm5,%%mm5\n\t"
+ "movd 16(%%edx),%%mm6\n\t"
+ "punpckldq 52(%%edx),%%mm6\n\t"
+ "pfmul %%mm6,%%mm5\n\t"
+ "movd 16(%%esi),%%mm6\n\t"