summary | refs | log | tree | commit | diff | stats
path: root/liba52
diff options
context:
space:
mode:
author nickols_k <nickols_k@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-05-26 15:43:54 +0000
committer nickols_k <nickols_k@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-05-26 15:43:54 +0000
commit d687880b3e2aa6fa083b47ee666f0e1f9ad692de (patch)
tree e0742ac2f8316450f7d93cf599317b9253aa878f /liba52
parent df7072771e4e126aef5a7f5adf1e2be75f32a2d3 (diff)
download mpv-d687880b3e2aa6fa083b47ee666f0e1f9ad692de.tar.bz2
mpv-d687880b3e2aa6fa083b47ee666f0e1f9ad692de.tar.xz
Improvements
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@887 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'liba52')
-rw-r--r-- liba52/srfftp_3dnow.h 61
1 files changed, 54 insertions, 7 deletions
diff --git a/liba52/srfftp_3dnow.h b/liba52/srfftp_3dnow.h
index 721bfa720a..10f780b1a6 100644
--- a/liba52/srfftp_3dnow.h
+++ b/liba52/srfftp_3dnow.h
@@ -33,6 +33,8 @@
#ifndef SRFFTP_3DNOW_H__
#define SRFFTP_3DNOW_H__
+static float HSQRT2_3DNOW = 0.707106781188; /* ~sqrt(2)/2; TRANSHALF_16_3DNOW multiplies u and v by this */
+
#ifdef HAVE_3DNOWEX
#define TRANS_FILL_MM6_MM7_3DNOW()\
asm(\
@@ -59,10 +61,10 @@
#endif
#ifdef HAVE_3DNOWEX
-#define PSWAP_MM(mm_base,mm_hlp) "pswapd "##mm_base","##mm_base" \n\t"
+#define PSWAP_MM(mm_base,mm_hlp) "pswapd "##mm_base","##mm_base"\n\t"
#else
#define PSWAP_MM(mm_base,mm_hlp)\
- "movq "##mm_base","##mm_hlp" \n\t"\
+ "movq "##mm_base","##mm_hlp"\n\t"\
"psrlq $32, "##mm_base"\n\t"\
"punpckldq "##mm_hlp","##mm_base"\n\t"
#endif
@@ -82,16 +84,16 @@
"movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\
"movq %6, %%mm0\n\t" /* a1 = A0;*/\
- "movq %%mm0, %%mm1\n\t"\
- "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
- "pfsub %%mm5, %%mm1\n\t" /*A1 = a1 - u;*/\
- "movq %%mm0, %0\n\t"\
- "movq %%mm1, %1\n\t"\
"movq %7, %%mm2\n\t" /* a1 = A4;*/\
+ "movq %%mm0, %%mm1\n\t"\
"movq %%mm2, %%mm3\n\t"\
+ "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
"pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
+ "pfsub %%mm5, %%mm1\n\t" /*A1 = a1 - u;*/\
"pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
+ "movq %%mm0, %0\n\t"\
"movq %%mm2, %3\n\t"\
+ "movq %%mm1, %1\n\t"\
"movq %%mm3, %2"\
:"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
@@ -99,4 +101,49 @@
asm volatile("femms":::"memory");\
}
+#define TRANSHALF_16_3DNOW(A2,A6,A10,A14)/* rewrites A2, A10, A6, A14 from the old A2/A6 values, wTB[2], wTB[6] and HSQRT2_3DNOW; wTB must be visible where the macro expands */\
+{\
+ asm volatile("femms":::"memory"); /* femms before touching the mm registers */\
+ TRANS_FILL_MM6_MM7_3DNOW()\
+ asm(\
+ "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
+ "movq %%mm0, %%mm1\n\t"\
+ "pfmul %%mm7, %%mm1\n\t"\
+ "pfacc %%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\
+ "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\
+ "movq %%mm1, %%mm2\n\t"\
+ "pfmul %%mm7, %%mm1\n\t"\
+ "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\
+ "movq %%mm1, %%mm2\n\t"\
+ "pfmul %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\
+ "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\
+ "pfadd %%mm2, %%mm3\n\t"\
+ PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\
+ "pfmul %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
+ "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
+ "movd %8, %%mm2\n\t" /* low half of mm2 = HSQRT2_3DNOW */\
+ "punpckldq %8, %%mm2\n\t" /* high half too: mm2 = {HSQRT2_3DNOW, HSQRT2_3DNOW} */\
+ "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
+ "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
+ "movq %6, %%mm1\n\t" /* a1 = A2;*/\
+ "movq %%mm1, %%mm2\n\t"\
+ "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\
+ "pfsub %%mm0, %%mm2\n\t" /*A10 = a1 - u; (stored through %1 below)*/\
+ "movq %%mm1, %0\n\t" /* store A2 */\
+ "movq %%mm2, %1\n\t" /* store A10 */\
+ "movq %7, %%mm1\n\t" /* a1 = A6;*/\
+ "movq %%mm1, %%mm2\n\t"\
+ "movq %%mm3, %%mm4\n\t"\
+ "pfmul %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\
+ "pfadd %%mm4, %%mm1\n\t"/*A6.im = a1.im - v.im;*/\
+ "pfmul %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\
+ "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\
+ "movq %%mm1, %2\n\t" /* store A6 */\
+ "movq %%mm2, %3" /* store A14 */\
+ :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
+ :"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
+ :"memory");\
+ asm volatile("femms":::"memory");\
+}
+
#endif