summaryrefslogtreecommitdiffstats
path: root/mp3lib
diff options
context:
space:
mode:
authornickols_k <nickols_k@b3059339-0415-0410-9bf9-f77b7e298cf2>2001-06-20 07:54:19 +0000
committernickols_k <nickols_k@b3059339-0415-0410-9bf9-f77b7e298cf2>2001-06-20 07:54:19 +0000
commit1202129042d7f1dbe58953ddd2f3d771ed4379ac (patch)
tree4ac93dae0cbf799d663e40d2842b7717d081c75b /mp3lib
parentb267d6e357653f7fffba35ba666304b0af544452 (diff)
downloadmpv-1202129042d7f1dbe58953ddd2f3d771ed4379ac.tar.bz2
mpv-1202129042d7f1dbe58953ddd2f3d771ed4379ac.tar.xz
Better 3dnow! optimization
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1174 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'mp3lib')
-rw-r--r--mp3lib/dct64_k7.s31
1 files changed, 17 insertions, 14 deletions
diff --git a/mp3lib/dct64_k7.s b/mp3lib/dct64_k7.s
index 44e2cf74ed..6a82d618c4 100644
--- a/mp3lib/dct64_k7.s
+++ b/mp3lib/dct64_k7.s
@@ -9,6 +9,9 @@
/// (using memory reference as operand of instructions)
/// - Phase 6 is rewritten with mixing of cpu and mmx opcodes
/// - change function name for support 3DNowEx! automatic detect
+/// - negation of a 3dnow reg is now done with PXOR 0x80000000, MMi instead
+/// of PFMUL, as suggested by the Athlon manual. (Two back-to-back PFMULs
+/// cannot be paired, but a PXOR can be.)
///
/// note: because K7 processors are aggressive out-of-order three-way
/// superscalar ones, instruction order is not significant for them.
@@ -21,6 +24,11 @@
/// this program. Use it at your own risk.
///
+.data
+ .align 8
+plus_minus_3dnow: .long 0x00000000, 0x80000000 /* 64-bit PXOR mask: low dword unchanged, bit 31 (float sign bit) of high dword flipped */
+
+.text
.globl dct64_3dnowex
.type dct64_3dnowex,@function
@@ -412,13 +420,8 @@ dct64_3dnowex:
movq %mm5, 120(%esi)
// 5
- movl $-1,%eax
- movd %eax,%mm1
+ movq plus_minus_3dnow, %mm0 /* mm0 = 0x00000000 | 0x80000000 (sign-flip mask for the high dword, used with pxor) */
movl $1,%eax
- movd %eax,%mm0
- / L | H
- punpckldq %mm1,%mm0
- pi2fd %mm0,%mm0 /* mm0 = 1.0 | -1.0 */
movd %eax,%mm1
pi2fd %mm1,%mm1
movl pnts+16,%eax
@@ -433,7 +436,7 @@ dct64_3dnowex:
movq 8(%esi),%mm4 /* mm4 = tmp2[2] | tmp2[3]*/
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4 /* mm4 = tmp2[2]+tmp2[3]|tmp2[2]-tmp2[3]*/
- pfmul %mm0,%mm4 /* mm4 = tmp2[2]+tmp2[3]|tmp2[3]-tmp2[2]*/
+ pxor %mm0,%mm4 /* mm4 = tmp2[2]+tmp2[3]|tmp2[3]-tmp2[2]*/
pfmul %mm1,%mm4 /* mm4 = tmp2[2]+tmp2[3]|(tmp2[3]-tmp2[2])*cos0*/
movq %mm4,%mm5
psrlq $32,%mm5 /* mm5 = (tmp2[3]-tmp2[2])*cos0 */
@@ -449,7 +452,7 @@ dct64_3dnowex:
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
@@ -470,7 +473,7 @@ dct64_3dnowex:
movq 40(%esi),%mm4
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
@@ -484,7 +487,7 @@ dct64_3dnowex:
movq 56(%esi),%mm4
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
@@ -504,7 +507,7 @@ dct64_3dnowex:
movq 72(%esi),%mm4
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
@@ -518,7 +521,7 @@ dct64_3dnowex:
movq 88(%esi),%mm4
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
@@ -538,7 +541,7 @@ dct64_3dnowex:
movq 104(%esi),%mm4
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5
@@ -552,7 +555,7 @@ dct64_3dnowex:
movq 120(%esi),%mm4
pfpnacc %mm4, %mm4
pswapd %mm4, %mm4
- pfmul %mm0,%mm4
+ pxor %mm0,%mm4
pfmul %mm1,%mm4
movq %mm4,%mm5
psrlq $32,%mm5