Disable the SSE code and re-enable the FPU DCT for SSE CPUs (the FPU code is 0.3% faster, and I can't get the input data aligned in dct64_sse.s, so I can't finish optimizing it)

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3204 b3059339-0415-0410-9bf9-f77b7e298cf2
author: atmos4 <atmos4@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-11-29 18:05:42 +0000
committer: atmos4 <atmos4@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-11-29 18:05:42 +0000
commit: 4c9621ece7c7a5bd2da22ffbf64fcd730775275d (patch)
tree: ce5e389d53424bd6a21f47e31c28197292e3af92
parent: 4b5c1a845c241d7c1b7d63f8c871bcb3b1d1ee20 (diff)
download: mpv-4c9621ece7c7a5bd2da22ffbf64fcd730775275d.tar.bz2 mpv-4c9621ece7c7a5bd2da22ffbf64fcd730775275d.tar.xz
3 files changed, 20 insertions, 16 deletions
diff --git a/mp3lib/Makefile b/mp3lib/Makefile
index 5295eabab7..71247c3360 100644
--- a/mp3lib/Makefile
+++ b/mp3lib/Makefile
@@ -15,10 +15,10 @@ ifeq ($(TARGET_MMX),yes)
 SRCS += decode_MMX.s dct64_MMX.s tabinit_MMX.s
 OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o
 endif
-ifeq ($(TARGET_SSE),yes)
-SRCS += dct64_sse.s
-OBJS += dct64_sse.o
-endif
+#ifeq ($(TARGET_SSE),yes)
+#SRCS += dct64_sse.s
+#OBJS += dct64_sse.o
+#endif
 ifeq ($(TARGET_3DNOW),yes)
 SRCS += dct36_3dnow.s dct64_3dnow.s
 OBJS += dct36_3dnow.o dct64_3dnow.o
diff --git a/mp3lib/dct64_sse.s b/mp3lib/dct64_sse.s
index 922e1c881a..3bc74cc8c0 100644
--- a/mp3lib/dct64_sse.s
+++ b/mp3lib/dct64_sse.s
@@ -1,9 +1,13 @@
-# This code is a translation of dct64_k7.s from MPlayer.
-# Coded by Felix Buenemann <atmosfear at users.sourceforge.net>
-#
-# TODO: - fix phases 4 and 5 (sse)
-#       - optimize scalar FPU code? (interleave with sse code)
-#
+/ This code is a translation of dct64_k7.s from MPlayer.
+/ Coded by Felix Buenemann <atmosfear at users.sourceforge.net>
+/
+/ TODO: - fix phases 4 and 5 (sse)
+/       - optimize scalar FPU code? (interleave with sse code)
+/       - fix alignment (prohibits finishing this code)
+/       - then use faster insns for aligned data
+/
+/ Note: currently code is disabled as I couldn't get input data aligned!
+/
 
 //.data
 //	.align 8
diff --git a/mp3lib/sr1.c b/mp3lib/sr1.c
index a664c9dc88..601c94f211 100644
--- a/mp3lib/sr1.c
+++ b/mp3lib/sr1.c
@@ -409,8 +409,8 @@ void MP3_Init(){
        Note: It's ok, Since K8 will have SSE2 support and will much faster
        of P4 ;) 
      */
-      printf( "mp3lib: Using SSE%s! optimized decore.\n",(_isse>1?"2":""));
-//      printf( "mp3lib: Using Pentium%s optimized decore.\n",(_i586>1?"-MMX":""));
+//      printf( "mp3lib: Using SSE%s! optimized decore.\n",(_isse>1?"2":""));
+      printf( "mp3lib: Using Pentium%s optimized decore.\n",(_i586>1?"-MMX":""));
     else
     if(_3dnow)
       printf( "mp3lib: Using AMD 3dnow%s! optimized decore.\n",(_3dnow>1?"-dsp(k7)":""));
@@ -443,14 +443,14 @@ void MP3_Init(){
     tables_done_flag=1;
 
     dct36_func=dct36;
-#ifdef HAVE_SSE
+/*#ifdef HAVE_SSE
   if(_isse)
   {
     synth_func=synth_1to1_MMX;
     dct64_MMX_func=dct64_MMX_sse;
   }    
   else
-#endif
+#endif*/
 #ifdef HAVE_3DNOWEX
   if ( _3dnow > 1 )
   {
@@ -518,7 +518,7 @@ void MP3_Init(){
     tables_done_flag=1;
 
     dct36_func=dct36;
-#ifdef HAVE_SSE
+/*#ifdef HAVE_SSE
   if(gCpuCaps.hasSSE)
   {
     synth_func=synth_1to1_MMX;
@@ -526,7 +526,7 @@ void MP3_Init(){
     printf("mp3lib: using SSE optimized decore!\n");
   }    
   else
-#endif
+#endif*/
 #ifdef HAVE_3DNOWEX
   if (gCpuCaps.has3DNowExt)
   {
author	atmos4 <atmos4@b3059339-0415-0410-9bf9-f77b7e298cf2>	2001-11-29 18:05:42 +0000
committer	atmos4 <atmos4@b3059339-0415-0410-9bf9-f77b7e298cf2>	2001-11-29 18:05:42 +0000
commit	4c9621ece7c7a5bd2da22ffbf64fcd730775275d (patch)
tree	ce5e389d53424bd6a21f47e31c28197292e3af92
parent	4b5c1a845c241d7c1b7d63f8c871bcb3b1d1ee20 (diff)
download	mpv-4c9621ece7c7a5bd2da22ffbf64fcd730775275d.tar.bz2 mpv-4c9621ece7c7a5bd2da22ffbf64fcd730775275d.tar.xz