| author    | arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-04-12 14:40:10 +0000 |
|-----------|----------------------------------------------------------|---------------------------|
| committer | arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-04-12 14:40:10 +0000 |
| commit    | 34a46800995e272e65021b23c0932a958a158c2c (patch)         |                           |
| tree      | 9848826311293729bb22dcc5523eb122778b216b /libvo/fastmemcpy.h | |
| parent    | 3dde448fb2a3fa46c45bc6734a0a4c06f550d11b (diff)          |                           |
| download  | mpv-34a46800995e272e65021b23c0932a958a158c2c.tar.bz2, mpv-34a46800995e272e65021b23c0932a958a158c2c.tar.xz | |
P3 fixes...
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@377 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo/fastmemcpy.h')
| -rw-r--r-- | libvo/fastmemcpy.h | 54 |

1 file changed, 24 insertions, 30 deletions
```diff
diff --git a/libvo/fastmemcpy.h b/libvo/fastmemcpy.h
index 44ee7ef473..1f0a41853e 100644
--- a/libvo/fastmemcpy.h
+++ b/libvo/fastmemcpy.h
@@ -2,31 +2,19 @@
   This part of code was taken by from Linux-2.4.3 and slightly modified
 for MMX2 instruction set. I have done it since linux uses page aligned
 blocks but mplayer uses weakly ordered data and original sources can not
-speedup their. Only using prefetch and movntq together have effect!
+speedup their. Only using prefetchnta and movntq together have effect!
   If you have questions please contact with me: Nick Kurshev: nickols_k@mail.ru.
 */
-
-#ifndef HAVE_MMX2
-//static inline void * __memcpy(void * to, const void * from, unsigned n)
-inline static void * fast_memcpy(void * to, const void * from, unsigned n)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
-	"rep ; movsl\n\t"
-	"testb $2,%b4\n\t"
-	"je 1f\n\t"
-	"movsw\n"
-	"1:\ttestb $1,%b4\n\t"
-	"je 2f\n\t"
-	"movsb\n"
-	"2:"
-	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
-	:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
-	: "memory");
-return (to);
+#ifdef HAVE_MMX2
+/* for small memory blocks (<256 bytes) this version is faster */
+#define small_memcpy(to,from,n)\
+{\
+__asm__ __volatile__(\
+	"rep ; movsb\n"\
+	::"D" (to), "S" (from),"c" (n)\
+	: "memory");\
 }
-#else
-//inline static void *__memcpy_mmx2(void *to, const void *from, unsigned len)
+
 inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 {
 	void *p;
@@ -36,12 +24,15 @@ inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 	{
 	  p = to;
 	  i = len >> 6; /* len/64 */
+	  len&=63;
+
 	__asm__ __volatile__ (
-	"1:  prefetch (%0)\n"		/* This set is 28 bytes */
-	"    prefetch 64(%0)\n"
-	"    prefetch 128(%0)\n"
-	"    prefetch 192(%0)\n"
-	"    prefetch 256(%0)\n"
+	"1:  prefetchnta (%0)\n"	/* This set is 28 bytes */
+	"    prefetchnta 64(%0)\n"
+	"    prefetchnta 128(%0)\n"
+	"    prefetchnta 192(%0)\n"
+	"    prefetchnta 256(%0)\n"
+#if 0
 	"2:  \n"
 	".section .fixup, \"ax\"\n"
 	"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
@@ -51,13 +42,14 @@ inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 	"	.align 4\n"
 	"	.long 1b, 3b\n"
 	".previous"
+#endif
 	: : "r" (from) );

 	for(; i>0; i--)
 	{
 		__asm__ __volatile__ (
-		"1:  prefetch 320(%0)\n"
+		"1:  prefetchnta 320(%0)\n"
 		"2:  movq (%0), %%mm0\n"
 		"  movq 8(%0), %%mm1\n"
 		"  movq 16(%0), %%mm2\n"
@@ -74,6 +66,7 @@ inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 		"  movntq %%mm1, 40(%1)\n"
 		"  movntq %%mm2, 48(%1)\n"
 		"  movntq %%mm3, 56(%1)\n"
+#if 0
 		".section .fixup, \"ax\"\n"
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
@@ -82,6 +75,7 @@ inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 		"	.align 4\n"
 		"	.long 1b, 3b\n"
 		".previous"
+#endif
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
@@ -91,10 +85,10 @@ inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 	/*
 	 *	Now do the tail of the block
 	 */
-	memcpy(to, from, len&63);
+	small_memcpy(to, from, len);
 	return p;
 }
+#define memcpy(a,b,c) fast_memcpy(a,b,c)

 #endif

-#define memcpy(a,b,c) fast_memcpy(a,b,c)
```
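The heart of the patch is the prefetchnta/movntq pairing: prefetchnta pulls the source ahead through a minimal cache footprint, movntq writes the destination with non-temporal stores that bypass the cache, and the sub-64-byte remainder (now split off up front with `len&=63`) falls through to a cheap byte copy. The sketch below shows the same technique in C using GCC/Clang SSE intrinsics rather than inline assembly. It is not code from this commit: the name `nt_memcpy` is made up, and the `_mm_sfence()` is an extra step the original asm does not issue (non-temporal stores are weakly ordered, so a fence is the conventional way to make them visible before returning). Compile with `-msse` on x86.

```c
/* Hedged sketch of the prefetchnta + movntq copy loop from this patch,
 * expressed with intrinsics. nt_memcpy is a hypothetical name. */
#include <stddef.h>
#include <string.h>
#include <mmintrin.h>   /* __m64, _mm_empty */
#include <xmmintrin.h>  /* _mm_prefetch, _mm_stream_pi, _mm_sfence */

static void *nt_memcpy(void *to, const void *from, size_t len)
{
    void *p = to;
    const __m64 *src = (const __m64 *)from;
    __m64 *dst = (__m64 *)to;
    size_t blocks = len >> 6;   /* number of whole 64-byte blocks */
    len &= 63;                  /* bytes left over for the tail copy */

    for (; blocks > 0; blocks--) {
        /* prefetchnta: fetch well ahead without displacing cached data
         * (the patch prefetches 320 bytes ahead of the read pointer) */
        _mm_prefetch((const char *)src + 320, _MM_HINT_NTA);
        /* eight movq loads + movntq non-temporal stores = 64 bytes */
        for (int i = 0; i < 8; i++)
            _mm_stream_pi(dst + i, src[i]);
        src += 8;
        dst += 8;
    }
    _mm_sfence();  /* order the streaming stores (not in the original) */
    _mm_empty();   /* emms: release MMX state before any FP code runs */

    memcpy(dst, src, len);  /* plain copy for the sub-64-byte tail */
    return p;
}
```

The patch's new `small_memcpy` (a bare `rep movsb`) plays the role of that trailing `memcpy`: per its own comment, it is faster for blocks under 256 bytes, where prefetch distance and loop setup cost more than the streaming stores save. Moving the `#define memcpy(a,b,c) fast_memcpy(a,b,c)` inside the `#ifdef HAVE_MMX2` block also means non-MMX2 builds now fall back to plain libc memcpy, since the old `rep movsl` variant was deleted.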