summary | refs | log | tree | commit | diff | stats
path: root/TOOLS
diff options
context:
space:
mode:
author: diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2009-03-10 10:05:09 +0000
committer: diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2009-03-10 10:05:09 +0000
commit: 143a63fd9eef49e84c26faf7644b032c8db79939 (patch)
tree: fcb6cf800bc2a443c670c4d0935a48128d0144cc /TOOLS
parent: cae648825521d65effff0fd371e142a8dc2d9cca (diff)
download: mpv-143a63fd9eef49e84c26faf7644b032c8db79939.tar.bz2
download: mpv-143a63fd9eef49e84c26faf7644b032c8db79939.tar.xz
Fix and restructure fastmemcpybench. It is now one binary that runs all
available memcpy variants and prints benchmark results about them.

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@28929 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'TOOLS')
-rwxr-xr-x TOOLS/fastmem.sh 21
-rw-r--r-- TOOLS/fastmemcpybench.c 142
2 files changed, 137 insertions, 26 deletions
diff --git a/TOOLS/fastmem.sh b/TOOLS/fastmem.sh
deleted file mode 100755
index 1b8ff12793..0000000000
--- a/TOOLS/fastmem.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-
-sync
-sleep 2
-./fastmem-k6
-sleep 2
-./fastmem-k7
-sleep 2
-./fastmem-mmx
-sleep 2
-./fastmem-sse
-sleep 2
-./fastmem-c
-sleep 2
-./fastmem2-k6
-sleep 2
-./fastmem2-k7
-sleep 2
-./fastmem2-mmx
-sleep 2
-./fastmem2-sse
-sleep 2
diff --git a/TOOLS/fastmemcpybench.c b/TOOLS/fastmemcpybench.c
index 48a29374c3..75eb70fc5b 100644
--- a/TOOLS/fastmemcpybench.c
+++ b/TOOLS/fastmemcpybench.c
@@ -7,8 +7,6 @@
* was not confirmed through testing.
*/
-/* According to Uoti this code is broken. */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -18,7 +16,92 @@
#include <sys/mman.h>
#include <sys/time.h>
#include <inttypes.h>
-#include "libvo/fastmemcpy.h"
+
+#include "config.h"
+#include "cpudetect.h"
+
+#define BLOCK_SIZE 4096
+#define CONFUSION_FACTOR 0
+
+#if HAVE_MMX
+#define COMPILE_MMX
+#endif
+
+#if HAVE_MMX2
+#define COMPILE_MMX2
+#endif
+
+#if HAVE_AMD3DNOW
+#define COMPILE_AMD3DNOW
+#endif
+
+#if HAVE_SSE
+#define COMPILE_SSE
+#endif
+
+#ifdef COMPILE_MMX
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_MMX2
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX2
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_AMD3DNOW
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _3DNow
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_SSE
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 1
+#define HAVE_SSE2 1
+#define RENAME(a) a ## _SSE
+#include "libvo/aclib_template.c"
+#endif
//#define ARR_SIZE 100000
#define ARR_SIZE (1024*768*2)
@@ -114,11 +197,60 @@ int main(void)
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
- fast_memcpy(marr1, marr2, ARR_SIZE - 16);
+ memcpy(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+
+#if HAVE_MMX
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#if HAVE_AMD3DNOW
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#if HAVE_MMX2
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#if HAVE_SSE
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
- printf(NAME ": CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
return 0;
}