diff options
author | diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2009-03-10 10:05:09 +0000 |
---|---|---|
committer | diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2009-03-10 10:05:09 +0000 |
commit | 143a63fd9eef49e84c26faf7644b032c8db79939 (patch) | |
tree | fcb6cf800bc2a443c670c4d0935a48128d0144cc /TOOLS | |
parent | cae648825521d65effff0fd371e142a8dc2d9cca (diff) | |
download | mpv-143a63fd9eef49e84c26faf7644b032c8db79939.tar.bz2 mpv-143a63fd9eef49e84c26faf7644b032c8db79939.tar.xz |
Fix and restructure fastmemcpybench. It is now a single binary that runs all
available memcpy variants and prints benchmark results for each of them.
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@28929 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'TOOLS')
-rwxr-xr-x | TOOLS/fastmem.sh | 21 | ||||
-rw-r--r-- | TOOLS/fastmemcpybench.c | 142 |
2 files changed, 137 insertions, 26 deletions
diff --git a/TOOLS/fastmem.sh b/TOOLS/fastmem.sh deleted file mode 100755 index 1b8ff12793..0000000000 --- a/TOOLS/fastmem.sh +++ /dev/null @@ -1,21 +0,0 @@ - -sync -sleep 2 -./fastmem-k6 -sleep 2 -./fastmem-k7 -sleep 2 -./fastmem-mmx -sleep 2 -./fastmem-sse -sleep 2 -./fastmem-c -sleep 2 -./fastmem2-k6 -sleep 2 -./fastmem2-k7 -sleep 2 -./fastmem2-mmx -sleep 2 -./fastmem2-sse -sleep 2 diff --git a/TOOLS/fastmemcpybench.c b/TOOLS/fastmemcpybench.c index 48a29374c3..75eb70fc5b 100644 --- a/TOOLS/fastmemcpybench.c +++ b/TOOLS/fastmemcpybench.c @@ -7,8 +7,6 @@ * was not confirmed through testing. */ -/* According to Uoti this code is broken. */ - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -18,7 +16,92 @@ #include <sys/mman.h> #include <sys/time.h> #include <inttypes.h> -#include "libvo/fastmemcpy.h" + +#include "config.h" +#include "cpudetect.h" + +#define BLOCK_SIZE 4096 +#define CONFUSION_FACTOR 0 + +#if HAVE_MMX +#define COMPILE_MMX +#endif + +#if HAVE_MMX2 +#define COMPILE_MMX2 +#endif + +#if HAVE_AMD3DNOW +#define COMPILE_AMD3DNOW +#endif + +#if HAVE_SSE +#define COMPILE_SSE +#endif + +#ifdef COMPILE_MMX +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#undef HAVE_SSE +#undef HAVE_SSE2 +#define HAVE_MMX 1 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 +#define HAVE_SSE 0 +#define HAVE_SSE2 0 +#define RENAME(a) a ## _MMX +#include "libvo/aclib_template.c" +#endif + +#ifdef COMPILE_MMX2 +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#undef HAVE_SSE +#undef HAVE_SSE2 +#define HAVE_MMX 1 +#define HAVE_MMX2 1 +#define HAVE_AMD3DNOW 0 +#define HAVE_SSE 0 +#define HAVE_SSE2 0 +#define RENAME(a) a ## _MMX2 +#include "libvo/aclib_template.c" +#endif + +#ifdef COMPILE_AMD3DNOW +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#undef HAVE_SSE +#undef HAVE_SSE2 +#define HAVE_MMX 1 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 1 +#define HAVE_SSE 0 +#define HAVE_SSE2 0 +#define RENAME(a) a 
## _3DNow +#include "libvo/aclib_template.c" +#endif + +#ifdef COMPILE_SSE +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#undef HAVE_SSE +#undef HAVE_SSE2 +#define HAVE_MMX 1 +#define HAVE_MMX2 1 +#define HAVE_AMD3DNOW 0 +#define HAVE_SSE 1 +#define HAVE_SSE2 1 +#define RENAME(a) a ## _SSE +#include "libvo/aclib_template.c" +#endif //#define ARR_SIZE 100000 #define ARR_SIZE (1024*768*2) @@ -114,11 +197,60 @@ int main(void) t = GetTimer(); v1 = read_tsc(); for (i = 0; i < 100; i++) - fast_memcpy(marr1, marr2, ARR_SIZE - 16); + memcpy(marr1, marr2, ARR_SIZE - 16); + v2 = read_tsc(); + t = GetTimer() - t; + // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t + printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, + 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t); + +#if HAVE_MMX + t = GetTimer(); + v1 = read_tsc(); + for (i = 0; i < 100; i++) + fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16); + v2 = read_tsc(); + t = GetTimer() - t; + // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t + printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, + 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t); +#endif + +#if HAVE_AMD3DNOW + t = GetTimer(); + v1 = read_tsc(); + for (i = 0; i < 100; i++) + fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16); + v2 = read_tsc(); + t = GetTimer() - t; + // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t + printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, + 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t); +#endif + +#if HAVE_MMX2 + t = GetTimer(); + v1 = read_tsc(); + for (i = 0; i < 100; i++) + fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16); + v2 = read_tsc(); + t = GetTimer() - t; + // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t + printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, + 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t); 
+#endif + +#if HAVE_SSE + t = GetTimer(); + v1 = read_tsc(); + for (i = 0; i < 100; i++) + fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16); v2 = read_tsc(); t = GetTimer() - t; // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t - printf(NAME ": CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, + printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t); +#endif + return 0; } |