summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Makefile 22
-rwxr-xr-x TOOLS/fastmem.sh 21
-rw-r--r-- TOOLS/fastmemcpybench.c 142
3 files changed, 140 insertions, 45 deletions
diff --git a/Makefile b/Makefile
index b1ea48e963..feef5aba60 100644
--- a/Makefile
+++ b/Makefile
@@ -986,7 +986,7 @@ tests: $(addsuffix $(EXESUF),$(TESTS))
testsclean:
-rm -f $(foreach file,$(TESTS),$(call ADD_ALL_EXESUFS,$(file)))
-TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 movinfo netstream subrip vivodump)
+TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 fastmemcpybench movinfo netstream subrip vivodump)
ifdef ARCH_X86
TOOLS += TOOLS/modify_reg
@@ -999,7 +999,7 @@ alltools: $(addsuffix $(EXESUF),$(ALLTOOLS))
toolsclean:
-rm -f $(foreach file,$(ALLTOOLS),$(call ADD_ALL_EXESUFS,$(file)))
- -rm -f TOOLS/fastmem-* TOOLS/realcodecs/*.so.6.0
+ -rm -f TOOLS/realcodecs/*.so.6.0
TOOLS/bmovl-test$(EXESUF): -lSDL_image
@@ -1016,27 +1016,11 @@ TOOLS/vivodump$(EXESUF): TOOLS/vivodump.c
TOOLS/netstream$(EXESUF) TOOLS/vivodump$(EXESUF): $(subst mplayer.o,mplayer-nomain.o,$(OBJS_MPLAYER)) $(filter-out %mencoder.o,$(OBJS_MENCODER)) $(OBJS_COMMON) $(COMMON_LIBS)
$(CC) $(CFLAGS) -o $@ $^ $(EXTRALIBS_MPLAYER) $(EXTRALIBS_MENCODER) $(COMMON_LDFLAGS)
-TOOLS/fastmem-c$(EXESUF): CFLAGS += -DHAVE_MMX=0 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"C\"
-TOOLS/fastmem-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MMX\"
-TOOLS/fastmem-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"K6\"
-TOOLS/fastmem-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"K7\"
-TOOLS/fastmem-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"SSE\"
-TOOLS/fastmem-mga-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-MMX\" -DCONFIG_MGA
-TOOLS/fastmem-mga-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-K6\" -DCONFIG_MGA
-TOOLS/fastmem-mga-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"MGA-K7\" -DCONFIG_MGA
-TOOLS/fastmem-mga-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"MGA-SSE\" -DCONFIG_MGA
-
-fastmemcpybench: $(addsuffix $(EXESUF),$(addprefix TOOLS/fastmem-,c mmx k6 k7 sse mga-mmx mga-k6 mga-k7 mga-sse))
-
-TOOLS/fastmem-%$(EXESUF): TOOLS/fastmemcpybench.c libvo/aclib.c
- $(CC) $(CFLAGS) -o $@ $^
-
REAL_SRCS = $(wildcard TOOLS/realcodecs/*.c)
REAL_TARGETS = $(REAL_SRCS:.c=.so.6.0)
realcodecs: $(REAL_TARGETS)
-
-fastmemcpybench realcodecs: CFLAGS += -g
+realcodecs: CFLAGS += -g
%.so.6.0: %.o
ld -shared -o $@ $< -ldl -lc
diff --git a/TOOLS/fastmem.sh b/TOOLS/fastmem.sh
deleted file mode 100755
index 1b8ff12793..0000000000
--- a/TOOLS/fastmem.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-
-sync
-sleep 2
-./fastmem-k6
-sleep 2
-./fastmem-k7
-sleep 2
-./fastmem-mmx
-sleep 2
-./fastmem-sse
-sleep 2
-./fastmem-c
-sleep 2
-./fastmem2-k6
-sleep 2
-./fastmem2-k7
-sleep 2
-./fastmem2-mmx
-sleep 2
-./fastmem2-sse
-sleep 2
diff --git a/TOOLS/fastmemcpybench.c b/TOOLS/fastmemcpybench.c
index 48a29374c3..75eb70fc5b 100644
--- a/TOOLS/fastmemcpybench.c
+++ b/TOOLS/fastmemcpybench.c
@@ -7,8 +7,6 @@
* was not confirmed through testing.
*/
-/* According to Uoti this code is broken. */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -18,7 +16,92 @@
#include <sys/mman.h>
#include <sys/time.h>
#include <inttypes.h>
-#include "libvo/fastmemcpy.h"
+
+#include "config.h"
+#include "cpudetect.h"
+
+#define BLOCK_SIZE 4096
+#define CONFUSION_FACTOR 0
+
+#if HAVE_MMX
+#define COMPILE_MMX
+#endif
+
+#if HAVE_MMX2
+#define COMPILE_MMX2
+#endif
+
+#if HAVE_AMD3DNOW
+#define COMPILE_AMD3DNOW
+#endif
+
+#if HAVE_SSE
+#define COMPILE_SSE
+#endif
+
+#ifdef COMPILE_MMX
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_MMX2
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX2
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_AMD3DNOW
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _3DNow
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_SSE
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 1
+#define HAVE_SSE2 1
+#define RENAME(a) a ## _SSE
+#include "libvo/aclib_template.c"
+#endif
//#define ARR_SIZE 100000
#define ARR_SIZE (1024*768*2)
@@ -114,11 +197,60 @@ int main(void)
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
- fast_memcpy(marr1, marr2, ARR_SIZE - 16);
+ memcpy(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+
+#ifdef COMPILE_MMX /* not HAVE_MMX: each template section above #undefs and redefines HAVE_*, COMPILE_MMX records whether fast_memcpy_MMX was built */
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#ifdef COMPILE_AMD3DNOW /* not HAVE_AMD3DNOW: the SSE template section above redefines it to 0, which would skip this benchmark even though fast_memcpy_3DNow was built */
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#ifdef COMPILE_MMX2 /* not HAVE_MMX2: the 3DNow! template section above redefines it to 0, which would skip this benchmark when SSE is not compiled afterwards */
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
+ v2 = read_tsc();
+ t = GetTimer() - t;
+ // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+ printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#ifdef COMPILE_SSE /* keyed on COMPILE_SSE (set from config.h before the template sections above redefine HAVE_*), so the guard tracks whether fast_memcpy_SSE was built */
+ t = GetTimer();
+ v1 = read_tsc();
+ for (i = 0; i < 100; i++)
+ fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
- printf(NAME ": CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+ printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
return 0;
}