diff options
author | aurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-10-21 11:55:20 +0000 |
---|---|---|
committer | aurel <aurel@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-10-21 11:55:20 +0000 |
commit | 798d2d133780c000166f01fd2d7fbde395196be3 (patch) | |
tree | 1346451ddb9911c63cd778af21261a6ae6f70346 /libvo | |
parent | 159928a38a3fc65427a1336b2c73aa3f78a82aa7 (diff) | |
download | mpv-798d2d133780c000166f01fd2d7fbde395196be3.tar.bz2 mpv-798d2d133780c000166f01fd2d7fbde395196be3.tar.xz |
Adapt the existing MMX/MMX2/SSE/3DNow! optimizations so they work on x86_64.
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@13721 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo')
-rw-r--r-- | libvo/aclib.c | 7 |
-rw-r--r-- | libvo/aclib_template.c | 70 |
-rw-r--r-- | libvo/osd.c | 24 |
-rw-r--r-- | libvo/osd_template.c | 6 |
4 files changed, 49 insertions, 58 deletions
diff --git a/libvo/aclib.c b/libvo/aclib.c index e2a19223f8..a3330eafef 100644 --- a/libvo/aclib.c +++ b/libvo/aclib.c @@ -17,7 +17,7 @@ //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :) //#define STATISTICS -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #define CAN_COMPILE_X86_ASM #endif @@ -50,7 +50,6 @@ #undef HAVE_3DNOW #undef HAVE_SSE #undef HAVE_SSE2 -#undef ARCH_X86 /* #ifdef COMPILE_C #undef HAVE_MMX @@ -69,7 +68,6 @@ #undef HAVE_3DNOW #undef HAVE_SSE #undef HAVE_SSE2 -#define ARCH_X86 #define RENAME(a) a ## _MMX #include "aclib_template.c" #endif @@ -82,7 +80,6 @@ #undef HAVE_3DNOW #undef HAVE_SSE #undef HAVE_SSE2 -#define ARCH_X86 #define RENAME(a) a ## _MMX2 #include "aclib_template.c" #endif @@ -95,7 +92,6 @@ #define HAVE_3DNOW #undef HAVE_SSE #undef HAVE_SSE2 -#define ARCH_X86 #define RENAME(a) a ## _3DNow #include "aclib_template.c" #endif @@ -108,7 +104,6 @@ #undef HAVE_3DNOW #define HAVE_SSE #define HAVE_SSE2 -#define ARCH_X86 #define RENAME(a) a ## _SSE #include "aclib_template.c" #endif diff --git a/libvo/aclib_template.c b/libvo/aclib_template.c index 54b420eecf..0b50f7ecf3 100644 --- a/libvo/aclib_template.c +++ b/libvo/aclib_template.c @@ -257,62 +257,62 @@ static void * RENAME(fast_memcpy)(void * to, const void * from, size_t len) // Pure Assembly cuz gcc is a bit unpredictable ;) if(i>=BLOCK_SIZE/64) asm volatile( - "xorl %%eax, %%eax \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" ".balign 16 \n\t" "1: \n\t" - "movl (%0, %%eax), %%ebx \n\t" - "movl 32(%0, %%eax), %%ebx \n\t" - "movl 64(%0, %%eax), %%ebx \n\t" - "movl 96(%0, %%eax), %%ebx \n\t" - "addl $128, %%eax \n\t" - "cmpl %3, %%eax \n\t" + "movl (%0, %%"REG_a"), %%ebx \n\t" + "movl 32(%0, %%"REG_a"), %%ebx \n\t" + "movl 64(%0, %%"REG_a"), %%ebx \n\t" + "movl 96(%0, %%"REG_a"), %%ebx \n\t" + "add $128, %%"REG_a" \n\t" + "cmp %3, %%"REG_a" \n\t" " jb 1b \n\t" - "xorl %%eax, %%eax \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" ".balign 16 \n\t" 
"2: \n\t" - "movq (%0, %%eax), %%mm0\n" - "movq 8(%0, %%eax), %%mm1\n" - "movq 16(%0, %%eax), %%mm2\n" - "movq 24(%0, %%eax), %%mm3\n" - "movq 32(%0, %%eax), %%mm4\n" - "movq 40(%0, %%eax), %%mm5\n" - "movq 48(%0, %%eax), %%mm6\n" - "movq 56(%0, %%eax), %%mm7\n" - MOVNTQ" %%mm0, (%1, %%eax)\n" - MOVNTQ" %%mm1, 8(%1, %%eax)\n" - MOVNTQ" %%mm2, 16(%1, %%eax)\n" - MOVNTQ" %%mm3, 24(%1, %%eax)\n" - MOVNTQ" %%mm4, 32(%1, %%eax)\n" - MOVNTQ" %%mm5, 40(%1, %%eax)\n" - MOVNTQ" %%mm6, 48(%1, %%eax)\n" - MOVNTQ" %%mm7, 56(%1, %%eax)\n" - "addl $64, %%eax \n\t" - "cmpl %3, %%eax \n\t" + "movq (%0, %%"REG_a"), %%mm0\n" + "movq 8(%0, %%"REG_a"), %%mm1\n" + "movq 16(%0, %%"REG_a"), %%mm2\n" + "movq 24(%0, %%"REG_a"), %%mm3\n" + "movq 32(%0, %%"REG_a"), %%mm4\n" + "movq 40(%0, %%"REG_a"), %%mm5\n" + "movq 48(%0, %%"REG_a"), %%mm6\n" + "movq 56(%0, %%"REG_a"), %%mm7\n" + MOVNTQ" %%mm0, (%1, %%"REG_a")\n" + MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n" + MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n" + MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n" + MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n" + MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n" + MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n" + MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n" + "add $64, %%"REG_a" \n\t" + "cmp %3, %%"REG_a" \n\t" "jb 2b \n\t" #if CONFUSION_FACTOR > 0 // a few percent speedup on out of order executing CPUs - "movl %5, %%eax \n\t" + "mov %5, %%"REG_a" \n\t" "2: \n\t" "movl (%0), %%ebx \n\t" "movl (%0), %%ebx \n\t" "movl (%0), %%ebx \n\t" "movl (%0), %%ebx \n\t" - "decl %%eax \n\t" + "dec %%"REG_a" \n\t" " jnz 2b \n\t" #endif - "xorl %%eax, %%eax \n\t" - "addl %3, %0 \n\t" - "addl %3, %1 \n\t" - "subl %4, %2 \n\t" - "cmpl %4, %2 \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" + "add %3, %0 \n\t" + "add %3, %1 \n\t" + "sub %4, %2 \n\t" + "cmp %4, %2 \n\t" " jae 1b \n\t" : "+r" (from), "+r" (to), "+r" (i) - : "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR) - : "%eax", "%ebx" + : "r" ((long)BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" ((long)CONFUSION_FACTOR) + : "%"REG_a, "%ebx" ); 
for(; i>0; i--) diff --git a/libvo/osd.c b/libvo/osd.c index 742174e7c8..3c616dc0de 100644 --- a/libvo/osd.c +++ b/libvo/osd.c @@ -14,7 +14,7 @@ extern int verbose; // defined in mplayer.c -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #define CAN_COMPILE_X86_ASM #endif @@ -48,18 +48,18 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#undef ARCH_X86 + +#ifndef CAN_COMPILE_X86_ASM #ifdef COMPILE_C #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#undef ARCH_X86 #define RENAME(a) a ## _C #include "osd_template.c" #endif -#ifdef CAN_COMPILE_X86_ASM +#else //X86 noMMX versions #ifdef COMPILE_C @@ -67,7 +67,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _X86 #include "osd_template.c" #endif @@ -78,7 +77,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF #define HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX #include "osd_template.c" #endif @@ -89,7 +87,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF #define HAVE_MMX #define HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX2 #include "osd_template.c" #endif @@ -100,7 +97,6 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF #define HAVE_MMX #undef HAVE_MMX2 #define HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _3DNow #include "osd_template.c" #endif @@ -129,7 +125,7 @@ void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, in vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride); #elif defined (HAVE_MMX) vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif defined (ARCH_X86) +#elif defined(ARCH_X86) || defined(ARCH_X86_64) vo_draw_alpha_yv12_X86(w, 
h, src, srca, srcstride, dstbase, dststride); #else vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride); @@ -159,7 +155,7 @@ void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, in vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride); #elif defined (HAVE_MMX) vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif defined (ARCH_X86) +#elif defined(ARCH_X86) || defined(ARCH_X86_64) vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride); #else vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride); @@ -189,7 +185,7 @@ void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, in vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride); #elif defined (HAVE_MMX) vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif defined (ARCH_X86) +#elif defined(ARCH_X86) || defined(ARCH_X86_64) vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride); #else vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride); @@ -219,7 +215,7 @@ void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, i vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride); #elif defined (HAVE_MMX) vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif defined (ARCH_X86) +#elif defined(ARCH_X86) || defined(ARCH_X86_64) vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride); #else vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride); @@ -249,7 +245,7 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride); #elif defined (HAVE_MMX) vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif defined (ARCH_X86) +#elif defined(ARCH_X86) || defined(ARCH_X86_64) vo_draw_alpha_rgb32_X86(w, h, src, 
srca, srcstride, dstbase, dststride); #else vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride); @@ -294,7 +290,7 @@ void vo_draw_alpha_init(){ mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n"); #elif defined (HAVE_MMX) mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n"); -#elif defined (ARCH_X86) +#elif defined(ARCH_X86) || defined(ARCH_X86_64) mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n"); #else mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n"); diff --git a/libvo/osd_template.c b/libvo/osd_template.c index 5c8c009ec7..e2ada2ccaf 100644 --- a/libvo/osd_template.c +++ b/libvo/osd_template.c @@ -189,7 +189,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, for(y=0;y<h;y++){ register unsigned char *dst = dstbase; register int x; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #ifdef HAVE_MMX asm volatile( PREFETCHW" %0\n\t" @@ -253,7 +253,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, "addl %2, %%eax\n\t" "movb %%ah, 2(%0)\n\t" : - :"r" (dst), + :"D" (dst), "r" ((unsigned)srca[x]), "r" (((unsigned)src[x])<<8) :"%eax", "%ecx" @@ -293,7 +293,7 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, #endif for(y=0;y<h;y++){ register int x; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #ifdef HAVE_MMX #ifdef HAVE_3DNOW asm volatile( |