diff options
author | nick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-10-28 12:02:16 +0000 |
---|---|---|
committer | nick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-10-28 12:02:16 +0000 |
commit | 4bbf6688daa975825fe5e9d9001a41496880b678 (patch) | |
tree | 93ec2f8507209394601c44a3944e696c17c36242 /postproc | |
parent | 6beedb448b6e4775cf4f7dcace3c8d7078891ef4 (diff) | |
download | mpv-4bbf6688daa975825fe5e9d9001a41496880b678.tar.bz2 mpv-4bbf6688daa975825fe5e9d9001a41496880b678.tar.xz |
mmx, mmx2, 3dnow optimized 24to32
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2512 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- | postproc/rgb2rgb.c | 38 | ||||
-rw-r--r-- | postproc/rgb2rgb_template.c | 38 |
2 files changed, 72 insertions, 4 deletions
diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c index dc5f062f3e..73bbca7849 100644 --- a/postproc/rgb2rgb.c +++ b/postproc/rgb2rgb.c @@ -3,6 +3,26 @@ #include "rgb2rgb.h" #include "mmx.h" +#ifdef HAVE_3DNOW +#define PREFETCH "prefetch" +#define PREFETCHW "prefetchw" +#elif HAVE_MMX2 +#define PREFETCH "prefetchnta" +#define PREFETCHW "prefetcht0" +#endif + +#ifdef HAVE_3DNOW +#define EMMS "femms" +#else +#define EMMS "emms" +#endif + +#ifdef HAVE_MMX2 +#define MOVNTQ "movntq" +#else +#define MOVNTQ "movq" +#endif + void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) { uint8_t *dest = dst; @@ -14,10 +34,23 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) #endif end = s + src_size; #ifdef HAVE_MMX +#ifdef PREFETCH + __asm __volatile( + PREFETCH" %0\n\t" + PREFETCH" 64%0\n\t" + PREFETCHW" %1\n\t" + PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory"); +#endif mm_end = (uint8_t*)((((unsigned long)end)/16)*16); __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); while(s < mm_end) { +#ifdef PREFETCH + __asm __volatile( + PREFETCH" 128%0\n\t" + PREFETCHW" 128%1" + ::"m"(*s),"m"(*dest):"memory"); +#endif __asm __volatile( "movd %1, %%mm0\n\t" "movd 3%1, %%mm1\n\t" @@ -27,14 +60,15 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) "punpckldq %%mm3, %%mm2\n\t" "pand %%mm7, %%mm0\n\t" "pand %%mm7, %%mm2\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm2, 8%0" + MOVNTQ" %%mm0, %0\n\t" + MOVNTQ" %%mm2, 8%0" :"=m"(*dest) :"m"(*s) :"memory"); dest += 16; s += 12; } + __asm __volatile(EMMS:::"memory"); #endif while(s < end) { diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c index dc5f062f3e..73bbca7849 100644 --- a/postproc/rgb2rgb_template.c +++ b/postproc/rgb2rgb_template.c @@ -3,6 +3,26 @@ #include "rgb2rgb.h" #include "mmx.h" +#ifdef HAVE_3DNOW +#define PREFETCH "prefetch" +#define PREFETCHW "prefetchw" +#elif HAVE_MMX2 +#define PREFETCH "prefetchnta" +#define PREFETCHW "prefetcht0" +#endif + +#ifdef HAVE_3DNOW +#define EMMS "femms" +#else +#define EMMS "emms" +#endif + +#ifdef HAVE_MMX2 +#define MOVNTQ "movntq" +#else +#define MOVNTQ "movq" +#endif + void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) { uint8_t *dest = dst; @@ -14,10 +34,23 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) #endif end = s + src_size; #ifdef HAVE_MMX +#ifdef PREFETCH + __asm __volatile( + PREFETCH" %0\n\t" + PREFETCH" 64%0\n\t" + PREFETCHW" %1\n\t" + PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory"); +#endif mm_end = (uint8_t*)((((unsigned long)end)/16)*16); __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); while(s < mm_end) { +#ifdef PREFETCH + __asm __volatile( + PREFETCH" 128%0\n\t" + PREFETCHW" 128%1" + ::"m"(*s),"m"(*dest):"memory"); +#endif __asm __volatile( "movd %1, %%mm0\n\t" "movd 3%1, %%mm1\n\t" @@ -27,14 +60,15 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size) "punpckldq %%mm3, %%mm2\n\t" "pand %%mm7, %%mm0\n\t" "pand %%mm7, %%mm2\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm2, 8%0" + MOVNTQ" %%mm0, %0\n\t" + MOVNTQ" %%mm2, 8%0" :"=m"(*dest) :"m"(*s) :"memory"); dest += 16; s += 12; } + __asm __volatile(EMMS:::"memory"); #endif while(s < end) { |