diff options
author | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2003-02-18 19:22:34 +0000 |
---|---|---|
committer | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2003-02-18 19:22:34 +0000 |
commit | 79067e3c099570165e9d88985fdb4c52c811ac21 (patch) | |
tree | 1ddb84621dc218b3b6d8542a55a0ace289f6d64e /postproc | |
parent | bb75b2a73a27460a4cf5f0f12c2932b1ff31d131 (diff) | |
download | mpv-79067e3c099570165e9d88985fdb4c52c811ac21.tar.bz2 mpv-79067e3c099570165e9d88985fdb4c52c811ac21.tar.xz |
optimize
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@9455 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- | postproc/rgb2rgb.c | 5 | ||||
-rw-r--r-- | postproc/rgb2rgb_template.c | 74 |
2 files changed, 77 insertions, 2 deletions
```diff
diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c
index be21af0828..c07301c3f3 100644
--- a/postproc/rgb2rgb.c
+++ b/postproc/rgb2rgb.c
@@ -28,6 +28,11 @@ static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFU
 static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
 static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
 static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
+static const uint64_t mask3216br __attribute__((aligned(8)))=0x00F800F800F800F8ULL;
+static const uint64_t mask3216g __attribute__((aligned(8)))=0x0000FC000000FC00ULL;
+static const uint64_t mask3215g __attribute__((aligned(8)))=0x0000F8000000F800ULL;
+static const uint64_t mul3216 __attribute__((aligned(8))) = 0x2000000420000004ULL;
+static const uint64_t mul3215 __attribute__((aligned(8))) = 0x2000000820000008ULL;
 static const uint64_t mask24b __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
 static const uint64_t mask24g __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
 static const uint64_t mask24r __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c
index e299b0c12e..01ba6ed6f2 100644
--- a/postproc/rgb2rgb_template.c
+++ b/postproc/rgb2rgb_template.c
@@ -318,12 +318,46 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
 	uint16_t *d = (uint16_t *)dst;
 	end = s + src_size;
 #ifdef HAVE_MMX
+	mm_end = end - 15;
+#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster)
+	asm volatile(
+		"movq %3, %%mm5 \n\t"
+		"movq %4, %%mm6 \n\t"
+		"movq %5, %%mm7 \n\t"
+		".balign 16 \n\t"
+		"1: \n\t"
+		PREFETCH" 32(%1) \n\t"
+		"movd (%1), %%mm0 \n\t"
+		"movd 4(%1), %%mm3 \n\t"
+		"punpckldq 8(%1), %%mm0 \n\t"
+		"punpckldq 12(%1), %%mm3 \n\t"
+		"movq %%mm0, %%mm1 \n\t"
+		"movq %%mm3, %%mm4 \n\t"
+		"pand %%mm6, %%mm0 \n\t"
+		"pand %%mm6, %%mm3 \n\t"
+		"pmaddwd %%mm7, %%mm0 \n\t"
+		"pmaddwd %%mm7, %%mm3 \n\t"
+		"pand %%mm5, %%mm1 \n\t"
+		"pand %%mm5, %%mm4 \n\t"
+		"por %%mm1, %%mm0 \n\t"
+		"por %%mm4, %%mm3 \n\t"
+		"psrld $5, %%mm0 \n\t"
+		"pslld $11, %%mm3 \n\t"
+		"por %%mm3, %%mm0 \n\t"
+		MOVNTQ" %%mm0, (%0) \n\t"
+		"addl $16, %1 \n\t"
+		"addl $8, %0 \n\t"
+		"cmpl %2, %1 \n\t"
+		" jb 1b \n\t"
+		: "+r" (d), "+r"(s)
+		: "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
+	);
+#else
 	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
 	__asm __volatile(
 		"movq %0, %%mm7\n\t"
 		"movq %1, %%mm6\n\t"
 		::"m"(red_16mask),"m"(green_16mask));
-	mm_end = end - 15;
 	while(s < mm_end)
 	{
 		__asm __volatile(
@@ -359,6 +393,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
 		d += 4;
 		s += 16;
 	}
+#endif
 	__asm __volatile(SFENCE:::"memory");
 	__asm __volatile(EMMS:::"memory");
 #endif
@@ -441,12 +476,46 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
 	uint16_t *d = (uint16_t *)dst;
 	end = s + src_size;
 #ifdef HAVE_MMX
+	mm_end = end - 15;
+#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster)
+	asm volatile(
+		"movq %3, %%mm5 \n\t"
+		"movq %4, %%mm6 \n\t"
+		"movq %5, %%mm7 \n\t"
+		".balign 16 \n\t"
+		"1: \n\t"
+		PREFETCH" 32(%1) \n\t"
+		"movd (%1), %%mm0 \n\t"
+		"movd 4(%1), %%mm3 \n\t"
+		"punpckldq 8(%1), %%mm0 \n\t"
+		"punpckldq 12(%1), %%mm3 \n\t"
+		"movq %%mm0, %%mm1 \n\t"
+		"movq %%mm3, %%mm4 \n\t"
+		"pand %%mm6, %%mm0 \n\t"
+		"pand %%mm6, %%mm3 \n\t"
+		"pmaddwd %%mm7, %%mm0 \n\t"
+		"pmaddwd %%mm7, %%mm3 \n\t"
+		"pand %%mm5, %%mm1 \n\t"
+		"pand %%mm5, %%mm4 \n\t"
+		"por %%mm1, %%mm0 \n\t"
+		"por %%mm4, %%mm3 \n\t"
+		"psrld $6, %%mm0 \n\t"
+		"pslld $10, %%mm3 \n\t"
+		"por %%mm3, %%mm0 \n\t"
+		MOVNTQ" %%mm0, (%0) \n\t"
+		"addl $16, %1 \n\t"
+		"addl $8, %0 \n\t"
+		"cmpl %2, %1 \n\t"
+		" jb 1b \n\t"
+		: "+r" (d), "+r"(s)
+		: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
+	);
+#else
 	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
 	__asm __volatile(
 		"movq %0, %%mm7\n\t"
 		"movq %1, %%mm6\n\t"
 		::"m"(red_15mask),"m"(green_15mask));
-	mm_end = end - 15;
 	while(s < mm_end)
 	{
 		__asm __volatile(
@@ -482,6 +551,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
 		d += 4;
 		s += 16;
 	}
+#endif
 	__asm __volatile(SFENCE:::"memory");
 	__asm __volatile(EMMS:::"memory");
 #endif
```