diff options
author | lucabe <lucabe@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2006-06-30 12:00:31 +0000 |
---|---|---|
committer | lucabe <lucabe@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2006-06-30 12:00:31 +0000 |
commit | cc30aae17bf521fc5453c7ee173cceefde255faf (patch) | |
tree | e88e8921f811ae85227b451ce739a0b9d49db9f8 /postproc/rgb2rgb_template.c | |
parent | 6ff1b359593595622d6f231fb3d5ff53e63cfa3d (diff) | |
download | mpv-cc30aae17bf521fc5453c7ee173cceefde255faf.tar.bz2 mpv-cc30aae17bf521fc5453c7ee173cceefde255faf.tar.xz |
Move postproc ---> libswscale
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@18866 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc/rgb2rgb_template.c')
-rw-r--r-- | postproc/rgb2rgb_template.c | 2675 |
1 files changed, 0 insertions, 2675 deletions
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c deleted file mode 100644 index 807da6166f..0000000000 --- a/postproc/rgb2rgb_template.c +++ /dev/null @@ -1,2675 +0,0 @@ -/* - * - * rgb2rgb.c, Software RGB to RGB convertor - * pluralize by Software PAL8 to RGB convertor - * Software YUV to YUV convertor - * Software YUV to RGB convertor - * Written by Nick Kurshev. - * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) - * lot of big-endian byteorder fixes by Alex Beregszaszi - */ - -#include <stddef.h> -#include <inttypes.h> /* for __WORDSIZE */ - -#include "asmalign.h" - -#ifndef __WORDSIZE -// #warning You have misconfigured system and probably will lose performance! -#define __WORDSIZE MP_WORDSIZE -#endif - -#undef PREFETCH -#undef MOVNTQ -#undef EMMS -#undef SFENCE -#undef MMREG_SIZE -#undef PREFETCHW -#undef PAVGB - -#ifdef HAVE_SSE2 -#define MMREG_SIZE 16 -#else -#define MMREG_SIZE 8 -#endif - -#ifdef HAVE_3DNOW -#define PREFETCH "prefetch" -#define PREFETCHW "prefetchw" -#define PAVGB "pavgusb" -#elif defined ( HAVE_MMX2 ) -#define PREFETCH "prefetchnta" -#define PREFETCHW "prefetcht0" -#define PAVGB "pavgb" -#else -#ifdef __APPLE__ -#define PREFETCH "#" -#define PREFETCHW "#" -#else -#define PREFETCH "/nop" -#define PREFETCHW "/nop" -#endif -#endif - -#ifdef HAVE_3DNOW -/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#ifdef HAVE_MMX2 -#define MOVNTQ "movntq" -#define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#ifdef __APPLE__ -#define SFENCE "#" -#else -#define SFENCE "/nop" -#endif -#endif - -static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 23; - __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "punpckldq 3%1, %%mm0\n\t" - "movd 6%1, %%mm1\n\t" - "punpckldq 9%1, %%mm1\n\t" - "movd 12%1, %%mm2\n\t" - "punpckldq 15%1, %%mm2\n\t" - "movd 18%1, %%mm3\n\t" - "punpckldq 21%1, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm1\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm3\n\t" - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm1, 8%0\n\t" - MOVNTQ" %%mm2, 16%0\n\t" - MOVNTQ" %%mm3, 24%0" - :"=m"(*dest) - :"m"(*s) - :"memory"); - dest += 32; - s += 24; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { -#ifdef WORDS_BIGENDIAN - /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ - *dest++ = 0; - *dest++ = s[2]; - *dest++ = s[1]; - *dest++ = s[0]; - s+=3; -#else - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - *dest++ = 0; -#endif - } -} - -static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 31; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq 8%1, %%mm1\n\t" - "movq 16%1, %%mm4\n\t" - "movq 24%1, %%mm5\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - "movq %%mm4, %%mm6\n\t" - "movq %%mm5, %%mm7\n\t" - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm3\n\t" - "psrlq $8, %%mm6\n\t" - "psrlq $8, %%mm7\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm1\n\t" - "pand %2, %%mm4\n\t" - "pand %2, %%mm5\n\t" - "pand %3, %%mm2\n\t" - "pand %3, %%mm3\n\t" - "pand %3, %%mm6\n\t" - "pand %3, %%mm7\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm3, %%mm1\n\t" - "por %%mm6, %%mm4\n\t" - "por %%mm7, %%mm5\n\t" - - "movq %%mm1, %%mm2\n\t" - "movq %%mm4, %%mm3\n\t" - "psllq $48, %%mm2\n\t" - "psllq $32, %%mm3\n\t" - "pand %4, %%mm2\n\t" - "pand %5, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "psrlq $16, %%mm1\n\t" - "psrlq $32, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm3, %%mm1\n\t" - "pand %6, %%mm5\n\t" - "por %%mm5, %%mm4\n\t" - - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm1, 8%0\n\t" - MOVNTQ" %%mm4, 16%0" - :"=m"(*dest) - :"m"(*s),"m"(mask24l), - "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) - :"memory"); - dest += 24; - s += 32; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { -#ifdef WORDS_BIGENDIAN - /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ - s++; - dest[2] = *s++; - dest[1] = *s++; - dest[0] = *s++; - dest += 3; -#else - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - s++; -#endif - } -} - -/* - Original by Strepto/Astral - ported to gcc & bugfixed : A'rpi - MMX2, 3DNOW optimization by Nick Kurshev - 32bit c version, and and&add trick by Michael Niedermayer -*/ -static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size) -{ - register const uint8_t* s=src; - register uint8_t* d=dst; - register const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s)); - __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); - mm_end = end - 15; - while(s<mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq 8%1, %%mm2\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm2, %%mm3\n\t" - "pand %%mm4, %%mm0\n\t" - "pand %%mm4, %%mm2\n\t" - "paddw %%mm1, %%mm0\n\t" - "paddw %%mm3, %%mm2\n\t" - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) - ); - d+=16; - s+=16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - mm_end = end - 3; - while(s < mm_end) - { - register unsigned x= *((uint32_t *)s); - *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); - d+=4; - s+=4; - } - if(s < end) - { - register unsigned short x= *((uint16_t *)s); - *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); - } -} - -static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size) -{ - register const uint8_t* s=src; - register uint8_t* d=dst; - register const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s)); - __asm __volatile("movq %0, %%mm7"::"m"(mask15rg)); - __asm __volatile("movq %0, %%mm6"::"m"(mask15b)); - mm_end = end - 15; - while(s<mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq 8%1, %%mm2\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm2, %%mm3\n\t" - "psrlq $1, %%mm0\n\t" - "psrlq $1, %%mm2\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm3\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm3, %%mm2\n\t" - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) - ); - d+=16; - s+=16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - mm_end = end - 3; - while(s < mm_end) - { - register uint32_t x= *((uint32_t *)s); - *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); - s+=4; - d+=4; - } - if(s < end) - { - register uint16_t x= *((uint16_t *)s); - *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); - s+=2; - d+=2; - } -} - -static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) - asm volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - ASMALIGN16 - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $5, %%mm0 \n\t" - "pslld $11, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) - ); -#else - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask)); - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 16; - } -#endif - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - register int rgb = *(uint32_t*)s; s += 4; - *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); - } -} - -static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $8, %%mm0\n\t" - "psllq $8, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - register int rgb = *(uint32_t*)s; s += 4; - *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); - } -} - -static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) - asm volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - ASMALIGN16 - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $6, %%mm0 \n\t" - "pslld $10, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) - ); -#else - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask)); - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $9, %%mm2\n\t" - "psrlq $9, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 16; - } -#endif - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - register int rgb = *(uint32_t*)s; s += 4; - *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); - } -} - -static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $7, %%mm0\n\t" - "psllq $7, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - register int rgb = *(uint32_t*)s; s += 4; - *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); - } -} - -static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 11; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - const int b= *s++; - const int g= *s++; - const int r= *s++; - *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -} - -static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $8, %%mm0\n\t" - "psllq $8, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - const int r= *s++; - const int g= *s++; - const int b= *s++; - *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -} - -static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 11; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $9, %%mm2\n\t" - "psrlq $9, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - const int b= *s++; - const int g= *s++; - const int r= *s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -} - -static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $7, %%mm0\n\t" - "psllq $7, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - const int r= *s++; - const int g= *s++; - const int b= *s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -} - -/* - I use here less accurate approximation by simply - left-shifting the input - value and filling the low order bits with - zeroes. This method improves png's - compression but this scheme cannot reproduce white exactly, since it does not - generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | Leftmost Bits Repeated to Fill Open Bits - | - Original Bits -*/ -static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint16_t *end; -#ifdef HAVE_MMX - const uint16_t *mm_end; -#endif - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (uint16_t *)src; - end = s + src_size/2; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 7; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq %1, %%mm1\n\t" - "movq %1, %%mm2\n\t" - "pand %2, %%mm0\n\t" - "pand %3, %%mm1\n\t" - "pand %4, %%mm2\n\t" - "psllq $3, %%mm0\n\t" - "psrlq $2, %%mm1\n\t" - "psrlq $7, %%mm2\n\t" - "movq %%mm0, %%mm3\n\t" - "movq %%mm1, %%mm4\n\t" - "movq %%mm2, %%mm5\n\t" - "punpcklwd %5, %%mm0\n\t" - "punpcklwd %5, %%mm1\n\t" - "punpcklwd %5, %%mm2\n\t" - "punpckhwd %5, %%mm3\n\t" - "punpckhwd %5, %%mm4\n\t" - "punpckhwd %5, %%mm5\n\t" - "psllq $8, %%mm1\n\t" - "psllq $16, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm2, %%mm0\n\t" - "psllq $8, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm5, %%mm3\n\t" - - "movq %%mm0, %%mm6\n\t" - "movq %%mm3, %%mm7\n\t" - - "movq 8%1, %%mm0\n\t" - "movq 8%1, %%mm1\n\t" - "movq 8%1, %%mm2\n\t" - "pand %2, %%mm0\n\t" - "pand %3, %%mm1\n\t" - "pand %4, %%mm2\n\t" - "psllq $3, %%mm0\n\t" - "psrlq $2, %%mm1\n\t" - "psrlq $7, %%mm2\n\t" - "movq %%mm0, %%mm3\n\t" - "movq %%mm1, %%mm4\n\t" - "movq %%mm2, %%mm5\n\t" - "punpcklwd %5, %%mm0\n\t" - "punpcklwd %5, %%mm1\n\t" - "punpcklwd %5, %%mm2\n\t" - "punpckhwd %5, %%mm3\n\t" - "punpckhwd %5, %%mm4\n\t" - "punpckhwd %5, %%mm5\n\t" - "psllq $8, %%mm1\n\t" - "psllq $16, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm2, %%mm0\n\t" - "psllq $8, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm5, %%mm3\n\t" - - :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) - :"memory"); - /* Borrowed 32 to 24 */ - __asm __volatile( - "movq %%mm0, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "movq %%mm6, %%mm0\n\t" - "movq %%mm7, %%mm1\n\t" - - "movq %%mm4, %%mm6\n\t" - "movq %%mm5, %%mm7\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm3\n\t" - "psrlq $8, %%mm6\n\t" - "psrlq $8, %%mm7\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm1\n\t" - "pand %2, %%mm4\n\t" - "pand %2, %%mm5\n\t" - "pand %3, %%mm2\n\t" - "pand %3, %%mm3\n\t" - "pand %3, %%mm6\n\t" - "pand %3, %%mm7\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm3, %%mm1\n\t" - "por %%mm6, %%mm4\n\t" - "por %%mm7, %%mm5\n\t" - - "movq %%mm1, %%mm2\n\t" - "movq %%mm4, %%mm3\n\t" - "psllq $48, %%mm2\n\t" - "psllq $32, %%mm3\n\t" - "pand %4, %%mm2\n\t" - "pand %5, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "psrlq $16, %%mm1\n\t" - "psrlq $32, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm3, %%mm1\n\t" - "pand %6, %%mm5\n\t" - "por %%mm5, %%mm4\n\t" - - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm1, 8%0\n\t" - MOVNTQ" %%mm4, 16%0" - - :"=m"(*d) - :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) - :"memory"); - d += 24; - s += 8; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - register uint16_t bgr; - bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; - } -} - -static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size) -{ - const uint16_t *end; -#ifdef HAVE_MMX - const uint16_t *mm_end; -#endif - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (const uint16_t *)src; - end = s + src_size/2; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 7; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq %1, %%mm1\n\t" - "movq %1, %%mm2\n\t" - "pand %2, %%mm0\n\t" - "pand %3, %%mm1\n\t" - "pand %4, %%mm2\n\t" - "psllq $3, %%mm0\n\t" - "psrlq $3, %%mm1\n\t" - "psrlq $8, %%mm2\n\t" - "movq %%mm0, %%mm3\n\t" - "movq %%mm1, %%mm4\n\t" - "movq %%mm2, %%mm5\n\t" - "punpcklwd %5, %%mm0\n\t" - "punpcklwd %5, %%mm1\n\t" - "punpcklwd %5, %%mm2\n\t" - "punpckhwd %5, %%mm3\n\t" - "punpckhwd %5, %%mm4\n\t" - "punpckhwd %5, %%mm5\n\t" - "psllq $8, %%mm1\n\t" - "psllq $16, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm2, %%mm0\n\t" - "psllq $8, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm5, %%mm3\n\t" - - "movq %%mm0, %%mm6\n\t" - "movq %%mm3, %%mm7\n\t" - - "movq 8%1, %%mm0\n\t" - "movq 8%1, %%mm1\n\t" - "movq 8%1, %%mm2\n\t" - "pand %2, %%mm0\n\t" - "pand %3, %%mm1\n\t" - "pand %4, %%mm2\n\t" - "psllq $3, %%mm0\n\t" - "psrlq $3, %%mm1\n\t" - "psrlq $8, %%mm2\n\t" - "movq %%mm0, %%mm3\n\t" - "movq %%mm1, %%mm4\n\t" - "movq %%mm2, %%mm5\n\t" - "punpcklwd %5, %%mm0\n\t" - "punpcklwd %5, %%mm1\n\t" - "punpcklwd %5, %%mm2\n\t" - "punpckhwd %5, %%mm3\n\t" - "punpckhwd %5, %%mm4\n\t" - "punpckhwd %5, %%mm5\n\t" - "psllq $8, %%mm1\n\t" - "psllq $16, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm2, %%mm0\n\t" - "psllq $8, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm5, %%mm3\n\t" - :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) - :"memory"); - /* Borrowed 32 to 24 */ - __asm __volatile( - "movq %%mm0, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "movq %%mm6, %%mm0\n\t" - "movq %%mm7, %%mm1\n\t" - - "movq %%mm4, %%mm6\n\t" - "movq %%mm5, %%mm7\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm3\n\t" - "psrlq $8, %%mm6\n\t" - "psrlq $8, %%mm7\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm1\n\t" - "pand %2, %%mm4\n\t" - "pand %2, %%mm5\n\t" - "pand %3, %%mm2\n\t" - "pand %3, %%mm3\n\t" - "pand %3, %%mm6\n\t" - "pand %3, %%mm7\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm3, %%mm1\n\t" - "por %%mm6, %%mm4\n\t" - "por %%mm7, %%mm5\n\t" - - "movq %%mm1, %%mm2\n\t" - "movq %%mm4, %%mm3\n\t" - "psllq $48, %%mm2\n\t" - "psllq $32, %%mm3\n\t" - "pand %4, %%mm2\n\t" - "pand %5, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "psrlq $16, %%mm1\n\t" - "psrlq $32, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm3, %%mm1\n\t" - "pand %6, %%mm5\n\t" - "por %%mm5, %%mm4\n\t" - - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm1, 8%0\n\t" - MOVNTQ" %%mm4, 16%0" - - :"=m"(*d) - :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) - :"memory"); - d += 24; - s += 8; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory"); -#endif - while(s < end) - { - register uint16_t bgr; - bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; - } -} - -static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) -{< |