author      michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2>   2001-11-26 00:31:43 +0000
committer   michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2>   2001-11-26 00:31:43 +0000
commit      5645836ad4ad51848216fe431a3393845caa1e0a (patch)
tree        ff832372dcd0be6a47751daf772e9edf938267cf /postproc
parent      8188f8359cc532987eb1152e288d2e66301eb910 (diff)
download    mpv-5645836ad4ad51848216fe431a3393845caa1e0a.tar.bz2
            mpv-5645836ad4ad51848216fe431a3393845caa1e0a.tar.xz
runtime cpu detection
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3127 b3059339-0415-0410-9bf9-f77b7e298cf2
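Editorial note: the diff below replaces the compile-time HAVE_MMX/HAVE_MMX2/HAVE_3DNOW guards in swscale.c with a single CAN_COMPILE_X86_ASM guard and pulls in ../cpudetect.h, so the choice of code path can be made from the CPU's actual capabilities at runtime. The sketch below shows the general idea of such a runtime check using the CPUID instruction; the helper names are illustrative, not the actual cpudetect.h interface, and a real implementation would first confirm that CPUID exists at all (EFLAGS ID bit) on very old CPUs.

    /* Minimal sketch of CPUID-based feature detection (illustrative names,
     * not MPlayer's cpudetect API). */
    #include <stdint.h>

    static uint32_t cpuid_edx(uint32_t leaf)
    {
        uint32_t eax, ebx, ecx, edx;
        __asm__ __volatile__("cpuid"
                             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                             : "a"(leaf));
        return edx;
    }

    static int cpu_has_mmx(void) { return (cpuid_edx(1) >> 23) & 1; } /* EDX bit 23: MMX */
    static int cpu_has_sse(void) { return (cpuid_edx(1) >> 25) & 1; } /* EDX bit 25: SSE */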
Diffstat (limited to 'postproc')
-rw-r--r--   postproc/swscale.c          | 1721
-rw-r--r--   postproc/swscale_template.c |  190
2 files changed, 87 insertions, 1824 deletions
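Editorial note: with CAN_COMPILE_X86_ASM defined whenever ARCH_X86 is set, the MMX/MMX2 paths are compiled on any x86 build and the plain C path is kept alongside them, so the fastest usable variant can be selected once the CPU has been probed. A hedged sketch of that dispatch pattern follows; the function names and capability flags are hypothetical stand-ins, not the routines actually used in swscale.c.

    /* Illustrative runtime dispatch: compile every variant, pick one at
     * startup from the detected capabilities (names are hypothetical). */
    #include <stdint.h>

    typedef void (*scale_line_fn)(const uint8_t *src, uint8_t *dst, int width);

    static void scale_line_c(const uint8_t *src, uint8_t *dst, int width)
    {
        int i;
        for (i = 0; i < width; i++)      /* portable fallback (dummy body) */
            dst[i] = src[i];
    }

    #ifdef CAN_COMPILE_X86_ASM
    void scale_line_mmx (const uint8_t *src, uint8_t *dst, int width); /* asm variants,     */
    void scale_line_mmx2(const uint8_t *src, uint8_t *dst, int width); /* defined elsewhere */
    #endif

    static scale_line_fn pick_scale_line(int has_mmx, int has_mmx2)
    {
    #ifdef CAN_COMPILE_X86_ASM
        if (has_mmx2) return scale_line_mmx2;
        if (has_mmx)  return scale_line_mmx;
    #endif
        return scale_line_c;
    }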
diff --git a/postproc/swscale.c b/postproc/swscale.c index 56e53f8bf6..7afb61b2b4 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -10,6 +10,7 @@ #include "../config.h" #include "swscale.h" #include "../mmx_defs.h" +#include "../cpudetect.h" #undef MOVNTQ #undef PAVGB @@ -37,26 +38,18 @@ more intelligent missalignment avoidance for the horizontal scaler bicubic scaler dither in C change the distance of the u & v buffer +how to differenciate between x86 an C at runtime ?! (using C for now) */ #define ABS(a) ((a) > 0 ? (a) : (-(a))) #define MIN(a,b) ((a) > (b) ? (b) : (a)) #define MAX(a,b) ((a) < (b) ? (b) : (a)) -#ifdef HAVE_MMX2 -#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" -#elif defined (HAVE_3DNOW) -#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" -#endif - -#ifdef HAVE_MMX2 -#define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" -#else -#define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" +#ifdef ARCH_X86 +#define CAN_COMPILE_X86_ASM #endif - -#ifdef HAVE_MMX +#ifdef CAN_COMPILE_X86_ASM static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL; static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL; static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL; @@ -98,11 +91,9 @@ static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; static uint64_t __attribute__((aligned(8))) temp0; static uint64_t __attribute__((aligned(8))) asm_yalpha1; static uint64_t __attribute__((aligned(8))) asm_uvalpha1; -#endif // temporary storage for 4 yuv lines: // 16bit for now (mmx likes it more compact) -#ifdef HAVE_MMX static uint16_t __attribute__((aligned(8))) pix_buf_y[4][2048]; static uint16_t __attribute__((aligned(8))) pix_buf_uv[2][2048*2]; #else @@ -127,483 +118,14 @@ static int yuvtab_0c92[256]; static int yuvtab_1a1e[256]; static int yuvtab_40cf[256]; -#ifdef HAVE_MMX2 +#ifdef CAN_COMPILE_X86_ASM static uint8_t funnyYCode[10000]; static uint8_t funnyUVCode[10000]; #endif static int canMMX2BeUsed=0; -#define FULL_YSCALEYUV2RGB \ - "pxor %%mm7, %%mm7 \n\t"\ - "movd %6, %%mm6 \n\t" /*yalpha1*/\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "movd %7, %%mm5 \n\t" /*uvalpha1*/\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "xorl %%eax, %%eax \n\t"\ - ".balign 16 \n\t"\ - "1: \n\t"\ - "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw w400, %%mm3 \n\t" /* 8(U-128)*/\ - "pmulhw yCoeff, %%mm1 \n\t"\ -\ -\ - "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "pmulhw ubCoeff, %%mm3 \n\t"\ - "psraw $4, 
%%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "pmulhw ugCoeff, %%mm2 \n\t"\ - "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "psubw w400, %%mm0 \n\t" /* (V-128)8*/\ -\ -\ - "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ - "pmulhw vrCoeff, %%mm0 \n\t"\ - "pmulhw vgCoeff, %%mm4 \n\t"\ - "paddw %%mm1, %%mm3 \n\t" /* B*/\ - "paddw %%mm1, %%mm0 \n\t" /* R*/\ - "packuswb %%mm3, %%mm3 \n\t"\ -\ - "packuswb %%mm0, %%mm0 \n\t"\ - "paddw %%mm4, %%mm2 \n\t"\ - "paddw %%mm2, %%mm1 \n\t" /* G*/\ -\ - "packuswb %%mm1, %%mm1 \n\t" - -#define YSCALEYUV2RGB \ - "movd %6, %%mm6 \n\t" /*yalpha1*/\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "movq %%mm6, asm_yalpha1 \n\t"\ - "movd %7, %%mm5 \n\t" /*uvalpha1*/\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "movq %%mm5, asm_uvalpha1 \n\t"\ - "xorl %%eax, %%eax \n\t"\ - ".balign 16 \n\t"\ - "1: \n\t"\ - "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "movq asm_uvalpha1, %%mm0 \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "psubw w400, %%mm3 \n\t" /* (U-128)8*/\ - "psubw w400, %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw ugCoeff, %%mm3 \n\t"\ - "pmulhw vgCoeff, %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ - "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ - "pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "pmulhw ubCoeff, %%mm2 \n\t"\ - "pmulhw vrCoeff, %%mm5 \n\t"\ - "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw yCoeff, %%mm1 \n\t"\ - "pmulhw yCoeff, %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 
\n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" - -#define YSCALEYUV2RGB1 \ - "xorl %%eax, %%eax \n\t"\ - ".balign 16 \n\t"\ - "1: \n\t"\ - "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "psubw w400, %%mm3 \n\t" /* (U-128)8*/\ - "psubw w400, %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw ugCoeff, %%mm3 \n\t"\ - "pmulhw vgCoeff, %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "pmulhw ubCoeff, %%mm2 \n\t"\ - "pmulhw vrCoeff, %%mm5 \n\t"\ - "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw yCoeff, %%mm1 \n\t"\ - "pmulhw yCoeff, %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" - -// do vertical chrominance interpolation -#define YSCALEYUV2RGB1b \ - "xorl %%eax, %%eax \n\t"\ - ".balign 16 \n\t"\ - "1: \n\t"\ - "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ - "psrlw $5, %%mm3 \n\t"\ - "psrlw $5, %%mm4 \n\t"\ - "psubw w400, %%mm3 \n\t" /* (U-128)8*/\ - "psubw w400, %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw ugCoeff, %%mm3 \n\t"\ - "pmulhw vgCoeff, %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "pmulhw ubCoeff, %%mm2 \n\t"\ - "pmulhw vrCoeff, %%mm5 \n\t"\ - "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw yCoeff, %%mm1 \n\t"\ - "pmulhw yCoeff, %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - 
"punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" - -#define WRITEBGR32 \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ -\ - MOVNTQ(%%mm0, (%4, %%eax, 4))\ - MOVNTQ(%%mm2, 8(%4, %%eax, 4))\ - MOVNTQ(%%mm1, 16(%4, %%eax, 4))\ - MOVNTQ(%%mm3, 24(%4, %%eax, 4))\ -\ - "addl $8, %%eax \n\t"\ - "cmpl %5, %%eax \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR16 \ - "pand bF8, %%mm2 \n\t" /* B */\ - "pand bFC, %%mm4 \n\t" /* G */\ - "pand bF8, %%mm5 \n\t" /* R */\ - "psrlq $3, %%mm2 \n\t"\ -\ - "movq %%mm2, %%mm1 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ -\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - "punpcklbw %%mm5, %%mm2 \n\t"\ - "punpckhbw %%mm7, %%mm4 \n\t"\ - "punpckhbw %%mm5, %%mm1 \n\t"\ -\ - "psllq $3, %%mm3 \n\t"\ - "psllq $3, %%mm4 \n\t"\ -\ - "por %%mm3, %%mm2 \n\t"\ - "por %%mm4, %%mm1 \n\t"\ -\ - MOVNTQ(%%mm2, (%4, %%eax, 2))\ - MOVNTQ(%%mm1, 8(%4, %%eax, 2))\ -\ - "addl $8, %%eax \n\t"\ - "cmpl %5, %%eax \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR15 \ - "pand bF8, %%mm2 \n\t" /* B */\ - "pand bF8, %%mm4 \n\t" /* G */\ - "pand bF8, %%mm5 \n\t" /* R */\ - "psrlq $3, %%mm2 \n\t"\ - "psrlq $1, %%mm5 \n\t"\ -\ - "movq %%mm2, %%mm1 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ -\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - "punpcklbw %%mm5, %%mm2 \n\t"\ - "punpckhbw %%mm7, %%mm4 \n\t"\ - "punpckhbw %%mm5, %%mm1 \n\t"\ -\ - "psllq $2, %%mm3 \n\t"\ - "psllq $2, %%mm4 \n\t"\ -\ - "por %%mm3, %%mm2 \n\t"\ - "por %%mm4, %%mm1 \n\t"\ -\ - MOVNTQ(%%mm2, (%4, %%eax, 2))\ - MOVNTQ(%%mm1, 8(%4, %%eax, 2))\ -\ - "addl $8, %%eax \n\t"\ - "cmpl %5, %%eax \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR24OLD \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ -\ - "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ - "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ - "pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\ - "pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\ - "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ - "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ - "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ - "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ -\ - "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ - "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 
*/\ - "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ - "pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\ - "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ - "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ - "pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\ - "pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\ - "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ - "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ - "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ - "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ -\ - "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ - "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ - "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ - "pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\ - "pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\ - "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ - "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ - "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ -\ - MOVNTQ(%%mm0, (%%ebx))\ - MOVNTQ(%%mm2, 8(%%ebx))\ - MOVNTQ(%%mm3, 16(%%ebx))\ - "addl $24, %%ebx \n\t"\ -\ - "addl $8, %%eax \n\t"\ - "cmpl %5, %%eax \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR24MMX \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ -\ - "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ - "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ - "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ - "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ -\ - "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ - "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ - "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ - "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ -\ - "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ - "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ - "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ - "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ -\ - "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ - "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ - "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ - "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ - MOVNTQ(%%mm0, (%%ebx))\ -\ - "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ - "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ - "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ - "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ - MOVNTQ(%%mm6, 8(%%ebx))\ -\ - "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ - "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ - "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ - MOVNTQ(%%mm5, 16(%%ebx))\ -\ - "addl $24, %%ebx \n\t"\ -\ - "addl $8, %%eax \n\t"\ - "cmpl %5, %%eax \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR24MMX2 \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq M24A, %%mm0 \n\t"\ - "movq M24C, %%mm7 \n\t"\ - "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ - "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ - "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ -\ - "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ - "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ - "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ -\ - "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ - "por %%mm1, %%mm6 \n\t"\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, (%%ebx))\ -\ - "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ - "pshufw $0xA5, %%mm2, %%mm1 
\n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ - "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ - "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ -\ - "pand M24B, %%mm1 \n\t" /* B5 B4 B3 */\ - "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ - "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ -\ - "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, 8(%%ebx))\ -\ - "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ - "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ - "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ -\ - "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ - "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ - "pand M24B, %%mm6 \n\t" /* R7 R6 R5 */\ -\ - "por %%mm1, %%mm3 \n\t"\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, 16(%%ebx))\ -\ - "addl $24, %%ebx \n\t"\ -\ - "addl $8, %%eax \n\t"\ - "cmpl %5, %%eax \n\t"\ - " jb 1b \n\t" - -#ifdef HAVE_MMX2 -#define WRITEBGR24 WRITEBGR24MMX2 -#else -#define WRITEBGR24 WRITEBGR24MMX -#endif - -#ifdef HAVE_MMX +#ifdef CAN_COMPILE_X86_ASM void in_asm_used_var_warning_killer() { int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ @@ -613,888 +135,57 @@ void in_asm_used_var_warning_killer() } #endif -static inline void yuv2yuv(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstw, int yalpha, int uvalpha) -{ - int yalpha1=yalpha^4095; - int uvalpha1=uvalpha^4095; - int i; - - asm volatile ("\n\t"::: "memory"); - - for(i=0;i<dstw;i++) - { - ((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19; - } - - if(uvalpha != -1) - { - for(i=0; i<(dstw>>1); i++) - { - ((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19; - ((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; - } - } -} - -/** - * vertical scale YV12 to RGB - */ -static inline void yuv2rgbX(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, - uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp) -{ - int yalpha1=yalpha^4095; - int uvalpha1=uvalpha^4095; - - if(fullUVIpol) - { - -#ifdef HAVE_MMX - if(dstbpp == 32) - { - asm volatile( - - -FULL_YSCALEYUV2RGB - "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG - "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 - - "movq %%mm3, %%mm1 \n\t" - "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 - "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 - - MOVNTQ(%%mm3, (%4, %%eax, 4)) - MOVNTQ(%%mm1, 8(%4, %%eax, 4)) - - "addl $4, %%eax \n\t" - "cmpl %5, %%eax \n\t" - " jb 1b \n\t" - - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - else if(dstbpp==24) - { - asm volatile( - -FULL_YSCALEYUV2RGB - - // lsb ... 
msb - "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG - "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 - - "movq %%mm3, %%mm1 \n\t" - "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 - "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 - - "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 - "psrlq $8, %%mm3 \n\t" // GR0BGR00 - "pand bm00000111, %%mm2 \n\t" // BGR00000 - "pand bm11111000, %%mm3 \n\t" // 000BGR00 - "por %%mm2, %%mm3 \n\t" // BGRBGR00 - "movq %%mm1, %%mm2 \n\t" - "psllq $48, %%mm1 \n\t" // 000000BG - "por %%mm1, %%mm3 \n\t" // BGRBGRBG - - "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 - "psrld $16, %%mm2 \n\t" // R000R000 - "psrlq $24, %%mm1 \n\t" // 0BGR0000 - "por %%mm2, %%mm1 \n\t" // RBGRR000 - - "movl %4, %%ebx \n\t" - "addl %%eax, %%ebx \n\t" - -#ifdef HAVE_MMX2 - //FIXME Alignment - "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" - "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" -#else - "movd %%mm3, (%%ebx, %%eax, 2) \n\t" - "psrlq $32, %%mm3 \n\t" - "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" - "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" -#endif - "addl $4, %%eax \n\t" - "cmpl %5, %%eax \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax", "%ebx" - ); - } - else if(dstbpp==15) - { - asm volatile( - -FULL_YSCALEYUV2RGB -#ifdef DITHER1XBPP - "paddusb g5Dither, %%mm1 \n\t" - "paddusb r5Dither, %%mm0 \n\t" - "paddusb b5Dither, %%mm3 \n\t" -#endif - "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G - "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B - "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R - - "psrlw $3, %%mm3 \n\t" - "psllw $2, %%mm1 \n\t" - "psllw $7, %%mm0 \n\t" - "pand g15Mask, %%mm1 \n\t" - "pand r15Mask, %%mm0 \n\t" - - "por %%mm3, %%mm1 \n\t" - "por %%mm1, %%mm0 \n\t" - - MOVNTQ(%%mm0, (%4, %%eax, 2)) - - "addl $4, %%eax \n\t" - "cmpl %5, %%eax \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - else if(dstbpp==16) - { - asm volatile( - -FULL_YSCALEYUV2RGB -#ifdef DITHER1XBPP - "paddusb g6Dither, %%mm1 \n\t" - "paddusb r5Dither, %%mm0 \n\t" - "paddusb b5Dither, %%mm3 \n\t" -#endif - "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G - "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B - "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R - - "psrlw $3, %%mm3 \n\t" - "psllw $3, %%mm1 \n\t" - "psllw $8, %%mm0 \n\t" - "pand g16Mask, %%mm1 \n\t" - "pand r16Mask, %%mm0 \n\t" - - "por %%mm3, %%mm1 \n\t" - "por %%mm1, %%mm0 \n\t" - - MOVNTQ(%%mm0, (%4, %%eax, 2)) - - "addl $4, %%eax \n\t" - "cmpl %5, %%eax \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } -#else - asm volatile ("\n\t"::: "memory"); - - if(dstbpp==32 || dstbpp==24) - { - int i; - for(i=0;i<dstw;i++){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); - dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; - dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; - dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; - dest+=dstbpp>>3; - } - } - else if(dstbpp==16) - { - int i; - for(i=0;i<dstw;i++){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int 
V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); - - ((uint16_t*)dest)[i] = - clip_table16b[(Y + yuvtab_40cf[U]) >>13] | - clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | - clip_table16r[(Y + yuvtab_3343[V]) >>13]; - } - } - else if(dstbpp==15) - { - int i; - for(i=0;i<dstw;i++){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); - - ((uint16_t*)dest)[i] = - clip_table15b[(Y + yuvtab_40cf[U]) >>13] | - clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | - clip_table15r[(Y + yuvtab_3343[V]) >>13]; - } - } -#endif - }//FULL_UV_IPOL - else - { -#ifdef HAVE_MMX - if(dstbpp == 32) - { - asm volatile( - YSCALEYUV2RGB - WRITEBGR32 - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - else if(dstbpp==24) - { - asm volatile( - "movl %4, %%ebx \n\t" - YSCALEYUV2RGB - WRITEBGR24 - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax", "%ebx" - ); - } - else if(dstbpp==15) - { - asm volatile( - YSCALEYUV2RGB - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb b5Dither, %%mm2 \n\t" - "paddusb g5Dither, %%mm4 \n\t" - "paddusb r5Dither, %%mm5 \n\t" -#endif - - WRITEBGR15 - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - else if(dstbpp==16) - { - asm volatile( - YSCALEYUV2RGB - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb b5Dither, %%mm2 \n\t" - "paddusb g6Dither, %%mm4 \n\t" - "paddusb r5Dither, %%mm5 \n\t" -#endif - - WRITEBGR16 - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } -#else - asm volatile ("\n\t"::: "memory"); - - if(dstbpp==32) - { - int i; - for(i=0; i<dstw-1; i+=2){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; - int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); - int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); - - int Cb= yuvtab_40cf[U]; - int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; - int Cr= yuvtab_3343[V]; - - dest[4*i+0]=clip_table[((Y1 + Cb) >>13)]; - dest[4*i+1]=clip_table[((Y1 + Cg) >>13)]; - dest[4*i+2]=clip_table[((Y1 + Cr) >>13)]; - - dest[4*i+4]=clip_table[((Y2 + Cb) >>13)]; - dest[4*i+5]=clip_table[((Y2 + Cg) >>13)]; - dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; - } - } - if(dstbpp==24) - { - int i; - for(i=0; i<dstw-1; i+=2){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; - int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); - int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); - - int Cb= yuvtab_40cf[U]; - int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; - int Cr= yuvtab_3343[V]; - - dest[0]=clip_table[((Y1 + Cb) >>13)]; - dest[1]=clip_table[((Y1 + Cg) >>13)]; - dest[2]=clip_table[((Y1 + Cr) >>13)]; - - dest[3]=clip_table[((Y2 + Cb) >>13)]; - dest[4]=clip_table[((Y2 + Cg) >>13)]; - dest[5]=clip_table[((Y2 + Cr) >>13)]; - dest+=6; - } - } - else if(dstbpp==16) - { - 
int i; - for(i=0; i<dstw-1; i+=2){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; - int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); - int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); - - int Cb= yuvtab_40cf[U]; - int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; - int Cr= yuvtab_3343[V]; - - ((uint16_t*)dest)[i] = - clip_table16b[(Y1 + Cb) >>13] | - clip_table16g[(Y1 + Cg) >>13] | - clip_table16r[(Y1 + Cr) >>13]; - - ((uint16_t*)dest)[i+1] = - clip_table16b[(Y2 + Cb) >>13] | - clip_table16g[(Y2 + Cg) >>13] | - clip_table16r[(Y2 + Cr) >>13]; - } - } - else if(dstbpp==15) - { - int i; - for(i=0; i<dstw-1; i+=2){ - // vertical linear interpolation && yuv2rgb in a single step: - int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; - int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; - int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); - int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); - - int Cb= yuvtab_40cf[U]; - int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; - int Cr= yuvtab_3343[V]; - - ((uint16_t*)dest)[i] = - clip_table15b[(Y1 + Cb) >>13] | - clip_table15g[(Y1 + Cg) >>13] | - clip_table15r[(Y1 + Cr) >>13]; - - ((uint16_t*)dest)[i+1] = - clip_table15b[(Y2 + Cb) >>13] | - clip_table15g[(Y2 + Cg) >>13] | - clip_table15r[(Y2 + Cr) >>13]; - } - } -#endif - } //!FULL_UV_IPOL -} - -/** - * YV12 to RGB without scaling or interpolating - */ -static inline void yuv2rgb1(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, - uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp) -{ - int uvalpha1=uvalpha^4095; -#ifdef HAVE_MMX - int yalpha1=yalpha^4095; -#endif - - if(fullUVIpol || allwaysIpol) - { - yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); - return; - } - if( yalpha > 2048 ) buf0 = buf1; - -#ifdef HAVE_MMX - if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster - { - if(dstbpp == 32) - { - asm volatile( - YSCALEYUV2RGB1 - WRITEBGR32 - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - else if(dstbpp==24) - { - asm volatile( - "movl %4, %%ebx \n\t" - YSCALEYUV2RGB1 - WRITEBGR24 - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax", "%ebx" - ); - } - else if(dstbpp==15) - { - asm volatile( - YSCALEYUV2RGB1 - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb b5Dither, %%mm2 \n\t" - "paddusb g5Dither, %%mm4 \n\t" - "paddusb r5Dither, %%mm5 \n\t" -#endif - WRITEBGR15 - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - else if(dstbpp==16) - { - asm volatile( - YSCALEYUV2RGB1 - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb b5Dither, %%mm2 \n\t" - "paddusb g6Dither, %%mm4 \n\t" - "paddusb r5Dither, %%mm5 \n\t" -#endif - - WRITEBGR16 - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), - "m" (yalpha1), "m" (uvalpha1) - : "%eax" - ); - } - } - else - { - if(dstbpp == 32) - { - asm volatile( - YSCALEYUV2RGB1b |