author     michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2>  2001-11-26 00:31:43 +0000
committer  michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2>  2001-11-26 00:31:43 +0000
commit     5645836ad4ad51848216fe431a3393845caa1e0a (patch)
tree       ff832372dcd0be6a47751daf772e9edf938267cf /postproc
parent     8188f8359cc532987eb1152e288d2e66301eb910 (diff)
runtime cpu detection
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3127 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r--  postproc/swscale.c          | 1721
-rw-r--r--  postproc/swscale_template.c |  190
2 files changed, 87 insertions, 1824 deletions
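
The change replaces the compile-time #ifdef HAVE_MMX / HAVE_MMX2 guards with a single CAN_COMPILE_X86_ASM gate derived from ARCH_X86, so the x86 code paths are always built on x86 targets and the choice between them can move to runtime via ../cpudetect.h. A minimal sketch of that dispatch pattern, assuming cpudetect-style capability fields (gCpuCaps, hasMMX, hasMMX2); the scale_* function names and pick_scaler are placeholders, not this commit's code:

#include <stdint.h>

typedef struct {
	int hasMMX;
	int hasMMX2;
	int has3DNow;
} CpuCaps;

static CpuCaps gCpuCaps;	/* would be filled in once by the cpudetect code */

static void scale_C   (uint8_t *dst, const uint8_t *src, int w) { /* plain C path */ }
#ifdef CAN_COMPILE_X86_ASM
static void scale_MMX (uint8_t *dst, const uint8_t *src, int w) { /* MMX asm path */ }
static void scale_MMX2(uint8_t *dst, const uint8_t *src, int w) { /* MMX2 asm path */ }
#endif

/* pick the fastest variant the CPU actually supports */
static void (*pick_scaler(void))(uint8_t*, const uint8_t*, int)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2) return scale_MMX2;
	if(gCpuCaps.hasMMX)  return scale_MMX;
#endif
	return scale_C;		/* portable fallback */
}

As the TODO added in the diff below notes, this commit still falls back to C for the paths where the runtime split is unresolved.
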
diff --git a/postproc/swscale.c b/postproc/swscale.c
index 56e53f8bf6..7afb61b2b4 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -10,6 +10,7 @@
#include "../config.h"
#include "swscale.h"
#include "../mmx_defs.h"
+#include "../cpudetect.h"
#undef MOVNTQ
#undef PAVGB
@@ -37,26 +38,18 @@ more intelligent misalignment avoidance for the horizontal scaler
bicubic scaler
dither in C
change the distance of the u & v buffer
+how to differentiate between x86 and C at runtime?! (using C for now)
*/
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
-#ifdef HAVE_MMX2
-#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif defined (HAVE_3DNOW)
-#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
-#endif
-
-#ifdef HAVE_MMX2
-#define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
-#else
-#define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
+#ifdef ARCH_X86
+#define CAN_COMPILE_X86_ASM
#endif
-
-#ifdef HAVE_MMX
+#ifdef CAN_COMPILE_X86_ASM
static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL;
static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL;
static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL;
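
The PAVGB/MOVNTQ wrappers disappear from this file because a single compile-time definition per binary is no longer enough once the asm must be selectable at runtime. The usual companion to CAN_COMPILE_X86_ASM, and a plausible reason swscale_template.c appears in the diffstat, is to compile the template once per variant, redefining the instruction macros on each pass; a hedged sketch, with RENAME and the suffixes as illustrative names rather than this commit's literal code:

/* sketch: build one C and one MMX variant from the same template file */
#define RENAME(a) a ## _C
#include "swscale_template.c"

#ifdef CAN_COMPILE_X86_ASM
#undef  RENAME
#define HAVE_MMX		/* the template's #ifdefs now emit the MMX paths */
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif
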
@@ -98,11 +91,9 @@ static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
static uint64_t __attribute__((aligned(8))) temp0;
static uint64_t __attribute__((aligned(8))) asm_yalpha1;
static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
-#endif
// temporary storage for 4 yuv lines:
// 16bit for now (mmx likes it more compact)
-#ifdef HAVE_MMX
static uint16_t __attribute__((aligned(8))) pix_buf_y[4][2048];
static uint16_t __attribute__((aligned(8))) pix_buf_uv[2][2048*2];
#else
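
Keeping the line buffers 16-bit is not only about MMX register packing: the vertical blend weighs two lines with 12-bit alpha factors, so a pre-scaled luma value times a 12-bit weight still fits comfortably in 32 bits before the final >>19. A minimal standalone version of that blend, mirroring the C fallback further down in this diff (blend_y is a made-up name):

#include <stdint.h>

/* buf0/buf1 hold luma pre-scaled so that (v*4096)>>19 lands in 0..255;
 * yalpha is a 0..4095 weight, and yalpha^4095 == 4095-yalpha */
static uint8_t blend_y(uint16_t y0, uint16_t y1, int yalpha)
{
	int yalpha1= yalpha^4095;
	return (y0*yalpha1 + y1*yalpha)>>19;
}
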
@@ -127,483 +118,14 @@ static int yuvtab_0c92[256];
static int yuvtab_1a1e[256];
static int yuvtab_40cf[256];
-#ifdef HAVE_MMX2
+#ifdef CAN_COMPILE_X86_ASM
static uint8_t funnyYCode[10000];
static uint8_t funnyUVCode[10000];
#endif
static int canMMX2BeUsed=0;
-#define FULL_YSCALEYUV2RGB \
- "pxor %%mm7, %%mm7 \n\t"\
- "movd %6, %%mm6 \n\t" /*yalpha1*/\
- "punpcklwd %%mm6, %%mm6 \n\t"\
- "punpcklwd %%mm6, %%mm6 \n\t"\
- "movd %7, %%mm5 \n\t" /*uvalpha1*/\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "xorl %%eax, %%eax \n\t"\
- ".balign 16 \n\t"\
- "1: \n\t"\
- "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
- "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
- "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\
- "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
- "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
- "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
- "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
- "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
- "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
- "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
- "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
- "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
- "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
- "psubw w400, %%mm3 \n\t" /* 8(U-128)*/\
- "pmulhw yCoeff, %%mm1 \n\t"\
-\
-\
- "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
- "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
- "pmulhw ubCoeff, %%mm3 \n\t"\
- "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
- "pmulhw ugCoeff, %%mm2 \n\t"\
- "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
- "psubw w400, %%mm0 \n\t" /* (V-128)8*/\
-\
-\
- "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
- "pmulhw vrCoeff, %%mm0 \n\t"\
- "pmulhw vgCoeff, %%mm4 \n\t"\
- "paddw %%mm1, %%mm3 \n\t" /* B*/\
- "paddw %%mm1, %%mm0 \n\t" /* R*/\
- "packuswb %%mm3, %%mm3 \n\t"\
-\
- "packuswb %%mm0, %%mm0 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm2, %%mm1 \n\t" /* G*/\
-\
- "packuswb %%mm1, %%mm1 \n\t"
-
-#define YSCALEYUV2RGB \
- "movd %6, %%mm6 \n\t" /*yalpha1*/\
- "punpcklwd %%mm6, %%mm6 \n\t"\
- "punpcklwd %%mm6, %%mm6 \n\t"\
- "movq %%mm6, asm_yalpha1 \n\t"\
- "movd %7, %%mm5 \n\t" /*uvalpha1*/\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "movq %%mm5, asm_uvalpha1 \n\t"\
- "xorl %%eax, %%eax \n\t"\
- ".balign 16 \n\t"\
- "1: \n\t"\
- "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
- "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
- "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
- "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
- "movq asm_uvalpha1, %%mm0 \n\t"\
- "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
- "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
- "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
- "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
- "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
- "psubw w400, %%mm3 \n\t" /* (U-128)8*/\
- "psubw w400, %%mm4 \n\t" /* (V-128)8*/\
- "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
- "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
- "pmulhw ugCoeff, %%mm3 \n\t"\
- "pmulhw vgCoeff, %%mm4 \n\t"\
- /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
- "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
- "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
- "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\
- "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\
- "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
- "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
- "pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
- "pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
- "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
- "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
- "pmulhw ubCoeff, %%mm2 \n\t"\
- "pmulhw vrCoeff, %%mm5 \n\t"\
- "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
- "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
- "pmulhw yCoeff, %%mm1 \n\t"\
- "pmulhw yCoeff, %%mm7 \n\t"\
- /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
- "paddw %%mm3, %%mm4 \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "movq %%mm5, %%mm6 \n\t"\
- "movq %%mm4, %%mm3 \n\t"\
- "punpcklwd %%mm2, %%mm2 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm4, %%mm4 \n\t"\
- "paddw %%mm1, %%mm2 \n\t"\
- "paddw %%mm1, %%mm5 \n\t"\
- "paddw %%mm1, %%mm4 \n\t"\
- "punpckhwd %%mm0, %%mm0 \n\t"\
- "punpckhwd %%mm6, %%mm6 \n\t"\
- "punpckhwd %%mm3, %%mm3 \n\t"\
- "paddw %%mm7, %%mm0 \n\t"\
- "paddw %%mm7, %%mm6 \n\t"\
- "paddw %%mm7, %%mm3 \n\t"\
- /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
- "packuswb %%mm0, %%mm2 \n\t"\
- "packuswb %%mm6, %%mm5 \n\t"\
- "packuswb %%mm3, %%mm4 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"
-
-#define YSCALEYUV2RGB1 \
- "xorl %%eax, %%eax \n\t"\
- ".balign 16 \n\t"\
- "1: \n\t"\
- "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\
- "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
- "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
- "psubw w400, %%mm3 \n\t" /* (U-128)8*/\
- "psubw w400, %%mm4 \n\t" /* (V-128)8*/\
- "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
- "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
- "pmulhw ugCoeff, %%mm3 \n\t"\
- "pmulhw vgCoeff, %%mm4 \n\t"\
- /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
- "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
- "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
- "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "pmulhw ubCoeff, %%mm2 \n\t"\
- "pmulhw vrCoeff, %%mm5 \n\t"\
- "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
- "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
- "pmulhw yCoeff, %%mm1 \n\t"\
- "pmulhw yCoeff, %%mm7 \n\t"\
- /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
- "paddw %%mm3, %%mm4 \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "movq %%mm5, %%mm6 \n\t"\
- "movq %%mm4, %%mm3 \n\t"\
- "punpcklwd %%mm2, %%mm2 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm4, %%mm4 \n\t"\
- "paddw %%mm1, %%mm2 \n\t"\
- "paddw %%mm1, %%mm5 \n\t"\
- "paddw %%mm1, %%mm4 \n\t"\
- "punpckhwd %%mm0, %%mm0 \n\t"\
- "punpckhwd %%mm6, %%mm6 \n\t"\
- "punpckhwd %%mm3, %%mm3 \n\t"\
- "paddw %%mm7, %%mm0 \n\t"\
- "paddw %%mm7, %%mm6 \n\t"\
- "paddw %%mm7, %%mm3 \n\t"\
- /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
- "packuswb %%mm0, %%mm2 \n\t"\
- "packuswb %%mm6, %%mm5 \n\t"\
- "packuswb %%mm3, %%mm4 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"
-
-// do vertical chrominance interpolation
-#define YSCALEYUV2RGB1b \
- "xorl %%eax, %%eax \n\t"\
- ".balign 16 \n\t"\
- "1: \n\t"\
- "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
- "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
- "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
- "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
- "psrlw $5, %%mm3 \n\t"\
- "psrlw $5, %%mm4 \n\t"\
- "psubw w400, %%mm3 \n\t" /* (U-128)8*/\
- "psubw w400, %%mm4 \n\t" /* (V-128)8*/\
- "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
- "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
- "pmulhw ugCoeff, %%mm3 \n\t"\
- "pmulhw vgCoeff, %%mm4 \n\t"\
- /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
- "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
- "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
- "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "pmulhw ubCoeff, %%mm2 \n\t"\
- "pmulhw vrCoeff, %%mm5 \n\t"\
- "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
- "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
- "pmulhw yCoeff, %%mm1 \n\t"\
- "pmulhw yCoeff, %%mm7 \n\t"\
- /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
- "paddw %%mm3, %%mm4 \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "movq %%mm5, %%mm6 \n\t"\
- "movq %%mm4, %%mm3 \n\t"\
- "punpcklwd %%mm2, %%mm2 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm4, %%mm4 \n\t"\
- "paddw %%mm1, %%mm2 \n\t"\
- "paddw %%mm1, %%mm5 \n\t"\
- "paddw %%mm1, %%mm4 \n\t"\
- "punpckhwd %%mm0, %%mm0 \n\t"\
- "punpckhwd %%mm6, %%mm6 \n\t"\
- "punpckhwd %%mm3, %%mm3 \n\t"\
- "paddw %%mm7, %%mm0 \n\t"\
- "paddw %%mm7, %%mm6 \n\t"\
- "paddw %%mm7, %%mm3 \n\t"\
- /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
- "packuswb %%mm0, %%mm2 \n\t"\
- "packuswb %%mm6, %%mm5 \n\t"\
- "packuswb %%mm3, %%mm4 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"
-
-#define WRITEBGR32 \
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
- "movq %%mm2, %%mm1 \n\t" /* B */\
- "movq %%mm5, %%mm6 \n\t" /* R */\
- "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
- "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
- "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
- "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
- "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
- "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
- "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
- "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
- "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
-\
- MOVNTQ(%%mm0, (%4, %%eax, 4))\
- MOVNTQ(%%mm2, 8(%4, %%eax, 4))\
- MOVNTQ(%%mm1, 16(%4, %%eax, 4))\
- MOVNTQ(%%mm3, 24(%4, %%eax, 4))\
-\
- "addl $8, %%eax \n\t"\
- "cmpl %5, %%eax \n\t"\
- " jb 1b \n\t"
-
-#define WRITEBGR16 \
- "pand bF8, %%mm2 \n\t" /* B */\
- "pand bFC, %%mm4 \n\t" /* G */\
- "pand bF8, %%mm5 \n\t" /* R */\
- "psrlq $3, %%mm2 \n\t"\
-\
- "movq %%mm2, %%mm1 \n\t"\
- "movq %%mm4, %%mm3 \n\t"\
-\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm5, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm5, %%mm1 \n\t"\
-\
- "psllq $3, %%mm3 \n\t"\
- "psllq $3, %%mm4 \n\t"\
-\
- "por %%mm3, %%mm2 \n\t"\
- "por %%mm4, %%mm1 \n\t"\
-\
- MOVNTQ(%%mm2, (%4, %%eax, 2))\
- MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
-\
- "addl $8, %%eax \n\t"\
- "cmpl %5, %%eax \n\t"\
- " jb 1b \n\t"
-
-#define WRITEBGR15 \
- "pand bF8, %%mm2 \n\t" /* B */\
- "pand bF8, %%mm4 \n\t" /* G */\
- "pand bF8, %%mm5 \n\t" /* R */\
- "psrlq $3, %%mm2 \n\t"\
- "psrlq $1, %%mm5 \n\t"\
-\
- "movq %%mm2, %%mm1 \n\t"\
- "movq %%mm4, %%mm3 \n\t"\
-\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm5, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm5, %%mm1 \n\t"\
-\
- "psllq $2, %%mm3 \n\t"\
- "psllq $2, %%mm4 \n\t"\
-\
- "por %%mm3, %%mm2 \n\t"\
- "por %%mm4, %%mm1 \n\t"\
-\
- MOVNTQ(%%mm2, (%4, %%eax, 2))\
- MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
-\
- "addl $8, %%eax \n\t"\
- "cmpl %5, %%eax \n\t"\
- " jb 1b \n\t"
-
-#define WRITEBGR24OLD \
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
- "movq %%mm2, %%mm1 \n\t" /* B */\
- "movq %%mm5, %%mm6 \n\t" /* R */\
- "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
- "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
- "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
- "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
- "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
- "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
- "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
- "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
- "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
-\
- "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
- "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
- "pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\
- "pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\
- "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
- "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
- "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
- "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
-\
- "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
- "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
- "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
- "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
- "pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\
- "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
- "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
- "pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\
- "pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\
- "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
- "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
- "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
- "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
-\
- "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
- "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
- "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
- "pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\
- "pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\
- "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
- "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
- "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
-\
- MOVNTQ(%%mm0, (%%ebx))\
- MOVNTQ(%%mm2, 8(%%ebx))\
- MOVNTQ(%%mm3, 16(%%ebx))\
- "addl $24, %%ebx \n\t"\
-\
- "addl $8, %%eax \n\t"\
- "cmpl %5, %%eax \n\t"\
- " jb 1b \n\t"
-
-#define WRITEBGR24MMX \
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
- "movq %%mm2, %%mm1 \n\t" /* B */\
- "movq %%mm5, %%mm6 \n\t" /* R */\
- "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
- "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
- "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
- "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
- "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
- "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
- "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
- "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
- "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
-\
- "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
- "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
- "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
- "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
-\
- "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
- "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
- "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
- "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
-\
- "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
- "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
- "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
- "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
-\
- "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
- "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
- "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
- "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
- MOVNTQ(%%mm0, (%%ebx))\
-\
- "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
- "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
- "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
- "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
- MOVNTQ(%%mm6, 8(%%ebx))\
-\
- "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
- "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
- "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
- MOVNTQ(%%mm5, 16(%%ebx))\
-\
- "addl $24, %%ebx \n\t"\
-\
- "addl $8, %%eax \n\t"\
- "cmpl %5, %%eax \n\t"\
- " jb 1b \n\t"
-
-#define WRITEBGR24MMX2 \
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
- "movq M24A, %%mm0 \n\t"\
- "movq M24C, %%mm7 \n\t"\
- "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
- "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
- "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
-\
- "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
- "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
- "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
-\
- "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
- "por %%mm1, %%mm6 \n\t"\
- "por %%mm3, %%mm6 \n\t"\
- MOVNTQ(%%mm6, (%%ebx))\
-\
- "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
- "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
- "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
- "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
-\
- "pand M24B, %%mm1 \n\t" /* B5 B4 B3 */\
- "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
- "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
-\
- "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
- "por %%mm3, %%mm6 \n\t"\
- MOVNTQ(%%mm6, 8(%%ebx))\
-\
- "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\
- "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
- "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
-\
- "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
- "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
- "pand M24B, %%mm6 \n\t" /* R7 R6 R5 */\
-\
- "por %%mm1, %%mm3 \n\t"\
- "por %%mm3, %%mm6 \n\t"\
- MOVNTQ(%%mm6, 16(%%ebx))\
-\
- "addl $24, %%ebx \n\t"\
-\
- "addl $8, %%eax \n\t"\
- "cmpl %5, %%eax \n\t"\
- " jb 1b \n\t"
-
-#ifdef HAVE_MMX2
-#define WRITEBGR24 WRITEBGR24MMX2
-#else
-#define WRITEBGR24 WRITEBGR24MMX
-#endif
-
-#ifdef HAVE_MMX
+#ifdef CAN_COMPILE_X86_ASM
void in_asm_used_var_warning_killer()
{
int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
@@ -613,888 +135,57 @@ void in_asm_used_var_warning_killer()
}
#endif
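
in_asm_used_var_warning_killer exists only because the coefficient statics above are referenced solely from inside asm strings, which the compiler cannot see; summing them into a dummy variable marks them used, so they are neither warned about nor optimized away. A stripped-down illustration of the same trick (names invented):

static unsigned long long demoCoeffA= 0x2568256825682568ULL;
static unsigned long long demoCoeffB= 0x3343334333433343ULL;

void demo_asm_used_var_warning_killer(void)
{
	volatile unsigned long long i= demoCoeffA + demoCoeffB;
	(void)i;	/* dummy read keeps the statics "used" */
}
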
-static inline void yuv2yuv(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstw, int yalpha, int uvalpha)
-{
- int yalpha1=yalpha^4095;
- int uvalpha1=uvalpha^4095;
- int i;
-
- asm volatile ("\n\t"::: "memory");
-
- for(i=0;i<dstw;i++)
- {
- ((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19;
- }
-
- if(uvalpha != -1)
- {
- for(i=0; i<(dstw>>1); i++)
- {
- ((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19;
- ((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
- }
- }
-}
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void yuv2rgbX(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp)
-{
- int yalpha1=yalpha^4095;
- int uvalpha1=uvalpha^4095;
-
- if(fullUVIpol)
- {
-
-#ifdef HAVE_MMX
- if(dstbpp == 32)
- {
- asm volatile(
-
-
-FULL_YSCALEYUV2RGB
- "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
- "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
-
- "movq %%mm3, %%mm1 \n\t"
- "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
- "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
-
- MOVNTQ(%%mm3, (%4, %%eax, 4))
- MOVNTQ(%%mm1, 8(%4, %%eax, 4))
-
- "addl $4, %%eax \n\t"
- "cmpl %5, %%eax \n\t"
- " jb 1b \n\t"
-
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==24)
- {
- asm volatile(
-
-FULL_YSCALEYUV2RGB
-
- // lsb ... msb
- "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
- "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
-
- "movq %%mm3, %%mm1 \n\t"
- "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
- "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
-
- "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
- "psrlq $8, %%mm3 \n\t" // GR0BGR00
- "pand bm00000111, %%mm2 \n\t" // BGR00000
- "pand bm11111000, %%mm3 \n\t" // 000BGR00
- "por %%mm2, %%mm3 \n\t" // BGRBGR00
- "movq %%mm1, %%mm2 \n\t"
- "psllq $48, %%mm1 \n\t" // 000000BG
- "por %%mm1, %%mm3 \n\t" // BGRBGRBG
-
- "movq %%mm2, %%mm1 \n\t" // BGR0BGR0
- "psrld $16, %%mm2 \n\t" // R000R000
- "psrlq $24, %%mm1 \n\t" // 0BGR0000
- "por %%mm2, %%mm1 \n\t" // RBGRR000
-
- "movl %4, %%ebx \n\t"
- "addl %%eax, %%ebx \n\t"
-
-#ifdef HAVE_MMX2
- //FIXME Alignment
- "movntq %%mm3, (%%ebx, %%eax, 2)\n\t"
- "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t"
-#else
- "movd %%mm3, (%%ebx, %%eax, 2) \n\t"
- "psrlq $32, %%mm3 \n\t"
- "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t"
- "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t"
-#endif
- "addl $4, %%eax \n\t"
- "cmpl %5, %%eax \n\t"
- " jb 1b \n\t"
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax", "%ebx"
- );
- }
- else if(dstbpp==15)
- {
- asm volatile(
-
-FULL_YSCALEYUV2RGB
-#ifdef DITHER1XBPP
- "paddusb g5Dither, %%mm1 \n\t"
- "paddusb r5Dither, %%mm0 \n\t"
- "paddusb b5Dither, %%mm3 \n\t"
-#endif
- "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
- "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
- "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
-
- "psrlw $3, %%mm3 \n\t"
- "psllw $2, %%mm1 \n\t"
- "psllw $7, %%mm0 \n\t"
- "pand g15Mask, %%mm1 \n\t"
- "pand r15Mask, %%mm0 \n\t"
-
- "por %%mm3, %%mm1 \n\t"
- "por %%mm1, %%mm0 \n\t"
-
- MOVNTQ(%%mm0, (%4, %%eax, 2))
-
- "addl $4, %%eax \n\t"
- "cmpl %5, %%eax \n\t"
- " jb 1b \n\t"
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==16)
- {
- asm volatile(
-
-FULL_YSCALEYUV2RGB
-#ifdef DITHER1XBPP
- "paddusb g6Dither, %%mm1 \n\t"
- "paddusb r5Dither, %%mm0 \n\t"
- "paddusb b5Dither, %%mm3 \n\t"
-#endif
- "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
- "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
- "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
-
- "psrlw $3, %%mm3 \n\t"
- "psllw $3, %%mm1 \n\t"
- "psllw $8, %%mm0 \n\t"
- "pand g16Mask, %%mm1 \n\t"
- "pand r16Mask, %%mm0 \n\t"
-
- "por %%mm3, %%mm1 \n\t"
- "por %%mm1, %%mm0 \n\t"
-
- MOVNTQ(%%mm0, (%4, %%eax, 2))
-
- "addl $4, %%eax \n\t"
- "cmpl %5, %%eax \n\t"
- " jb 1b \n\t"
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
-#else
- asm volatile ("\n\t"::: "memory");
-
- if(dstbpp==32 || dstbpp==24)
- {
- int i;
- for(i=0;i<dstw;i++){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
- int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
- dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
- dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
- dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
- dest+=dstbpp>>3;
- }
- }
- else if(dstbpp==16)
- {
- int i;
- for(i=0;i<dstw;i++){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
- int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
-
- ((uint16_t*)dest)[i] =
- clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
- clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
- clip_table16r[(Y + yuvtab_3343[V]) >>13];
- }
- }
- else if(dstbpp==15)
- {
- int i;
- for(i=0;i<dstw;i++){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
- int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
-
- ((uint16_t*)dest)[i] =
- clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
- clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
- clip_table15r[(Y + yuvtab_3343[V]) >>13];
- }
- }
-#endif
- }//FULL_UV_IPOL
- else
- {
-#ifdef HAVE_MMX
- if(dstbpp == 32)
- {
- asm volatile(
- YSCALEYUV2RGB
- WRITEBGR32
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==24)
- {
- asm volatile(
- "movl %4, %%ebx \n\t"
- YSCALEYUV2RGB
- WRITEBGR24
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax", "%ebx"
- );
- }
- else if(dstbpp==15)
- {
- asm volatile(
- YSCALEYUV2RGB
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
- "paddusb b5Dither, %%mm2 \n\t"
- "paddusb g5Dither, %%mm4 \n\t"
- "paddusb r5Dither, %%mm5 \n\t"
-#endif
-
- WRITEBGR15
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==16)
- {
- asm volatile(
- YSCALEYUV2RGB
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
- "paddusb b5Dither, %%mm2 \n\t"
- "paddusb g6Dither, %%mm4 \n\t"
- "paddusb r5Dither, %%mm5 \n\t"
-#endif
-
- WRITEBGR16
-
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
-#else
- asm volatile ("\n\t"::: "memory");
-
- if(dstbpp==32)
- {
- int i;
- for(i=0; i<dstw-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
- dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
- dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
-
- dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
- dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
- dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
- }
- }
- if(dstbpp==24)
- {
- int i;
- for(i=0; i<dstw-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- dest[0]=clip_table[((Y1 + Cb) >>13)];
- dest[1]=clip_table[((Y1 + Cg) >>13)];
- dest[2]=clip_table[((Y1 + Cr) >>13)];
-
- dest[3]=clip_table[((Y2 + Cb) >>13)];
- dest[4]=clip_table[((Y2 + Cg) >>13)];
- dest[5]=clip_table[((Y2 + Cr) >>13)];
- dest+=6;
- }
- }
- else if(dstbpp==16)
- {
- int i;
- for(i=0; i<dstw-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- ((uint16_t*)dest)[i] =
- clip_table16b[(Y1 + Cb) >>13] |
- clip_table16g[(Y1 + Cg) >>13] |
- clip_table16r[(Y1 + Cr) >>13];
-
- ((uint16_t*)dest)[i+1] =
- clip_table16b[(Y2 + Cb) >>13] |
- clip_table16g[(Y2 + Cg) >>13] |
- clip_table16r[(Y2 + Cr) >>13];
- }
- }
- else if(dstbpp==15)
- {
- int i;
- for(i=0; i<dstw-1; i+=2){
- // vertical linear interpolation && yuv2rgb in a single step:
- int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
- int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
- int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19);
- int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19);
-
- int Cb= yuvtab_40cf[U];
- int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
- int Cr= yuvtab_3343[V];
-
- ((uint16_t*)dest)[i] =
- clip_table15b[(Y1 + Cb) >>13] |
- clip_table15g[(Y1 + Cg) >>13] |
- clip_table15r[(Y1 + Cr) >>13];
-
- ((uint16_t*)dest)[i+1] =
- clip_table15b[(Y2 + Cb) >>13] |
- clip_table15g[(Y2 + Cg) >>13] |
- clip_table15r[(Y2 + Cr) >>13];
- }
- }
-#endif
- } //!FULL_UV_IPOL
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static inline void yuv2rgb1(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
- uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp)
-{
- int uvalpha1=uvalpha^4095;
-#ifdef HAVE_MMX
- int yalpha1=yalpha^4095;
-#endif
-
- if(fullUVIpol || allwaysIpol)
- {
- yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
- return;
- }
- if( yalpha > 2048 ) buf0 = buf1;
-
-#ifdef HAVE_MMX
- if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster
- {
- if(dstbpp == 32)
- {
- asm volatile(
- YSCALEYUV2RGB1
- WRITEBGR32
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==24)
- {
- asm volatile(
- "movl %4, %%ebx \n\t"
- YSCALEYUV2RGB1
- WRITEBGR24
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax", "%ebx"
- );
- }
- else if(dstbpp==15)
- {
- asm volatile(
- YSCALEYUV2RGB1
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
- "paddusb b5Dither, %%mm2 \n\t"
- "paddusb g5Dither, %%mm4 \n\t"
- "paddusb r5Dither, %%mm5 \n\t"
-#endif
- WRITEBGR15
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==16)
- {
- asm volatile(
- YSCALEYUV2RGB1
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
- "paddusb b5Dither, %%mm2 \n\t"
- "paddusb g6Dither, %%mm4 \n\t"
- "paddusb r5Dither, %%mm5 \n\t"
-#endif
-
- WRITEBGR16
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- }
- else
- {
- if(dstbpp == 32)
- {
- asm volatile(
- YSCALEYUV2RGB1b
- WRITEBGR32
- :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
- "m" (yalpha1), "m" (uvalpha1)
- : "%eax"
- );
- }
- else if(dstbpp==24)
- {
- asm volatile(
- "movl %4, %%ebx \n\t"