diff options
Diffstat (limited to 'libswscale/swscale_template.c')
-rw-r--r-- | libswscale/swscale_template.c | 868 |
1 files changed, 467 insertions, 401 deletions
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index 6048bfc90b..74f4c57285 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -950,22 +950,25 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t * uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) { #ifdef HAVE_MMX - if (c->flags & SWS_ACCURATE_RND){ - if (uDest){ - YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) - YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) - } + if(!(c->flags & SWS_BITEXACT)){ + if (c->flags & SWS_ACCURATE_RND){ + if (uDest){ + YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) + YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) + } - YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) - }else{ - if (uDest){ - YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) - YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) - } + YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) + }else{ + if (uDest){ + YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) + YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) + } - YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW) + YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW) + } + return; } -#else +#endif #ifdef HAVE_ALTIVEC yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize, @@ -975,7 +978,6 @@ yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize, dest, uDest, vDest, dstW, chrDstW); #endif //!HAVE_ALTIVEC -#endif /* HAVE_MMX */ } static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, @@ -990,34 +992,36 @@ yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) { + int i; #ifdef HAVE_MMX - long p= uDest ? 3 : 1; - uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; - uint8_t *dst[3]= {dest, uDest, vDest}; - long counter[3] = {dstW, chrDstW, chrDstW}; - - if (c->flags & SWS_ACCURATE_RND){ - while(p--){ - asm volatile( - YSCALEYUV2YV121_ACCURATE - :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) - : "%"REG_a - ); - } - }else{ - while(p--){ - asm volatile( - YSCALEYUV2YV121 - :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) - : "%"REG_a - ); + if(!(c->flags & SWS_BITEXACT)){ + long p= uDest ? 3 : 1; + uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; + uint8_t *dst[3]= {dest, uDest, vDest}; + long counter[3] = {dstW, chrDstW, chrDstW}; + + if (c->flags & SWS_ACCURATE_RND){ + while(p--){ + asm volatile( + YSCALEYUV2YV121_ACCURATE + :: "r" (src[p]), "r" (dst[p] + counter[p]), + "g" (-counter[p]) + : "%"REG_a + ); + } + }else{ + while(p--){ + asm volatile( + YSCALEYUV2YV121 + :: "r" (src[p]), "r" (dst[p] + counter[p]), + "g" (-counter[p]) + : "%"REG_a + ); + } } + return; } - -#else - int i; +#endif for (i=0; i<dstW; i++) { int val= (lumSrc[i]+64)>>7; @@ -1046,7 +1050,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chr uDest[i]= u; vDest[i]= v; } -#endif } @@ -1059,126 +1062,128 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ { #ifdef HAVE_MMX long dummy=0; - if (c->flags & SWS_ACCURATE_RND){ - switch(c->dstFormat){ - case PIX_FMT_RGB32: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - WRITEBGR32(%4, %5, %%REGa) + if(!(c->flags & SWS_BITEXACT)){ + if (c->flags & SWS_ACCURATE_RND){ + switch(c->dstFormat){ + case PIX_FMT_RGB32: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + WRITEBGR32(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_BGR24: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_BGR24: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize + "add %4, %%"REG_c" \n\t" + WRITEBGR24(%%REGc, %5, %%REGa) - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); - return; - case PIX_FMT_RGB555: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "r" (&c->redDither), + "m" (dummy), "m" (dummy), "m" (dummy), + "r" (dest), "m" (dstW) + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + ); + return; + case PIX_FMT_RGB555: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2\n\t" - "paddusb "MANGLE(g5Dither)", %%mm4\n\t" - "paddusb "MANGLE(r5Dither)", %%mm5\n\t" + "paddusb "MANGLE(b5Dither)", %%mm2\n\t" + "paddusb "MANGLE(g5Dither)", %%mm4\n\t" + "paddusb "MANGLE(r5Dither)", %%mm5\n\t" #endif - WRITERGB15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_RGB565: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB15(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_RGB565: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2\n\t" - "paddusb "MANGLE(g6Dither)", %%mm4\n\t" - "paddusb "MANGLE(r5Dither)", %%mm5\n\t" + "paddusb "MANGLE(b5Dither)", %%mm2\n\t" + "paddusb "MANGLE(g6Dither)", %%mm4\n\t" + "paddusb "MANGLE(r5Dither)", %%mm5\n\t" #endif - WRITERGB16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: - YSCALEYUV2PACKEDX_ACCURATE - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - } - }else{ - switch(c->dstFormat) - { - case PIX_FMT_RGB32: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - WRITEBGR32(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_BGR24: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) - - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); - return; - case PIX_FMT_RGB555: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB16(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_YUYV422: + YSCALEYUV2PACKEDX_ACCURATE + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + + "psraw $3, %%mm3 \n\t" + "psraw $3, %%mm4 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + WRITEYUY2(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + } + }else{ + switch(c->dstFormat) + { + case PIX_FMT_RGB32: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + WRITEBGR32(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_BGR24: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize + "add %4, %%"REG_c" \n\t" + WRITEBGR24(%%REGc, %5, %%REGa) + + :: "r" (&c->redDither), + "m" (dummy), "m" (dummy), "m" (dummy), + "r" (dest), "m" (dstW) + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + ); + return; + case PIX_FMT_RGB555: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" + "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" + "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" #endif - WRITERGB15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_RGB565: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB15(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_RGB565: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" + "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" + "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" #endif - WRITERGB16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: - YSCALEYUV2PACKEDX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; + WRITERGB16(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_YUYV422: + YSCALEYUV2PACKEDX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + + "psraw $3, %%mm3 \n\t" + "psraw $3, %%mm4 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + WRITEYUY2(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + } } } #endif /* HAVE_MMX */ @@ -1204,8 +1209,8 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { - int yalpha1=yalpha^4095; - int uvalpha1=uvalpha^4095; + int yalpha1=4095- yalpha; + int uvalpha1=4095-uvalpha; int i; #if 0 //isn't used @@ -1424,6 +1429,7 @@ FULL_YSCALEYUV2RGB { #endif // if 0 #ifdef HAVE_MMX + if(!(c->flags & SWS_BITEXACT)){ switch(c->dstFormat) { //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( @@ -1510,8 +1516,9 @@ FULL_YSCALEYUV2RGB return; default: break; } + } #endif //HAVE_MMX -YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C) +YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) } /** @@ -1533,196 +1540,198 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t * } #ifdef HAVE_MMX - if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - { - switch(dstFormat) + if(!(flags & SWS_BITEXACT)){ + if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster { - case PIX_FMT_RGB32: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR24: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB555: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + switch(dstFormat) + { + case PIX_FMT_RGB32: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + WRITEBGR32(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_BGR24: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB555: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" + "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" + "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" #endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB565: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB565: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" + "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" + "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" #endif - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_YUYV422: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED1(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + } } - } - else - { - switch(dstFormat) + else { - case PIX_FMT_RGB32: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR24: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB555: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + switch(dstFormat) + { + case PIX_FMT_RGB32: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + WRITEBGR32(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_BGR24: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB555: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" + "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" + "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" #endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB565: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB565: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" + "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" + "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" #endif - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1b(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_YUYV422: + asm volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED1b(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + } } } #endif /* HAVE_MMX */ if (uvalpha < 2048) { - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C) + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) }else{ - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C) + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) } } //FIXME yuy2* can read up to 7 samples too much -static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) +static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { #ifdef HAVE_MMX asm volatile( @@ -1747,7 +1756,7 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) #endif } -static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { #ifdef HAVE_MMX asm volatile( @@ -1784,7 +1793,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) +static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { #ifdef HAVE_MMX asm volatile( @@ -1808,7 +1817,7 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) #endif } -static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { #ifdef HAVE_MMX asm volatile( @@ -1844,7 +1853,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, } #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\ -static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width)\ +static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\ {\ int i;\ for (i=0; i<width; i++)\ @@ -1865,7 +1874,7 @@ BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7) #define BGR2UV(type, name, shr, shg, shb, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\ -static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width)\ +static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\ {\ int i;\ for (i=0; i<width; i++)\ @@ -1878,7 +1887,7 @@ static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\ }\ }\ -static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width)\ +static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\ {\ int i;\ for (i=0; i<width; i++)\ @@ -1904,7 +1913,7 @@ BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<< BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7) #ifdef HAVE_MMX -static inline void bgr24ToY_mmx(uint8_t *dst, uint8_t *src, long width, int srcFormat) +static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat) { if(srcFormat == PIX_FMT_BGR24){ @@ -1957,7 +1966,7 @@ static inline void bgr24ToY_mmx(uint8_t *dst, uint8_t *src, long width, int srcF ); } -static inline void bgr24ToUV_mmx(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat) +static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat) { asm volatile( "movq 24+%4, %%mm6 \n\t" @@ -2016,10 +2025,10 @@ static inline void bgr24ToUV_mmx(uint8_t *dstU, uint8_t *dstV, uint8_t *src, lon } #endif -static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) +static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { #ifdef HAVE_MMX - bgr24ToY_mmx(dst, src, width, PIX_FMT_BGR24); + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); #else int i; for (i=0; i<width; i++) @@ -2033,10 +2042,10 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) #endif /* HAVE_MMX */ } -static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { #ifdef HAVE_MMX - bgr24ToUV_mmx(dstU, dstV, src1, width, PIX_FMT_BGR24); + RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); #else int i; for (i=0; i<width; i++) @@ -2052,7 +2061,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 assert(src1 == src2); } -static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { int i; for (i=0; i<width; i++) @@ -2067,10 +2076,10 @@ static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t assert(src1 == src2); } -static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width) +static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { #ifdef HAVE_MMX - bgr24ToY_mmx(dst, src, width, PIX_FMT_RGB24); + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); #else int i; for (i=0; i<width; i++) @@ -2084,12 +2093,12 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width) #endif } -static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { int i; assert(src1==src2); #ifdef HAVE_MMX - bgr24ToUV_mmx(dstU, dstV, src1, width, PIX_FMT_RGB24); + RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24); #else for (i=0; i<width; i++) { @@ -2103,15 +2112,15 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 #endif } -static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { int i; assert(src1==src2); for (i=0; i<width; i++) { - int r= src1[6*i + 0] + src1[6*i + 0]; - int g= src1[6*i + 1] + src1[6*i + 1]; - int b= src1[6*i + 2] + src1[6*i + 2]; + int r= src1[6*i + 0] + src1[6*i + 3]; + int g= src1[6*i + 1] + src1[6*i + 4]; + int b= src1[6*i + 2] + src1[6*i + 5]; dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); @@ -2143,6 +2152,26 @@ static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, } } +static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) +{ + int i, j; + for (i=0; i<width/8; i++){ + int d= ~src[i]; + for(j=0; j<8; j++) + dst[8*i+j]= ((d>>(7-j))&1)*255; + } +} + +static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) +{ + int i, j; + for (i=0; i<width/8; i++){ + int d= src[i]; + for(j=0; j<8; j++) + dst[8*i+j]= ((d>>(7-j))&1)*255; + } +} + // bilinear / bicubic scaling static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xI |