diff options
Diffstat (limited to 'libswscale/yuv2rgb.c')
-rw-r--r-- | libswscale/yuv2rgb.c | 175 |
1 files changed, 104 insertions, 71 deletions
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index 65af412c2c..d8438c0c66 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -3,7 +3,6 @@ * * Copyright (C) 2009 Konstantin Shishkov * - * MMX/MMX2 template stuff (needed for fast movntq support), * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * @@ -33,38 +32,12 @@ #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" - -#define DITHER1XBPP // only for MMX +#include "libavutil/x86_cpu.h" extern const uint8_t dither_8x8_32[8][8]; extern const uint8_t dither_8x8_73[8][8]; extern const uint8_t dither_8x8_220[8][8]; -#if HAVE_MMX && CONFIG_GPL - -/* hope these constant values are cache line aligned */ -DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; - -//MMX versions -#undef RENAME -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 0 -#define RENAME(a) a ## _MMX -#include "yuv2rgb_template.c" - -//MMX2 versions -#undef RENAME -#undef HAVE_MMX2 -#define HAVE_MMX2 1 -#define RENAME(a) a ## _MMX2 -#include "yuv2rgb_template.c" - -#endif /* HAVE_MMX && CONFIG_GPL */ - const int32_t ff_yuv2rgb_coeffs[8][4] = { {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ @@ -101,12 +74,18 @@ const int32_t ff_yuv2rgb_coeffs[8][4] = { Y = src[2*i+1]; \ dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y]; -#define YUV2RGBFUNC(func_name, dst_type) \ +#define PUTRGBA(dst,ysrc,asrc,i,o,s) \ + Y = ysrc[2*i+o]; \ + dst[2*i ] = r[Y] + g[Y] + b[Y] + (asrc[2*i ]<<s); \ + Y = ysrc[2*i+1-o]; \ + dst[2*i+1] = r[Y] + g[Y] + b[Y] + (asrc[2*i+1]<<s); + +#define YUV2RGBFUNC(func_name, dst_type, alpha) \ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \ int srcSliceH, uint8_t* dst[], int dstStride[]){\ int y;\ \ - if (c->srcFormat == PIX_FMT_YUV422P) {\ + if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\ srcStride[1] *= 2;\ srcStride[2] *= 2;\ }\ @@ -119,7 +98,12 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic uint8_t *py_2 = py_1 + srcStride[0];\ uint8_t *pu = src[1] + (y>>1)*srcStride[1];\ uint8_t *pv = src[2] + (y>>1)*srcStride[2];\ + uint8_t av_unused *pa_1, *pa_2;\ unsigned int h_size = c->dstW>>3;\ + if (alpha){\ + pa_1 = src[3] + y*srcStride[3];\ + pa_2 = pa_1 + srcStride[3];\ + }\ while (h_size--) {\ int av_unused U, V;\ int Y;\ @@ -145,7 +129,7 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic ENDYUV2RGBLINE(dst_delta)\ ENDYUV2RGBFUNC() -YUV2RGBFUNC(yuv2rgb_c_32, uint32_t) +YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0) LOADCHROMA(0); PUTRGB(dst_1,py_1,0,0); PUTRGB(dst_2,py_2,0,1); @@ -174,7 +158,69 @@ ENDYUV2RGBLINE(8) PUTRGB(dst_1,py_1,1,0); ENDYUV2RGBFUNC() -YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t) +YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1) + LOADCHROMA(0); + PUTRGBA(dst_1,py_1,pa_1,0,0,24); + PUTRGBA(dst_2,py_2,pa_2,0,1,24); + + LOADCHROMA(1); + PUTRGBA(dst_2,py_2,pa_1,1,1,24); + PUTRGBA(dst_1,py_1,pa_2,1,0,24); + LOADCHROMA(1); + PUTRGBA(dst_2,py_2,pa_1,1,1,24); + PUTRGBA(dst_1,py_1,pa_2,1,0,24); + + LOADCHROMA(2); + PUTRGBA(dst_1,py_1,pa_1,2,0,24); + PUTRGBA(dst_2,py_2,pa_2,2,1,24); + + LOADCHROMA(3); + PUTRGBA(dst_2,py_2,pa_1,3,1,24); + PUTRGBA(dst_1,py_1,pa_2,3,0,24); + pa_1 += 8;\ + pa_2 += 8;\ +ENDYUV2RGBLINE(8) + LOADCHROMA(0); + PUTRGBA(dst_1,py_1,pa_1,0,0,24); + PUTRGBA(dst_2,py_2,pa_2,0,1,24); + + LOADCHROMA(1); + PUTRGBA(dst_2,py_2,pa_1,1,1,24); + PUTRGBA(dst_1,py_1,pa_2,1,0,24); +ENDYUV2RGBFUNC() + +YUV2RGBFUNC(yuva2argb_c, uint32_t, 1) + LOADCHROMA(0); + PUTRGBA(dst_1,py_1,pa_1,0,0,0); + PUTRGBA(dst_2,py_2,pa_2,0,1,0); + + LOADCHROMA(1); + PUTRGBA(dst_2,py_2,pa_2,1,1,0); + PUTRGBA(dst_1,py_1,pa_1,1,0,0); + LOADCHROMA(1); + PUTRGBA(dst_2,py_2,pa_2,1,1,0); + PUTRGBA(dst_1,py_1,pa_1,1,0,0); + + LOADCHROMA(2); + PUTRGBA(dst_1,py_1,pa_1,2,0,0); + PUTRGBA(dst_2,py_2,pa_2,2,1,0); + + LOADCHROMA(3); + PUTRGBA(dst_2,py_2,pa_2,3,1,0); + PUTRGBA(dst_1,py_1,pa_1,3,0,0); + pa_1 += 8;\ + pa_2 += 8;\ +ENDYUV2RGBLINE(8) + LOADCHROMA(0); + PUTRGBA(dst_1,py_1,pa_1,0,0,0); + PUTRGBA(dst_2,py_2,pa_2,0,1,0); + + LOADCHROMA(1); + PUTRGBA(dst_2,py_2,pa_2,1,1,0); + PUTRGBA(dst_1,py_1,pa_1,1,0,0); +ENDYUV2RGBFUNC() + +YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0) LOADCHROMA(0); PUTRGB24(dst_1,py_1,0); PUTRGB24(dst_2,py_2,0); @@ -201,7 +247,7 @@ ENDYUV2RGBLINE(24) ENDYUV2RGBFUNC() // only trivial mods from yuv2rgb_c_24_rgb -YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0) LOADCHROMA(0); PUTBGR24(dst_1,py_1,0); PUTBGR24(dst_2,py_2,0); @@ -229,7 +275,7 @@ ENDYUV2RGBFUNC() // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_16, uint16_t) +YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0) LOADCHROMA(0); PUTRGB(dst_1,py_1,0,0); PUTRGB(dst_2,py_2,0,1); @@ -249,7 +295,7 @@ CLOSEYUV2RGBFUNC(8) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_8, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0) LOADCHROMA(0); PUTRGB(dst_1,py_1,0,0); PUTRGB(dst_2,py_2,0,1); @@ -268,7 +314,7 @@ YUV2RGBFUNC(yuv2rgb_c_8, uint8_t) CLOSEYUV2RGBFUNC(8) // r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) const uint8_t *d32 = dither_8x8_32[y&7]; const uint8_t *d64 = dither_8x8_73[y&7]; #define PUTRGB8(dst,src,i,o) \ @@ -297,7 +343,7 @@ CLOSEYUV2RGBFUNC(8) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_4, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0) int acc; #define PUTRGB4(dst,src,i) \ Y = src[2*i]; \ @@ -323,7 +369,7 @@ YUV2RGBFUNC(yuv2rgb_c_4, uint8_t) PUTRGB4(dst_1,py_1,3); CLOSEYUV2RGBFUNC(4) -YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) const uint8_t *d64 = dither_8x8_73[y&7]; const uint8_t *d128 = dither_8x8_220[y&7]; int acc; @@ -354,7 +400,7 @@ CLOSEYUV2RGBFUNC(4) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0) LOADCHROMA(0); PUTRGB(dst_1,py_1,0,0); PUTRGB(dst_2,py_2,0,1); @@ -372,7 +418,7 @@ YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t) PUTRGB(dst_1,py_1,3,0); CLOSEYUV2RGBFUNC(8) -YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) const uint8_t *d64 = dither_8x8_73[y&7]; const uint8_t *d128 = dither_8x8_220[y&7]; @@ -399,7 +445,7 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t) PUTRGB4DB(dst_1,py_1,3,6); CLOSEYUV2RGBFUNC(8) -YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) const uint8_t *d128 = dither_8x8_220[y&7]; char out_1 = 0, out_2 = 0; g= c->table_gU[128] + c->table_gV[128]; @@ -426,41 +472,26 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t) dst_2[0]= out_2; CLOSEYUV2RGBFUNC(1) -SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c) +SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) { SwsFunc t = NULL; #if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL - if (c->flags & SWS_CPU_CAPS_MMX2) { - switch (c->dstFormat) { - case PIX_FMT_RGB32: return yuv420_rgb32_MMX2; - case PIX_FMT_BGR24: return yuv420_rgb24_MMX2; - case PIX_FMT_RGB565: return yuv420_rgb16_MMX2; - case PIX_FMT_RGB555: return yuv420_rgb15_MMX2; - } - } - if (c->flags & SWS_CPU_CAPS_MMX) { - switch (c->dstFormat) { - case PIX_FMT_RGB32: return yuv420_rgb32_MMX; - case PIX_FMT_BGR24: return yuv420_rgb24_MMX; - case PIX_FMT_RGB565: return yuv420_rgb16_MMX; - case PIX_FMT_RGB555: return yuv420_rgb15_MMX; - } - } + t = ff_yuv2rgb_init_mmx(c); #endif #if HAVE_VIS - t = sws_yuv2rgb_init_vis(c); + t = ff_yuv2rgb_init_vis(c); #endif #if CONFIG_MLIB - t = sws_yuv2rgb_init_mlib(c); + t = ff_yuv2rgb_init_mlib(c); #endif #if HAVE_ALTIVEC && CONFIG_GPL if (c->flags & SWS_CPU_CAPS_ALTIVEC) - t = sws_yuv2rgb_init_altivec(c); + t = ff_yuv2rgb_init_altivec(c); #endif #if ARCH_BFIN if (c->flags & SWS_CPU_CAPS_BFIN) - t = sws_ff_bfin_yuv2rgb_get_func_ptr(c); + t = ff_yuv2rgb_get_func_ptr_bfin(c); #endif if (t) @@ -469,10 +500,10 @@ SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c) av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n"); switch (c->dstFormat) { - case PIX_FMT_BGR32_1: - case PIX_FMT_RGB32_1: - case PIX_FMT_BGR32: - case PIX_FMT_RGB32: return yuv2rgb_c_32; + case PIX_FMT_ARGB: + case PIX_FMT_ABGR: if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c; + case PIX_FMT_RGBA: + case PIX_FMT_BGRA: return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32; case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb; case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr; case PIX_FMT_RGB565: @@ -517,8 +548,8 @@ static void fill_gv_table(int table[256], const int elemsize, const int inc) } } -av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange, - int brightness, int contrast, int saturation) +av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange, + int brightness, int contrast, int saturation) { const int isRgb = c->dstFormat==PIX_FMT_RGB32 || c->dstFormat==PIX_FMT_RGB32_1 @@ -533,7 +564,7 @@ av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int uint8_t *y_table; uint16_t *y_table16; uint32_t *y_table32; - int i, base, rbase, gbase, bbase, abase; + int i, base, rbase, gbase, bbase, abase, needAlpha; const int yoffs = fullRange ? 384 : 326; int64_t crv = inv_table[0]; @@ -659,13 +690,15 @@ av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int rbase = base + (isRgb ? 16 : 0); gbase = base + 8; bbase = base + (isRgb ? 0 : 16); - abase = (base + 24) & 31; + needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat); + if (!needAlpha) + abase = (base + 24) & 31; c->yuvTable = av_malloc(1024*3*4); y_table32 = c->yuvTable; yb = -(384<<16) - oy; for (i = 0; i < 1024; i++) { uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); - y_table32[i ] = (yval << rbase) + (255 << abase); + y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase)); y_table32[i+1024] = yval << gbase; y_table32[i+2048] = yval << bbase; yb += cy; |