summaryrefslogtreecommitdiffstats
path: root/libswscale/yuv2rgb.c
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale/yuv2rgb.c')
-rw-r--r--libswscale/yuv2rgb.c175
1 files changed, 104 insertions, 71 deletions
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 65af412c2c..d8438c0c66 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -3,7 +3,6 @@
*
* Copyright (C) 2009 Konstantin Shishkov
*
- * MMX/MMX2 template stuff (needed for fast movntq support),
* 1,4,8bpp support and context / deglobalize stuff
* by Michael Niedermayer (michaelni@gmx.at)
*
@@ -33,38 +32,12 @@
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
-
-#define DITHER1XBPP // only for MMX
+#include "libavutil/x86_cpu.h"
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];
-#if HAVE_MMX && CONFIG_GPL
-
-/* hope these constant values are cache line aligned */
-DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
-
-//MMX versions
-#undef RENAME
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
-#define RENAME(a) a ## _MMX
-#include "yuv2rgb_template.c"
-
-//MMX2 versions
-#undef RENAME
-#undef HAVE_MMX2
-#define HAVE_MMX2 1
-#define RENAME(a) a ## _MMX2
-#include "yuv2rgb_template.c"
-
-#endif /* HAVE_MMX && CONFIG_GPL */
-
const int32_t ff_yuv2rgb_coeffs[8][4] = {
{117504, 138453, 13954, 34903}, /* no sequence_display_extension */
{117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
@@ -101,12 +74,18 @@ const int32_t ff_yuv2rgb_coeffs[8][4] = {
Y = src[2*i+1]; \
dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
-#define YUV2RGBFUNC(func_name, dst_type) \
+#define PUTRGBA(dst,ysrc,asrc,i,o,s) \
+ Y = ysrc[2*i+o]; \
+ dst[2*i ] = r[Y] + g[Y] + b[Y] + (asrc[2*i ]<<s); \
+ Y = ysrc[2*i+1-o]; \
+ dst[2*i+1] = r[Y] + g[Y] + b[Y] + (asrc[2*i+1]<<s);
+
+#define YUV2RGBFUNC(func_name, dst_type, alpha) \
static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
int srcSliceH, uint8_t* dst[], int dstStride[]){\
int y;\
\
- if (c->srcFormat == PIX_FMT_YUV422P) {\
+ if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
srcStride[1] *= 2;\
srcStride[2] *= 2;\
}\
@@ -119,7 +98,12 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic
uint8_t *py_2 = py_1 + srcStride[0];\
uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
+ uint8_t av_unused *pa_1, *pa_2;\
unsigned int h_size = c->dstW>>3;\
+ if (alpha){\
+ pa_1 = src[3] + y*srcStride[3];\
+ pa_2 = pa_1 + srcStride[3];\
+ }\
while (h_size--) {\
int av_unused U, V;\
int Y;\
@@ -145,7 +129,7 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic
ENDYUV2RGBLINE(dst_delta)\
ENDYUV2RGBFUNC()
-YUV2RGBFUNC(yuv2rgb_c_32, uint32_t)
+YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
LOADCHROMA(0);
PUTRGB(dst_1,py_1,0,0);
PUTRGB(dst_2,py_2,0,1);
@@ -174,7 +158,69 @@ ENDYUV2RGBLINE(8)
PUTRGB(dst_1,py_1,1,0);
ENDYUV2RGBFUNC()
-YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t)
+YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
+ LOADCHROMA(0);
+ PUTRGBA(dst_1,py_1,pa_1,0,0,24);
+ PUTRGBA(dst_2,py_2,pa_2,0,1,24);
+
+ LOADCHROMA(1);
+ PUTRGBA(dst_2,py_2,pa_1,1,1,24);
+ PUTRGBA(dst_1,py_1,pa_2,1,0,24);
+ LOADCHROMA(1);
+ PUTRGBA(dst_2,py_2,pa_1,1,1,24);
+ PUTRGBA(dst_1,py_1,pa_2,1,0,24);
+
+ LOADCHROMA(2);
+ PUTRGBA(dst_1,py_1,pa_1,2,0,24);
+ PUTRGBA(dst_2,py_2,pa_2,2,1,24);
+
+ LOADCHROMA(3);
+ PUTRGBA(dst_2,py_2,pa_1,3,1,24);
+ PUTRGBA(dst_1,py_1,pa_2,3,0,24);
+ pa_1 += 8;\
+ pa_2 += 8;\
+ENDYUV2RGBLINE(8)
+ LOADCHROMA(0);
+ PUTRGBA(dst_1,py_1,pa_1,0,0,24);
+ PUTRGBA(dst_2,py_2,pa_2,0,1,24);
+
+ LOADCHROMA(1);
+ PUTRGBA(dst_2,py_2,pa_1,1,1,24);
+ PUTRGBA(dst_1,py_1,pa_2,1,0,24);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
+ LOADCHROMA(0);
+ PUTRGBA(dst_1,py_1,pa_1,0,0,0);
+ PUTRGBA(dst_2,py_2,pa_2,0,1,0);
+
+ LOADCHROMA(1);
+ PUTRGBA(dst_2,py_2,pa_2,1,1,0);
+ PUTRGBA(dst_1,py_1,pa_1,1,0,0);
+ LOADCHROMA(1);
+ PUTRGBA(dst_2,py_2,pa_2,1,1,0);
+ PUTRGBA(dst_1,py_1,pa_1,1,0,0);
+
+ LOADCHROMA(2);
+ PUTRGBA(dst_1,py_1,pa_1,2,0,0);
+ PUTRGBA(dst_2,py_2,pa_2,2,1,0);
+
+ LOADCHROMA(3);
+ PUTRGBA(dst_2,py_2,pa_2,3,1,0);
+ PUTRGBA(dst_1,py_1,pa_1,3,0,0);
+ pa_1 += 8;\
+ pa_2 += 8;\
+ENDYUV2RGBLINE(8)
+ LOADCHROMA(0);
+ PUTRGBA(dst_1,py_1,pa_1,0,0,0);
+ PUTRGBA(dst_2,py_2,pa_2,0,1,0);
+
+ LOADCHROMA(1);
+ PUTRGBA(dst_2,py_2,pa_2,1,1,0);
+ PUTRGBA(dst_1,py_1,pa_1,1,0,0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
LOADCHROMA(0);
PUTRGB24(dst_1,py_1,0);
PUTRGB24(dst_2,py_2,0);
@@ -201,7 +247,7 @@ ENDYUV2RGBLINE(24)
ENDYUV2RGBFUNC()
// only trivial mods from yuv2rgb_c_24_rgb
-YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
LOADCHROMA(0);
PUTBGR24(dst_1,py_1,0);
PUTBGR24(dst_2,py_2,0);
@@ -229,7 +275,7 @@ ENDYUV2RGBFUNC()
// This is exactly the same code as yuv2rgb_c_32 except for the types of
// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_16, uint16_t)
+YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
LOADCHROMA(0);
PUTRGB(dst_1,py_1,0,0);
PUTRGB(dst_2,py_2,0,1);
@@ -249,7 +295,7 @@ CLOSEYUV2RGBFUNC(8)
// This is exactly the same code as yuv2rgb_c_32 except for the types of
// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0)
LOADCHROMA(0);
PUTRGB(dst_1,py_1,0,0);
PUTRGB(dst_2,py_2,0,1);
@@ -268,7 +314,7 @@ YUV2RGBFUNC(yuv2rgb_c_8, uint8_t)
CLOSEYUV2RGBFUNC(8)
// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
const uint8_t *d32 = dither_8x8_32[y&7];
const uint8_t *d64 = dither_8x8_73[y&7];
#define PUTRGB8(dst,src,i,o) \
@@ -297,7 +343,7 @@ CLOSEYUV2RGBFUNC(8)
// This is exactly the same code as yuv2rgb_c_32 except for the types of
// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_4, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0)
int acc;
#define PUTRGB4(dst,src,i) \
Y = src[2*i]; \
@@ -323,7 +369,7 @@ YUV2RGBFUNC(yuv2rgb_c_4, uint8_t)
PUTRGB4(dst_1,py_1,3);
CLOSEYUV2RGBFUNC(4)
-YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
const uint8_t *d64 = dither_8x8_73[y&7];
const uint8_t *d128 = dither_8x8_220[y&7];
int acc;
@@ -354,7 +400,7 @@ CLOSEYUV2RGBFUNC(4)
// This is exactly the same code as yuv2rgb_c_32 except for the types of
// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0)
LOADCHROMA(0);
PUTRGB(dst_1,py_1,0,0);
PUTRGB(dst_2,py_2,0,1);
@@ -372,7 +418,7 @@ YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t)
PUTRGB(dst_1,py_1,3,0);
CLOSEYUV2RGBFUNC(8)
-YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
const uint8_t *d64 = dither_8x8_73[y&7];
const uint8_t *d128 = dither_8x8_220[y&7];
@@ -399,7 +445,7 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t)
PUTRGB4DB(dst_1,py_1,3,6);
CLOSEYUV2RGBFUNC(8)
-YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t)
+YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
const uint8_t *d128 = dither_8x8_220[y&7];
char out_1 = 0, out_2 = 0;
g= c->table_gU[128] + c->table_gV[128];
@@ -426,41 +472,26 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t)
dst_2[0]= out_2;
CLOSEYUV2RGBFUNC(1)
-SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c)
+SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
{
SwsFunc t = NULL;
#if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL
- if (c->flags & SWS_CPU_CAPS_MMX2) {
- switch (c->dstFormat) {
- case PIX_FMT_RGB32: return yuv420_rgb32_MMX2;
- case PIX_FMT_BGR24: return yuv420_rgb24_MMX2;
- case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
- case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
- }
- }
- if (c->flags & SWS_CPU_CAPS_MMX) {
- switch (c->dstFormat) {
- case PIX_FMT_RGB32: return yuv420_rgb32_MMX;
- case PIX_FMT_BGR24: return yuv420_rgb24_MMX;
- case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
- case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
- }
- }
+ t = ff_yuv2rgb_init_mmx(c);
#endif
#if HAVE_VIS
- t = sws_yuv2rgb_init_vis(c);
+ t = ff_yuv2rgb_init_vis(c);
#endif
#if CONFIG_MLIB
- t = sws_yuv2rgb_init_mlib(c);
+ t = ff_yuv2rgb_init_mlib(c);
#endif
#if HAVE_ALTIVEC && CONFIG_GPL
if (c->flags & SWS_CPU_CAPS_ALTIVEC)
- t = sws_yuv2rgb_init_altivec(c);
+ t = ff_yuv2rgb_init_altivec(c);
#endif
#if ARCH_BFIN
if (c->flags & SWS_CPU_CAPS_BFIN)
- t = sws_ff_bfin_yuv2rgb_get_func_ptr(c);
+ t = ff_yuv2rgb_get_func_ptr_bfin(c);
#endif
if (t)
@@ -469,10 +500,10 @@ SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c)
av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
switch (c->dstFormat) {
- case PIX_FMT_BGR32_1:
- case PIX_FMT_RGB32_1:
- case PIX_FMT_BGR32:
- case PIX_FMT_RGB32: return yuv2rgb_c_32;
+ case PIX_FMT_ARGB:
+ case PIX_FMT_ABGR: if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c;
+ case PIX_FMT_RGBA:
+ case PIX_FMT_BGRA: return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32;
case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb;
case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr;
case PIX_FMT_RGB565:
@@ -517,8 +548,8 @@ static void fill_gv_table(int table[256], const int elemsize, const int inc)
}
}
-av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
- int brightness, int contrast, int saturation)
+av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
+ int brightness, int contrast, int saturation)
{
const int isRgb = c->dstFormat==PIX_FMT_RGB32
|| c->dstFormat==PIX_FMT_RGB32_1
@@ -533,7 +564,7 @@ av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
uint8_t *y_table;
uint16_t *y_table16;
uint32_t *y_table32;
- int i, base, rbase, gbase, bbase, abase;
+ int i, base, rbase, gbase, bbase, abase, needAlpha;
const int yoffs = fullRange ? 384 : 326;
int64_t crv = inv_table[0];
@@ -659,13 +690,15 @@ av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
rbase = base + (isRgb ? 16 : 0);
gbase = base + 8;
bbase = base + (isRgb ? 0 : 16);
- abase = (base + 24) & 31;
+ needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
+ if (!needAlpha)
+ abase = (base + 24) & 31;
c->yuvTable = av_malloc(1024*3*4);
y_table32 = c->yuvTable;
yb = -(384<<16) - oy;
for (i = 0; i < 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
- y_table32[i ] = (yval << rbase) + (255 << abase);
+ y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase));
y_table32[i+1024] = yval << gbase;
y_table32[i+2048] = yval << bbase;
yb += cy;