From 53b0783bf6356d56658e66e62d479aa8963faf54 Mon Sep 17 00:00:00 2001 From: diego Date: Sun, 29 Apr 2007 21:36:43 +0000 Subject: cosmetics attack, part III: Remove all tabs and prettyprint/reindent the code. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@23175 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libswscale/yuv2rgb.c | 830 ++++++++++++------------- libswscale/yuv2rgb_altivec.c | 1336 +++++++++++++++++++++-------------------- libswscale/yuv2rgb_mlib.c | 36 +- libswscale/yuv2rgb_template.c | 692 ++++++++++----------- 4 files changed, 1449 insertions(+), 1445 deletions(-) diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index db6534287e..98a3569d85 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -156,7 +156,7 @@ const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ #ifdef HAVE_MMX /* hope these constant values are cache line aligned */ -static uint64_t attribute_used __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ffULL; +static uint64_t attribute_used __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ffULL; static uint64_t attribute_used __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8ULL; static uint64_t attribute_used __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfcULL; @@ -172,12 +172,12 @@ static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither; static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither; static uint64_t __attribute__((aligned(8))) dither4[2]={ - 0x0103010301030103LL, - 0x0200020002000200LL,}; + 0x0103010301030103LL, + 0x0200020002000200LL,}; static uint64_t __attribute__((aligned(8))) dither8[2]={ - 0x0602060206020602LL, - 0x0004000400040004LL,}; + 0x0602060206020602LL, + 0x0004000400040004LL,}; #undef HAVE_MMX @@ -210,404 +210,404 @@ const int32_t Inverse_Table_6_9[8][4] = { {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ }; -#define RGB(i) \ - U = pu[i]; \ - V = pv[i]; \ - r = (void *)c->table_rV[V]; \ - g = (void *)(c->table_gU[U] + c->table_gV[V]); \ - b = (void *)c->table_bU[U]; - -#define DST1(i) \ - Y = py_1[2*i]; \ - dst_1[2*i] = r[Y] + g[Y] + b[Y]; \ - Y = py_1[2*i+1]; \ - dst_1[2*i+1] = r[Y] + g[Y] + b[Y]; - -#define DST2(i) \ - Y = py_2[2*i]; \ - dst_2[2*i] = r[Y] + g[Y] + b[Y]; \ - Y = py_2[2*i+1]; \ - dst_2[2*i+1] = r[Y] + g[Y] + b[Y]; - -#define DST1RGB(i) \ - Y = py_1[2*i]; \ - dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y]; \ - Y = py_1[2*i+1]; \ - dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y]; - -#define DST2RGB(i) \ - Y = py_2[2*i]; \ - dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y]; \ - Y = py_2[2*i+1]; \ - dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y]; - -#define DST1BGR(i) \ - Y = py_1[2*i]; \ - dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y]; \ - Y = py_1[2*i+1]; \ - dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y]; - -#define DST2BGR(i) \ - Y = py_2[2*i]; \ - dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y]; \ - Y = py_2[2*i+1]; \ - dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y]; +#define RGB(i) \ + U = pu[i]; \ + V = pv[i]; \ + r = (void *)c->table_rV[V]; \ + g = (void *)(c->table_gU[U] + c->table_gV[V]); \ + b = (void *)c->table_bU[U]; + +#define DST1(i) \ + Y = py_1[2*i]; \ + dst_1[2*i] = r[Y] + g[Y] + b[Y]; \ + Y = py_1[2*i+1]; \ + dst_1[2*i+1] = r[Y] + g[Y] + b[Y]; + +#define DST2(i) \ + Y = py_2[2*i]; \ + dst_2[2*i] = r[Y] + g[Y] + b[Y]; \ + Y = py_2[2*i+1]; \ + dst_2[2*i+1] = r[Y] + g[Y] + b[Y]; + +#define DST1RGB(i) \ + Y = py_1[2*i]; \ + dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y]; \ + Y = py_1[2*i+1]; \ + dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y]; + +#define DST2RGB(i) \ + Y = py_2[2*i]; \ + dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y]; \ + Y = py_2[2*i+1]; \ + dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y]; + +#define DST1BGR(i) \ + Y = py_1[2*i]; \ + dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y]; \ + Y = py_1[2*i+1]; \ + dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y]; + +#define DST2BGR(i) \ + Y = py_2[2*i]; \ + dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y]; \ + Y = py_2[2*i+1]; \ + dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y]; #define PROLOG(func_name, dst_type) \ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \ - int srcSliceH, uint8_t* dst[], int dstStride[]){\ + int srcSliceH, uint8_t* dst[], int dstStride[]){\ int y;\ \ - if(c->srcFormat == PIX_FMT_YUV422P){\ - srcStride[1] *= 2;\ - srcStride[2] *= 2;\ + if (c->srcFormat == PIX_FMT_YUV422P){\ + srcStride[1] *= 2;\ + srcStride[2] *= 2;\ }\ - for(y=0; y>1)*srcStride[1];\ - uint8_t *pv= src[2] + (y>>1)*srcStride[2];\ - unsigned int h_size= c->dstW>>3;\ - while (h_size--) {\ - int attribute_unused U, V;\ - int Y;\ + for (y=0; y>1)*srcStride[1];\ + uint8_t *pv= src[2] + (y>>1)*srcStride[2];\ + unsigned int h_size= c->dstW>>3;\ + while (h_size--) {\ + int attribute_unused U, V;\ + int Y;\ #define EPILOG(dst_delta)\ - pu += 4;\ - pv += 4;\ - py_1 += 8;\ - py_2 += 8;\ - dst_1 += dst_delta;\ - dst_2 += dst_delta;\ - }\ + pu += 4;\ + pv += 4;\ + py_1 += 8;\ + py_2 += 8;\ + dst_1 += dst_delta;\ + dst_2 += dst_delta;\ + }\ }\ return srcSliceH;\ } PROLOG(yuv2rgb_c_32, uint32_t) - RGB(0); - DST1(0); - DST2(0); + RGB(0); + DST1(0); + DST2(0); - RGB(1); - DST2(1); - DST1(1); + RGB(1); + DST2(1); + DST1(1); - RGB(2); - DST1(2); - DST2(2); + RGB(2); + DST1(2); + DST2(2); - RGB(3); - DST2(3); - DST1(3); + RGB(3); + DST2(3); + DST1(3); EPILOG(8) PROLOG(yuv2rgb_c_24_rgb, uint8_t) - RGB(0); - DST1RGB(0); - DST2RGB(0); + RGB(0); + DST1RGB(0); + DST2RGB(0); - RGB(1); - DST2RGB(1); - DST1RGB(1); + RGB(1); + DST2RGB(1); + DST1RGB(1); - RGB(2); - DST1RGB(2); - DST2RGB(2); + RGB(2); + DST1RGB(2); + DST2RGB(2); - RGB(3); - DST2RGB(3); - DST1RGB(3); + RGB(3); + DST2RGB(3); + DST1RGB(3); EPILOG(24) // only trivial mods from yuv2rgb_c_24_rgb PROLOG(yuv2rgb_c_24_bgr, uint8_t) - RGB(0); - DST1BGR(0); - DST2BGR(0); + RGB(0); + DST1BGR(0); + DST2BGR(0); - RGB(1); - DST2BGR(1); - DST1BGR(1); + RGB(1); + DST2BGR(1); + DST1BGR(1); - RGB(2); - DST1BGR(2); - DST2BGR(2); + RGB(2); + DST1BGR(2); + DST2BGR(2); - RGB(3); - DST2BGR(3); - DST1BGR(3); + RGB(3); + DST2BGR(3); + DST1BGR(3); EPILOG(24) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 PROLOG(yuv2rgb_c_16, uint16_t) - RGB(0); - DST1(0); - DST2(0); + RGB(0); + DST1(0); + DST2(0); - RGB(1); - DST2(1); - DST1(1); + RGB(1); + DST2(1); + DST1(1); - RGB(2); - DST1(2); - DST2(2); + RGB(2); + DST1(2); + DST2(2); - RGB(3); - DST2(3); - DST1(3); + RGB(3); + DST2(3); + DST1(3); EPILOG(8) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 PROLOG(yuv2rgb_c_8, uint8_t) - RGB(0); - DST1(0); - DST2(0); + RGB(0); + DST1(0); + DST2(0); - RGB(1); - DST2(1); - DST1(1); + RGB(1); + DST2(1); + DST1(1); - RGB(2); - DST1(2); - DST2(2); + RGB(2); + DST1(2); + DST2(2); - RGB(3); - DST2(3); - DST1(3); + RGB(3); + DST2(3); + DST1(3); EPILOG(8) // r, g, b, dst_1, dst_2 PROLOG(yuv2rgb_c_8_ordered_dither, uint8_t) - const uint8_t *d32= dither_8x8_32[y&7]; - const uint8_t *d64= dither_8x8_73[y&7]; -#define DST1bpp8(i,o) \ - Y = py_1[2*i]; \ - dst_1[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \ - Y = py_1[2*i+1]; \ - dst_1[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]]; - -#define DST2bpp8(i,o) \ - Y = py_2[2*i]; \ - dst_2[2*i] = r[Y+d32[8+o]] + g[Y+d32[8+o]] + b[Y+d64[8+o]]; \ - Y = py_2[2*i+1]; \ - dst_2[2*i+1] = r[Y+d32[9+o]] + g[Y+d32[9+o]] + b[Y+d64[9+o]]; - - - RGB(0); - DST1bpp8(0,0); - DST2bpp8(0,0); - - RGB(1); - DST2bpp8(1,2); - DST1bpp8(1,2); - - RGB(2); - DST1bpp8(2,4); - DST2bpp8(2,4); - - RGB(3); - DST2bpp8(3,6); - DST1bpp8(3,6); + const uint8_t *d32= dither_8x8_32[y&7]; + const uint8_t *d64= dither_8x8_73[y&7]; +#define DST1bpp8(i,o) \ + Y = py_1[2*i]; \ + dst_1[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \ + Y = py_1[2*i+1]; \ + dst_1[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]]; + +#define DST2bpp8(i,o) \ + Y = py_2[2*i]; \ + dst_2[2*i] = r[Y+d32[8+o]] + g[Y+d32[8+o]] + b[Y+d64[8+o]]; \ + Y = py_2[2*i+1]; \ + dst_2[2*i+1] = r[Y+d32[9+o]] + g[Y+d32[9+o]] + b[Y+d64[9+o]]; + + + RGB(0); + DST1bpp8(0,0); + DST2bpp8(0,0); + + RGB(1); + DST2bpp8(1,2); + DST1bpp8(1,2); + + RGB(2); + DST1bpp8(2,4); + DST2bpp8(2,4); + + RGB(3); + DST2bpp8(3,6); + DST1bpp8(3,6); EPILOG(8) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 PROLOG(yuv2rgb_c_4, uint8_t) - int acc; -#define DST1_4(i) \ - Y = py_1[2*i]; \ - acc = r[Y] + g[Y] + b[Y]; \ - Y = py_1[2*i+1]; \ - acc |= (r[Y] + g[Y] + b[Y])<<4;\ - dst_1[i] = acc; - -#define DST2_4(i) \ - Y = py_2[2*i]; \ - acc = r[Y] + g[Y] + b[Y]; \ - Y = py_2[2*i+1]; \ - acc |= (r[Y] + g[Y] + b[Y])<<4;\ - dst_2[i] = acc; - - RGB(0); - DST1_4(0); - DST2_4(0); - - RGB(1); - DST2_4(1); - DST1_4(1); - - RGB(2); - DST1_4(2); - DST2_4(2); - - RGB(3); - DST2_4(3); - DST1_4(3); + int acc; +#define DST1_4(i) \ + Y = py_1[2*i]; \ + acc = r[Y] + g[Y] + b[Y]; \ + Y = py_1[2*i+1]; \ + acc |= (r[Y] + g[Y] + b[Y])<<4; \ + dst_1[i] = acc; + +#define DST2_4(i) \ + Y = py_2[2*i]; \ + acc = r[Y] + g[Y] + b[Y]; \ + Y = py_2[2*i+1]; \ + acc |= (r[Y] + g[Y] + b[Y])<<4; \ + dst_2[i] = acc; + + RGB(0); + DST1_4(0); + DST2_4(0); + + RGB(1); + DST2_4(1); + DST1_4(1); + + RGB(2); + DST1_4(2); + DST2_4(2); + + RGB(3); + DST2_4(3); + DST1_4(3); EPILOG(4) PROLOG(yuv2rgb_c_4_ordered_dither, uint8_t) - const uint8_t *d64= dither_8x8_73[y&7]; - const uint8_t *d128=dither_8x8_220[y&7]; - int acc; - -#define DST1bpp4(i,o) \ - Y = py_1[2*i]; \ - acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ - Y = py_1[2*i+1]; \ - acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4;\ - dst_1[i]= acc; - -#define DST2bpp4(i,o) \ - Y = py_2[2*i]; \ - acc = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \ - Y = py_2[2*i+1]; \ - acc |= (r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]])<<4;\ - dst_2[i]= acc; - - - RGB(0); - DST1bpp4(0,0); - DST2bpp4(0,0); - - RGB(1); - DST2bpp4(1,2); - DST1bpp4(1,2); - - RGB(2); - DST1bpp4(2,4); - DST2bpp4(2,4); - - RGB(3); - DST2bpp4(3,6); - DST1bpp4(3,6); + const uint8_t *d64= dither_8x8_73[y&7]; + const uint8_t *d128=dither_8x8_220[y&7]; + int acc; + +#define DST1bpp4(i,o) \ + Y = py_1[2*i]; \ + acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ + Y = py_1[2*i+1]; \ + acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4; \ + dst_1[i]= acc; + +#define DST2bpp4(i,o) \ + Y = py_2[2*i]; \ + acc = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \ + Y = py_2[2*i+1]; \ + acc |= (r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]])<<4; \ + dst_2[i]= acc; + + + RGB(0); + DST1bpp4(0,0); + DST2bpp4(0,0); + + RGB(1); + DST2bpp4(1,2); + DST1bpp4(1,2); + + RGB(2); + DST1bpp4(2,4); + DST2bpp4(2,4); + + RGB(3); + DST2bpp4(3,6); + DST1bpp4(3,6); EPILOG(4) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 PROLOG(yuv2rgb_c_4b, uint8_t) - RGB(0); - DST1(0); - DST2(0); + RGB(0); + DST1(0); + DST2(0); - RGB(1); - DST2(1); - DST1(1); + RGB(1); + DST2(1); + DST1(1); - RGB(2); - DST1(2); - DST2(2); + RGB(2); + DST1(2); + DST2(2); - RGB(3); - DST2(3); - DST1(3); + RGB(3); + DST2(3); + DST1(3); EPILOG(8) PROLOG(yuv2rgb_c_4b_ordered_dither, uint8_t) - const uint8_t *d64= dither_8x8_73[y&7]; - const uint8_t *d128=dither_8x8_220[y&7]; + const uint8_t *d64= dither_8x8_73[y&7]; + const uint8_t *d128=dither_8x8_220[y&7]; -#define DST1bpp4b(i,o) \ - Y = py_1[2*i]; \ - dst_1[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ - Y = py_1[2*i+1]; \ - dst_1[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]]; +#define DST1bpp4b(i,o) \ + Y = py_1[2*i]; \ + dst_1[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ + Y = py_1[2*i+1]; \ + dst_1[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]]; -#define DST2bpp4b(i,o) \ - Y = py_2[2*i]; \ - dst_2[2*i] = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \ - Y = py_2[2*i+1]; \ - dst_2[2*i+1] = r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]]; +#define DST2bpp4b(i,o) \ + Y = py_2[2*i]; \ + dst_2[2*i] = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \ + Y = py_2[2*i+1]; \ + dst_2[2*i+1] = r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]]; - RGB(0); - DST1bpp4b(0,0); - DST2bpp4b(0,0); + RGB(0); + DST1bpp4b(0,0); + DST2bpp4b(0,0); - RGB(1); - DST2bpp4b(1,2); - DST1bpp4b(1,2); + RGB(1); + DST2bpp4b(1,2); + DST1bpp4b(1,2); - RGB(2); - DST1bpp4b(2,4); - DST2bpp4b(2,4); + RGB(2); + DST1bpp4b(2,4); + DST2bpp4b(2,4); - RGB(3); - DST2bpp4b(3,6); - DST1bpp4b(3,6); + RGB(3); + DST2bpp4b(3,6); + DST1bpp4b(3,6); EPILOG(8) PROLOG(yuv2rgb_c_1_ordered_dither, uint8_t) - const uint8_t *d128=dither_8x8_220[y&7]; - char out_1=0, out_2=0; - g= c->table_gU[128] + c->table_gV[128]; + const uint8_t *d128=dither_8x8_220[y&7]; + char out_1=0, out_2=0; + g= c->table_gU[128] + c->table_gV[128]; -#define DST1bpp1(i,o) \ - Y = py_1[2*i]; \ - out_1+= out_1 + g[Y+d128[0+o]]; \ - Y = py_1[2*i+1]; \ - out_1+= out_1 + g[Y+d128[1+o]]; +#define DST1bpp1(i,o) \ + Y = py_1[2*i]; \ + out_1+= out_1 + g[Y+d128[0+o]]; \ + Y = py_1[2*i+1]; \ + out_1+= out_1 + g[Y+d128[1+o]]; -#define DST2bpp1(i,o) \ - Y = py_2[2*i]; \ - out_2+= out_2 + g[Y+d128[8+o]]; \ - Y = py_2[2*i+1]; \ - out_2+= out_2 + g[Y+d128[9+o]]; +#define DST2bpp1(i,o) \ + Y = py_2[2*i]; \ + out_2+= out_2 + g[Y+d128[8+o]]; \ + Y = py_2[2*i+1]; \ + out_2+= out_2 + g[Y+d128[9+o]]; - DST1bpp1(0,0); - DST2bpp1(0,0); + DST1bpp1(0,0); + DST2bpp1(0,0); - DST2bpp1(1,2); - DST1bpp1(1,2); + DST2bpp1(1,2); + DST1bpp1(1,2); - DST1bpp1(2,4); - DST2bpp1(2,4); + DST1bpp1(2,4); + DST2bpp1(2,4); - DST2bpp1(3,6); - DST1bpp1(3,6); + DST2bpp1(3,6); + DST1bpp1(3,6); - dst_1[0]= out_1; - dst_2[0]= out_2; + dst_1[0]= out_1; + dst_2[0]= out_2; EPILOG(1) SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) { #if defined(HAVE_MMX2) || defined(HAVE_MMX) - if(c->flags & SWS_CPU_CAPS_MMX2){ - switch(c->dstFormat){ - case PIX_FMT_RGB32: return yuv420_rgb32_MMX2; - case PIX_FMT_BGR24: return yuv420_rgb24_MMX2; - case PIX_FMT_BGR565: return yuv420_rgb16_MMX2; - case PIX_FMT_BGR555: return yuv420_rgb15_MMX2; - } + if (c->flags & SWS_CPU_CAPS_MMX2){ + switch(c->dstFormat){ + case PIX_FMT_RGB32: return yuv420_rgb32_MMX2; + case PIX_FMT_BGR24: return yuv420_rgb24_MMX2; + case PIX_FMT_BGR565: return yuv420_rgb16_MMX2; + case PIX_FMT_BGR555: return yuv420_rgb15_MMX2; + } } - if(c->flags & SWS_CPU_CAPS_MMX){ - switch(c->dstFormat){ - case PIX_FMT_RGB32: return yuv420_rgb32_MMX; - case PIX_FMT_BGR24: return yuv420_rgb24_MMX; - case PIX_FMT_BGR565: return yuv420_rgb16_MMX; - case PIX_FMT_BGR555: return yuv420_rgb15_MMX; - } + if (c->flags & SWS_CPU_CAPS_MMX){ + switch(c->dstFormat){ + case PIX_FMT_RGB32: return yuv420_rgb32_MMX; + case PIX_FMT_BGR24: return yuv420_rgb24_MMX; + case PIX_FMT_BGR565: return yuv420_rgb16_MMX; + case PIX_FMT_BGR555: return yuv420_rgb15_MMX; + } } #endif #ifdef HAVE_MLIB { - SwsFunc t= yuv2rgb_init_mlib(c); - if(t) return t; + SwsFunc t= yuv2rgb_init_mlib(c); + if (t) return t; } #endif #ifdef HAVE_ALTIVEC if (c->flags & SWS_CPU_CAPS_ALTIVEC) { - SwsFunc t = yuv2rgb_init_altivec(c); - if(t) return t; + SwsFunc t = yuv2rgb_init_altivec(c); + if (t) return t; } #endif @@ -630,7 +630,7 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) case PIX_FMT_BGR4_BYTE: return yuv2rgb_c_4b_ordered_dither; case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither; default: - assert(0); + assert(0); } return NULL; } @@ -638,9 +638,9 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) static int div_round (int dividend, int divisor) { if (dividend > 0) - return (dividend + (divisor>>1)) / divisor; + return (dividend + (divisor>>1)) / divisor; else - return -((-dividend + (divisor>>1)) / divisor); + return -((-dividend + (divisor>>1)) / divisor); } int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation) @@ -667,9 +667,9 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int64_t oy = 0; //printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv); - if(!fullRange){ - cy= (cy*255) / 219; - oy= 16<<16; + if (!fullRange){ + cy= (cy*255) / 219; + oy= 16<<16; }else{ crv= (crv*224) / 255; cbu= (cbu*224) / 255; @@ -686,163 +686,163 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, oy -= 256*brightness; for (i = 0; i < 1024; i++) { - int j; + int j; - j= (cy*(((i - 384)<<16) - oy) + (1<<31))>>32; - j = (j < 0) ? 0 : ((j > 255) ? 255 : j); - table_Y[i] = j; + j= (cy*(((i - 384)<<16) - oy) + (1<<31))>>32; + j = (j < 0) ? 0 : ((j > 255) ? 255 : j); + table_Y[i] = j; } switch (bpp) { case 32: - table_start= table_32 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t)); + table_start= table_32 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t)); - entry_size = sizeof (uint32_t); - table_r = table_32 + 197; - table_b = table_32 + 197 + 685; - table_g = table_32 + 197 + 2*682; + entry_size = sizeof (uint32_t); + table_r = table_32 + 197; + table_b = table_32 + 197 + 685; + table_g = table_32 + 197 + 2*682; - for (i = -197; i < 256+197; i++) - ((uint32_t *)table_r)[i] = table_Y[i+384] << (isRgb ? 16 : 0); - for (i = -132; i < 256+132; i++) - ((uint32_t *)table_g)[i] = table_Y[i+384] << 8; - for (i = -232; i < 256+232; i++) - ((uint32_t *)table_b)[i] = table_Y[i+384] << (isRgb ? 0 : 16); - break; + for (i = -197; i < 256+197; i++) + ((uint32_t *)table_r)[i] = table_Y[i+384] << (isRgb ? 16 : 0); + for (i = -132; i < 256+132; i++) + ((uint32_t *)table_g)[i] = table_Y[i+384] << 8; + for (i = -232; i < 256+232; i++) + ((uint32_t *)table_b)[i] = table_Y[i+384] << (isRgb ? 0 : 16); + break; case 24: - table_start= table_8 = av_malloc ((256 + 2*232) * sizeof (uint8_t)); + table_start= table_8 = av_malloc ((256 + 2*232) * sizeof (uint8_t)); - entry_size = sizeof (uint8_t); - table_r = table_g = table_b = table_8 + 232; + entry_size = sizeof (uint8_t); + table_r = table_g = table_b = table_8 + 232; - for (i = -232; i < 256+232; i++) - ((uint8_t * )table_b)[i] = table_Y[i+384]; - break; + for (i = -232; i < 256+232; i++) + ((uint8_t * )table_b)[i] = table_Y[i+384]; + break; case 15: case 16: - table_start= table_16 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); + table_start= table_16 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); - entry_size = sizeof (uint16_t); - table_r = table_16 + 197; - table_b = table_16 + 197 + 685; - table_g = table_16 + 197 + 2*682; + entry_size = sizeof (uint16_t); + table_r = table_16 + 197; + table_b = table_16 + 197 + 685; + table_g = table_16 + 197 + 2*682; - for (i = -197; i < 256+197; i++) { - int j = table_Y[i+384] >> 3; + for (i = -197; i < 256+197; i++) { + int j = table_Y[i+384] >> 3; - if (isRgb) - j <<= ((bpp==16) ? 11 : 10); + if (isRgb) + j <<= ((bpp==16) ? 11 : 10); - ((uint16_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); + ((uint16_t *)table_r)[i] = j; + } + for (i = -132; i < 256+132; i++) { + int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); - ((uint16_t *)table_g)[i] = j << 5; - } - for (i = -232; i < 256+232; i++) { - int j = table_Y[i+384] >> 3; + ((uint16_t *)table_g)[i] = j << 5; + } + for (i = -232; i < 256+232; i++) { + int j = table_Y[i+384] >> 3; - if (!isRgb) - j <<= ((bpp==16) ? 11 : 10); + if (!isRgb) + j <<= ((bpp==16) ? 11 : 10); - ((uint16_t *)table_b)[i] = j; - } - break; + ((uint16_t *)table_b)[i] = j; + } + break; case 8: - table_start= table_332 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); + table_start= table_332 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); - entry_size = sizeof (uint8_t); - table_r = table_332 + 197; - table_b = table_332 + 197 + 685; - table_g = table_332 + 197 + 2*682; + entry_size = sizeof (uint8_t); + table_r = table_332 + 197; + table_b = table_332 + 197 + 685; + table_g = table_332 + 197 + 2*682; - for (i = -197; i < 256+197; i++) { - int j = (table_Y[i+384 - 16] + 18)/36; + for (i = -197; i < 256+197; i++) { + int j = (table_Y[i+384 - 16] + 18)/36; - if (isRgb) - j <<= 5; + if (isRgb) + j <<= 5; - ((uint8_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = (table_Y[i+384 - 16] + 18)/36; + ((uint8_t *)table_r)[i] = j; + } + for (i = -132; i < 256+132; i++) { + int j = (table_Y[i+384 - 16] + 18)/36; - if (!isRgb) - j <<= 1; + if (!isRgb) + j <<= 1; - ((uint8_t *)table_g)[i] = j << 2; - } - for (i = -232; i < 256+232; i++) { - int j = (table_Y[i+384 - 37] + 43)/85; + ((uint8_t *)table_g)[i] = j << 2; + } + for (i = -232; i < 256+232; i++) { + int j = (table_Y[i+384 - 37] + 43)/85; - if (!isRgb) - j <<= 6; + if (!isRgb) + j <<= 6; - ((uint8_t *)table_b)[i] = j; - } - break; + ((uint8_t *)table_b)[i] = j; + } + break; case 4: case 4|128: - table_start= table_121 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); + table_start= table_121 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); - entry_size = sizeof (uint8_t); - table_r = table_121 + 197; - table_b = table_121 + 197 + 685; - table_g = table_121 + 197 + 2*682; + entry_size = sizeof (uint8_t); + table_r = table_121 + 197; + table_b = table_121 + 197 + 685; + table_g = table_121 + 197 + 2*682; - for (i = -197; i < 256+197; i++) { - int j = table_Y[i+384 - 110] >> 7; + for (i = -197; i < 256+197; i++) { + int j = table_Y[i+384 - 110] >> 7; - if (isRgb) - j <<= 3; + if (isRgb) + j <<= 3; - ((uint8_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = (table_Y[i+384 - 37]+ 43)/85; + ((uint8_t *)table_r)[i] = j; + } + for (i = -132; i < 256+132; i++) { + int j = (table_Y[i+384 - 37]+ 43)/85; - ((uint8_t *)table_g)[i] = j << 1; - } - for (i = -232; i < 256+232; i++) { - int j =table_Y[i+384 - 110] >> 7; + ((uint8_t *)table_g)[i] = j << 1; + } + for (i = -232; i < 256+232; i++) { + int j =table_Y[i+384 - 110] >> 7; - if (!isRgb) - j <<= 3; + if (!isRgb) + j <<= 3; - ((uint8_t *)table_b)[i] = j; - } - break; + ((uint8_t *)table_b)[i] = j; + } + break; case 1: - table_start= table_1 = av_malloc (256*2 * sizeof (uint8_t)); + table_start= table_1 = av_malloc (256*2 * sizeof (uint8_t)); - entry_size = sizeof (uint8_t); - table_g = table_1; - table_r = table_b = NULL; + entry_size = sizeof (uint8_t); + table_g = table_1; + table_r = table_b = NULL; - for (i = 0; i < 256+256; i++) { - int j = table_Y[i + 384 - 110]>>7; + for (i = 0; i < 256+256; i++) { + int j = table_Y[i + 384 - 110]>>7; - ((uint8_t *)table_g)[i] = j; - } - break; + ((uint8_t *)table_g)[i] = j; + } + break; default: - table_start= NULL; - av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp); - //free mem? - return -1; + table_start= NULL; + av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp); + //free mem? + return -1; } for (i = 0; i < 256; i++) { - c->table_rV[i] = (uint8_t *)table_r + entry_size * div_round (crv * (i-128), 76309); - c->table_gU[i] = (uint8_t *)table_g + entry_size * div_round (cgu * (i-128), 76309); - c->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); - c->table_bU[i] = (uint8_t *)table_b + entry_size * div_round (cbu * (i-128), 76309); + c->table_rV[i] = (uint8_t *)table_r + entry_size * div_round (crv * (i-128), 76309); + c->table_gU[i] = (uint8_t *)table_g + entry_size * div_round (cgu * (i-128), 76309); + c->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); + c->table_bU[i] = (uint8_t *)table_b + entry_size * div_round (cbu * (i-128), 76309); } av_free(c->yuvTable); diff --git a/libswscale/yuv2rgb_altivec.c b/libswscale/yuv2rgb_altivec.c index 60cc0a4ac1..04e8e1711e 100644 --- a/libswscale/yuv2rgb_altivec.c +++ b/libswscale/yuv2rgb_altivec.c @@ -139,70 +139,70 @@ typedef signed char sbyte; static const vector unsigned char perm_rgb_0 = (const vector unsigned char)AVV(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05, - 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a), + 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a), perm_rgb_1 = (const vector unsigned char)AVV(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17, - 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f), + 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f), perm_rgb_2 = (const vector unsigned char)AVV(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, - 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05), + 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05), perm_rgb_3 = (const vector unsigned char)AVV(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a, - 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f); - -#define vec_merge3(x2,x1,x0,y0,y1,y2) \ -do { \ - typeof(x0) o0,o2,o3; \ - o0 = vec_mergeh (x0,x1); \ - y0 = vec_perm (o0, x2, perm_rgb_0);\ - o2 = vec_perm (o0, x2, perm_rgb_1);\ - o3 = vec_mergel (x0,x1); \ - y1 = vec_perm (o3,o2,perm_rgb_2); \ - y2 = vec_perm (o3,o2,perm_rgb_3); \ + 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f); + +#define vec_merge3(x2,x1,x0,y0,y1,y2) \ +do { \ + typeof(x0) o0,o2,o3; \ + o0 = vec_mergeh (x0,x1); \ + y0 = vec_perm (o0, x2, perm_rgb_0); \ + o2 = vec_perm (o0, x2, perm_rgb_1); \ + o3 = vec_mergel (x0,x1); \ + y1 = vec_perm (o3,o2,perm_rgb_2); \ + y2 = vec_perm (o3,o2,perm_rgb_3); \ } while(0) -#define vec_mstbgr24(x0,x1,x2,ptr) \ -do { \ - typeof(x0) _0,_1,_2; \ - vec_merge3 (x0,x1,x2,_0,_1,_2); \ - vec_st (_0, 0, ptr++); \ - vec_st (_1, 0, ptr++); \ - vec_st (_2, 0, ptr++); \ +#define vec_mstbgr24(x0,x1,x2,ptr) \ +do { \ + typeof(x0) _0,_1,_2; \ + vec_merge3 (x0,x1,x2,_0,_1,_2); \ + vec_st (_0, 0, ptr++); \ + vec_st (_1, 0, ptr++); \ + vec_st (_2, 0, ptr++); \ } while (0); -#define vec_mstrgb24(x0,x1,x2,ptr) \ -do { \ - typeof(x0) _0,_1,_2; \ - vec_merge3 (x2,x1,x0,_0,_1,_2); \ - vec_st (_0, 0, ptr++); \ - vec_st (_1, 0, ptr++); \ - vec_st (_2, 0, ptr++); \ +#define vec_mstrgb24(x0,x1,x2,ptr) \ +do { \ + typeof(x0) _0,_1,_2; \ + vec_merge3 (x2,x1,x0,_0,_1,_2); \ + vec_st (_0, 0, ptr++); \ + vec_st (_1, 0, ptr++); \ + vec_st (_2, 0, ptr++); \ } while (0); /* pack the pixels in rgb0 format msb R lsb 0 */ -#define vec_mstrgb32(T,x0,x1,x2,x3,ptr) \ -do { \ - T _0,_1,_2,_3; \ - _0 = vec_mergeh (x0,x1); \ - _1 = vec_mergeh (x2,x3); \ - _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \ - _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \ - vec_st (_2, 0*16, (T *)ptr); \ - vec_st (_3, 1*16, (T *)ptr); \ - _0 = vec_mergel (x0,x1); \ - _1 = vec_mergel (x2,x3); \ - _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \ - _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \ - vec_st (_2, 2*16, (T *)ptr); \ - vec_st (_3, 3*16, (T *)ptr); \ - ptr += 4; \ +#define vec_mstrgb32(T,x0,x1,x2,x3,ptr) \ +do { \ + T _0,_1,_2,_3; \ + _0 = vec_mergeh (x0,x1); \ + _1 = vec_mergeh (x2,x3); \ + _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \ + _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \ + vec_st (_2, 0*16, (T *)ptr); \ + vec_st (_3, 1*16, (T *)ptr); \ + _0 = vec_mergel (x0,x1); \ + _1 = vec_mergel (x2,x3); \ + _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \ + _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \ + vec_st (_2, 2*16, (T *)ptr); \ + vec_st (_3, 3*16, (T *)ptr); \ + ptr += 4; \ } while (0); /* | 1 0 1.4021 | | Y | | 1 -0.3441 -0.7142 |x| Cb| - | 1 1.7718 0 | | Cr| + | 1 1.7718 0 | | Cr| Y: [-128 127] @@ -216,51 +216,51 @@ do { \ #define vec_unh(x) \ - (vector signed short) \ - vec_perm(x,(typeof(x))AVV(0),\ - (vector unsigned char)AVV(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\ - 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07)) + (vector signed short) \ + vec_perm(x,(typeof(x))AVV(0),\ + (vector unsigned char)AVV(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\ + 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07)) #define vec_unl(x) \ - (vector signed short) \ - vec_perm(x,(typeof(x))AVV(0),\ - (vector unsigned char)AVV(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\ - 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F)) + (vector signed short) \ + vec_perm(x,(typeof(x))AVV(0),\ + (vector unsigned char)AVV(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\ + 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F)) #define vec_clip_s16(x) \ - vec_max (vec_min (x, (vector signed short)AVV(235,235,235,235,235,235,235,235)),\ - (vector signed short)AVV(16, 16, 16, 16, 16, 16, 16, 16 )) + vec_max (vec_min (x, (vector signed short)AVV(235,235,235,235,235,235,235,235)),\ + (vector signed short)AVV( 16, 16, 16, 16, 16, 16, 16, 16)) #define vec_packclp(x,y) \ - (vector unsigned char)vec_packs \ - ((vector unsigned short)vec_max (x,(vector signed short) AVV(0)), \ - (vector unsigned short)vec_max (y,(vector signed short) AVV(0))) + (vector unsigned char)vec_packs \ + ((vector unsigned short)vec_max (x,(vector signed short) AVV(0)), \ + (vector unsigned short)vec_max (y,(vector signed short) AVV(0))) //#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,a,a,ptr) static inline void cvtyuvtoRGB (SwsContext *c, - vector signed short Y, vector signed short U, vector signed short V, - vector signed short *R, vector signed short *G, vector signed short *B) + vector signed short Y, vector signed short U, vector signed short V, + vector signed short *R, vector signed short *G, vector signed short *B) { - vector signed short vx,ux,uvx; + vector signed short vx,ux,uvx; - Y = vec_mradds (Y, c->CY, c->OY); - U = vec_sub (U,(vector signed short) - vec_splat((vector signed short)AVV(128),0)); - V = vec_sub (V,(vector signed short) - vec_splat((vector signed short)AVV(128),0)); + Y = vec_mradds (Y, c->CY, c->OY); + U = vec_sub (U,(vector signed short) + vec_splat((vector signed short)AVV(128),0)); + V = vec_sub (V,(vector signed short) + vec_splat((vector signed short)AVV(128),0)); - // ux = (CBU*(u<CSHIFT)+0x4000)>>15; - ux = vec_sl (U, c->CSHIFT); - *B = vec_mradds (ux, c->CBU, Y); + // ux = (CBU*(u<CSHIFT)+0x4000)>>15; + ux = vec_sl (U, c->CSHIFT); + *B = vec_mradds (ux, c->CBU, Y); - // vx = (CRV*(v<CSHIFT)+0x4000)>>15; - vx = vec_sl (V, c->CSHIFT); - *R = vec_mradds (vx, c->CRV, Y); + // vx = (CRV*(v<CSHIFT)+0x4000)>>15; + vx = vec_sl (V, c->CSHIFT); + *R = vec_mradds (vx, c->CRV, Y); - // uvx = ((CGU*u) + (CGV*v))>>15; - uvx = vec_mradds (U, c->CGU, Y); - *G = vec_mradds (V, c->CGV, uvx); + // uvx = ((CGU*u) + (CGV*v))>>15; + uvx = vec_mradds (U, c->CGU, Y); + *G = vec_mradds (V, c->CGV, uvx); } @@ -271,164 +271,168 @@ static inline void cvtyuvtoRGB (SwsContext *c, */ -#define DEFCSP420_CVT(name,out_pixels) \ -static int altivec_##name (SwsContext *c, \ - unsigned char **in, int *instrides, \ - int srcSliceY, int srcSliceH, \ - unsigned char **oplanes, int *outstrides) \ -{ \ - int w = c->srcW; \ - int h = srcSliceH; \ - int i,j; \ - int instrides_scl[3]; \ - vector unsigned char y0,y1; \ - \ - vector signed char u,v; \ - \ - vector signed short Y0,Y1,Y2,Y3; \ - vector signed short U,V; \ - vector signed short vx,ux,uvx; \ - vector signed short vx0,ux0,uvx0; \ - vector signed short vx1,ux1,uvx1; \ - vector signed short R0,G0,B0; \ - vector signed short R1,G1,B1; \ - vector unsigned char R,G,B; \ - \ - vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ - vector unsigned char align_perm; \ - \ - vector signed short \ - lCY = c->CY, \ - lOY = c->OY, \ - lCRV = c->CRV, \ - lCBU = c->CBU, \ - lCGU = c->CGU, \ - lCGV = c->CGV; \ - \ - vector unsigned short lCSHIFT = c->CSHIFT; \ - \ - ubyte *y1i = in[0]; \ - ubyte *y2i = in[0]+instrides[0]; \ - ubyte *ui = in[1]; \ - ubyte *vi = in[2]; \ - \ - vector unsigned char *oute \ - = (vector unsigned char *) \ - (oplanes[0]+srcSliceY*outstrides[0]); \ - vector unsigned char *outo \ - = (vector unsigned char *) \ - (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); \ - \ - \ - instrides_scl[0] = instrides[0]*2-w; /* the loop moves y{1,2}i by w */ \ - instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ \ - instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ \ - \ - \ - for (i=0;i>15 */ \ - ux = vec_sl (U, lCSHIFT); \ - ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); \ - ux0 = vec_mergeh (ux,ux); \ - ux1 = vec_mergel (ux,ux); \ - \ - /* vx = (CRV*(v<>15; */ \ - vx = vec_sl (V, lCSHIFT); \ - vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); \ - vx0 = vec_mergeh (vx,vx); \ - vx1 = vec_mergel (vx,vx); \ - \ - /* uvx = ((CGU*u) + (CGV*v))>>15 */ \ - uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); \ - uvx = vec_mradds (V, lCGV, uvx); \ - uvx0 = vec_mergeh (uvx,uvx); \ - uvx1 = vec_mergel (uvx,uvx); \ - \ - R0 = vec_add (Y0,vx0); \ - G0 = vec_add (Y0,uvx0); \ - B0 = vec_add (Y0,ux0); \ - R1 = vec_add (Y1,vx1); \ - G1 = vec_add (Y1,uvx1); \ - B1 = vec_add (Y1,ux1); \ - \ - R = vec_packclp (R0,R1); \ - G = vec_packclp (G0,G1); \ - B = vec_packclp (B0,B1); \ - \ - out_pixels(R,G,B,oute); \ - \ - R0 = vec_add (Y2,vx0); \ - G0 = vec_add (Y2,uvx0); \ - B0 = vec_add (Y2,ux0); \ - R1 = vec_add (Y3,vx1); \ - G1 = vec_add (Y3,uvx1); \ - B1 = vec_add (Y3,ux1); \ - R = vec_packclp (R0,R1); \ - G = vec_packclp (G0,G1); \ - B = vec_packclp (B0,B1); \ - \ - \ - out_pixels(R,G,B,outo); \ - \ - y1i += 16; \ - y2i += 16; \ - ui += 8; \ - vi += 8; \ - \ - } \ - \ - outo += (outstrides[0])>>4; \ - oute += (outstrides[0])>>4; \ - \ - ui += instrides_scl[1]; \ - vi += instrides_scl[2]; \ - y1i += instrides_scl[0]; \ - y2i += instrides_scl[0]; \ - } \ - return srcSliceH; \ +#define DEFCSP420_CVT(name,out_pixels) \ +static int altivec_##name (SwsContext *c, \ + unsigned char **in, int *instrides, \ + int srcSliceY, int srcSliceH, \ + unsigned char **oplanes, int *outstrides) \ +{ \ + int w = c->srcW; \ + int h = srcSliceH; \ + int i,j; \ + int instrides_scl[3]; \ + vector unsigned char y0,y1; \ + \ + vector signed char u,v; \ + \ + vector signed short Y0,Y1,Y2,Y3; \ + vector signed short U,V; \ + vector signed short vx,ux,uvx; \ + vector signed short vx0,ux0,uvx0; \ + vector signed short vx1,ux1,uvx1; \ + vector signed short R0,G0,B0; \ + vector signed short R1,G1,B1; \ + vector unsigned char R,G,B; \ + \ + vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ + vector unsigned char align_perm; \ + \ + vector signed short \ + lCY = c->CY, \ + lOY = c->OY, \ + lCRV = c->CRV, \ + lCBU = c->CBU, \ + lCGU = c->CGU, \ + lCGV = c->CGV; \ + \ + vector unsigned short lCSHIFT = c->CSHIFT; \ + \ + ubyte *y1i = in[0]; \ + ubyte *y2i = in[0]+instrides[0]; \ + ubyte *ui = in[1]; \ + ubyte *vi = in[2]; \ + \ + vector unsigned char *oute \ + = (vector unsigned char *) \ + (oplanes[0]+srcSliceY*outstrides[0]); \ + vector unsigned char *outo \ + = (vector unsigned char *) \ + (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); \ + \ + \ + instrides_scl[0] = instrides[0]*2-w; /* the loop moves y{1,2}i by w */ \ + instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ \ + instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ \ + \ + \ + for (i=0;i>15 */ \ + ux = vec_sl (U, lCSHIFT); \ + ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); \ + ux0 = vec_mergeh (ux,ux); \ + ux1 = vec_mergel (ux,ux); \ + \ + /* vx = (CRV*(v<>15; */ \ + vx = vec_sl (V, lCSHIFT); \ + vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); \ + vx0 = vec_mergeh (vx,vx); \ + vx1 = vec_mergel (vx,vx); \ + \ + /* uvx = ((CGU*u) + (CGV*v))>>15 */ \ + uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); \ + uvx = vec_mradds (V, lCGV, uvx); \ + uvx0 = vec_mergeh (uvx,uvx); \ + uvx1 = vec_mergel (uvx,uvx); \ + \ + R0 = vec_add (Y0,vx0); \ + G0 = vec_add (Y0,uvx0); \ + B0 = vec_add (Y0,ux0); \ + R1 = vec_add (Y1,vx1); \ + G1 = vec_add (Y1,uvx1); \ + B1 = vec_add (Y1,ux1); \ + \ + R = vec_packclp (R0,R1); \ + G = vec_packclp (G0,G1); \ + B = vec_packclp (B0,B1); \ + \ + out_pixels(R,G,B,oute); \ + \ + R0 = vec_add (Y2,vx0); \ + G0 = vec_add (Y2,uvx0); \ + B0 = vec_add (Y2,ux0); \ + R1 = vec_add (Y3,vx1); \ + G1 = vec_add (Y3,uvx1); \ + B1 = vec_add (Y3,ux1); \ + R = vec_packclp (R0,R1); \ + G = vec_packclp (G0,G1); \ + B = vec_packclp (B0,B1); \ + \ + \ + out_pixels(R,G,B,outo); \ + \ + y1i += 16; \ + y2i += 16; \ + ui += 8; \ + vi += 8; \ + \ + } \ + \ + outo += (outstrides[0])>>4; \ + oute += (outstrides[0])>>4; \ + \ + ui += instrides_scl[1]; \ + vi += instrides_scl[2]; \ + y1i += instrides_scl[0]; \ + y2i += instrides_scl[0]; \ + } \ + return srcSliceH; \ } @@ -444,150 +448,150 @@ DEFCSP420_CVT (yuv2_abgr, out_abgr) DEFCSP420_CVT (yuv2_bgra, out_bgra) #else static int altivec_yuv2_bgra32 (SwsContext *c, - unsigned char **in, int *instrides, - int srcSliceY, int srcSliceH, - unsigned char **oplanes, int *outstrides) + unsigned char **in, int *instrides, + int srcSliceY, int srcSliceH, + unsigned char **oplanes, int *outstrides) { - int w = c->srcW; - int h = srcSliceH; - int i,j; - int instrides_scl[3]; - vector unsigned char y0,y1; - - vector signed char u,v; - - vector signed short Y0,Y1,Y2,Y3; - vector signed short U,V; - vector signed short vx,ux,uvx; - vector signed short vx0,ux0,uvx0; - vector signed short vx1,ux1,uvx1; - vector signed short R0,G0,B0; - vector signed short R1,G1,B1; - vector unsigned char R,G,B; - - vector unsigned char *uivP, *vivP; - vector unsigned char align_perm; - - vector signed short - lCY = c->CY, - lOY = c->OY, - lCRV = c->CRV, - lCBU = c->CBU, - lCGU = c->CGU, - lCGV = c->CGV; - - vector unsigned short lCSHIFT = c->CSHIFT; - - ubyte *y1i = in[0]; - ubyte *y2i = in[0]+w; - ubyte *ui = in[1]; - ubyte *vi = in[2]; - - vector unsigned char *oute - = (vector unsigned char *) - (oplanes[0]+srcSliceY*outstrides[0]); - vector unsigned char *outo - = (vector unsigned char *) - (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); - - - instrides_scl[0] = instrides[0]; - instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ - instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ - - - for (i=0;i>15 */ - ux = vec_sl (U, lCSHIFT); - ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); - ux0 = vec_mergeh (ux,ux); - ux1 = vec_mergel (ux,ux); - - /* vx = (CRV*(v<>15; */ - vx = vec_sl (V, lCSHIFT); - vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); - vx0 = vec_mergeh (vx,vx); - vx1 = vec_mergel (vx,vx); - /* uvx = ((CGU*u) + (CGV*v))>>15 */ - uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); - uvx = vec_mradds (V, lCGV, uvx); - uvx0 = vec_mergeh (uvx,uvx); - uvx1 = vec_mergel (uvx,uvx); - R0 = vec_add (Y0,vx0); - G0 = vec_add (Y0,uvx0); - B0 = vec_add (Y0,ux0); - R1 = vec_add (Y1,vx1); - G1 = vec_add (Y1,uvx1); - B1 = vec_add (Y1,ux1); - R = vec_packclp (R0,R1); - G = vec_packclp (G0,G1); - B = vec_packclp (B0,B1); - - out_argb(R,G,B,oute); - R0 = vec_add (Y2,vx0); - G0 = vec_add (Y2,uvx0); - B0 = vec_add (Y2,ux0); - R1 = vec_add (Y3,vx1); - G1 = vec_add (Y3,uvx1); - B1 = vec_add (Y3,ux1); - R = vec_packclp (R0,R1); - G = vec_packclp (G0,G1); - B = vec_packclp (B0,B1); - - out_argb(R,G,B,outo); - y1i += 16; - y2i += 16; - ui += 8; - vi += 8; + int w = c->srcW; + int h = srcSliceH; + int i,j; + int instrides_scl[3]; + vector unsigned char y0,y1; + + vector signed char u,v; + + vector signed short Y0,Y1,Y2,Y3; + vector signed short U,V; + vector signed short vx,ux,uvx; + vector signed short vx0,ux0,uvx0; + vector signed short vx1,ux1,uvx1; + vector signed short R0,G0,B0; + vector signed short R1,G1,B1; + vector unsigned char R,G,B; + + vector unsigned char *uivP, *vivP; + vector unsigned char align_perm; + + vector signed short + lCY = c->CY, + lOY = c->OY, + lCRV = c->CRV, + lCBU = c->CBU, + lCGU = c->CGU, + lCGV = c->CGV; + + vector unsigned short lCSHIFT = c->CSHIFT; + + ubyte *y1i = in[0]; + ubyte *y2i = in[0]+w; + ubyte *ui = in[1]; + ubyte *vi = in[2]; + + vector unsigned char *oute + = (vector unsigned char *) + (oplanes[0]+srcSliceY*outstrides[0]); + vector unsigned char *outo + = (vector unsigned char *) + (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); + + + instrides_scl[0] = instrides[0]; + instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ + instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ + + + for (i=0;i>15 */ + ux = vec_sl (U, lCSHIFT); + ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); + ux0 = vec_mergeh (ux,ux); + ux1 = vec_mergel (ux,ux); + + /* vx = (CRV*(v<>15; */ + vx = vec_sl (V, lCSHIFT); + vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); + vx0 = vec_mergeh (vx,vx); + vx1 = vec_mergel (vx,vx); + /* uvx = ((CGU*u) + (CGV*v))>>15 */ + uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); + uvx = vec_mradds (V, lCGV, uvx); + uvx0 = vec_mergeh (uvx,uvx); + uvx1 = vec_mergel (uvx,uvx); + R0 = vec_add (Y0,vx0); + G0 = vec_add (Y0,uvx0); + B0 = vec_add (Y0,ux0); + R1 = vec_add (Y1,vx1); + G1 = vec_add (Y1,uvx1); + B1 = vec_add (Y1,ux1); + R = vec_packclp (R0,R1); + G = vec_packclp (G0,G1); + B = vec_packclp (B0,B1); + + out_argb(R,G,B,oute); + R0 = vec_add (Y2,vx0); + G0 = vec_add (Y2,uvx0); + B0 = vec_add (Y2,ux0); + R1 = vec_add (Y3,vx1); + G1 = vec_add (Y3,uvx1); + B1 = vec_add (Y3,ux1); + R = vec_packclp (R0,R1); + G = vec_packclp (G0,G1); + B = vec_packclp (B0,B1); + + out_argb(R,G,B,outo); + y1i += 16; + y2i += 16; + ui += 8; + vi += 8; - } + } - outo += (outstrides[0])>>4; - oute += (outstrides[0])>>4; + outo += (outstrides[0])>>4; + oute += (outstrides[0])>>4; - ui += instrides_scl[1]; - vi += instrides_scl[2]; - y1i += instrides_scl[0]; - y2i += instrides_scl[0]; - } - return srcSliceH; + ui += instrides_scl[1]; + vi += instrides_scl[2]; + y1i += instrides_scl[0]; + y2i += instrides_scl[0]; + } + return srcSliceH; } #endif @@ -603,77 +607,77 @@ DEFCSP420_CVT (yuv2_bgr24, out_bgr24) // 0123 4567 89ab cdef static const vector