diff options
Diffstat (limited to 'postproc/swscale.c')
-rw-r--r-- | postproc/swscale.c | 2707 |
1 files changed, 0 insertions, 2707 deletions
diff --git a/postproc/swscale.c b/postproc/swscale.c deleted file mode 100644 index 11f774d99a..0000000000 --- a/postproc/swscale.c +++ /dev/null @@ -1,2707 +0,0 @@ -/* - Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -/* - supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09 - supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09 - {BGR,RGB}{1,4,8,15,16} support dithering - - unscaled special converters (YV12=I420=IYUV, Y800=Y8) - YV12 -> {BGR,RGB}{1,4,8,15,16,24,32} - x -> x - YUV9 -> YV12 - YUV9/YV12 -> Y800 - Y800 -> YUV9/YV12 - BGR24 -> BGR32 & RGB24 -> RGB32 - BGR32 -> BGR24 & RGB32 -> RGB24 - BGR15 -> BGR16 -*/ - -/* -tested special converters (most are tested actually but i didnt write it down ...) - YV12 -> BGR16 - YV12 -> YV12 - BGR15 -> BGR16 - BGR16 -> BGR16 - YVU9 -> YV12 - -untested special converters - YV12/I420 -> BGR15/BGR24/BGR32 (its the yuv2rgb stuff, so it should be ok) - YV12/I420 -> YV12/I420 - YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format - BGR24 -> BGR32 & RGB24 -> RGB32 - BGR32 -> BGR24 & RGB32 -> RGB24 - BGR24 -> YV12 -*/ - -#include <inttypes.h> -#include <string.h> -#include <math.h> -#include <stdio.h> -#include <unistd.h> -#include "config.h" -#include "mangle.h" -#include <assert.h> -#ifdef HAVE_MALLOC_H -#include <malloc.h> -#else -#include <stdlib.h> -#endif -#ifdef HAVE_SYS_MMAN_H -#include <sys/mman.h> -#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#endif -#include "swscale.h" -#include "swscale_internal.h" -#include "cpudetect.h" -#include "bswap.h" -#include "libvo/img_format.h" -#include "rgb2rgb.h" -#include "libvo/fastmemcpy.h" - -#undef MOVNTQ -#undef PAVGB - -//#undef HAVE_MMX2 -//#define HAVE_3DNOW -//#undef HAVE_MMX -//#undef ARCH_X86 -//#define WORDS_BIGENDIAN -#define DITHER1XBPP - -#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit - -#define RET 0xC3 //near return opcode for X86 - -#ifdef MP_DEBUG -#define ASSERT(x) assert(x); -#else -#define ASSERT(x) ; -#endif - -#ifdef M_PI -#define PI M_PI -#else -#define PI 3.14159265358979323846 -#endif - -//FIXME replace this with something faster -#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \ - || (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \ - || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P) -#define isYUV(x) ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x)) -#define isGray(x) ((x)==IMGFMT_Y800) -#define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB) -#define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR) -#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\ - || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ - || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ - || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\ - || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P) -#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\ - || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\ - || isRGB(x) || isBGR(x)\ - || (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\ - || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9) -#define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x)) - -#define RGB2YUV_SHIFT 16 -#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) -#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) -#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) -#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5)) -#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5)) -#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5)) -#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5)) -#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) -#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) - -extern const int32_t Inverse_Table_6_9[8][4]; - -/* -NOTES -Special versions: fast Y 1:1 scaling (no interpolation in y direction) - -TODO -more intelligent missalignment avoidance for the horizontal scaler -write special vertical cubic upscale version -Optimize C code (yv12 / minmax) -add support for packed pixel yuv input & output -add support for Y8 output -optimize bgr24 & bgr32 -add BGR4 output support -write special BGR->BGR scaler -*/ - -#define ABS(a) ((a) > 0 ? (a) : (-(a))) -#define MIN(a,b) ((a) > (b) ? (b) : (a)) -#define MAX(a,b) ((a) < (b) ? (b) : (a)) - -#if defined(ARCH_X86) || defined(ARCH_X86_64) -static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL; -static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL; -static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; -static uint64_t attribute_used __attribute__((aligned(8))) w02= 0x0002000200020002LL; -static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; -static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; -static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; -static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL; - -static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither; - -static uint64_t __attribute__((aligned(8))) dither4[2]={ - 0x0103010301030103LL, - 0x0200020002000200LL,}; - -static uint64_t __attribute__((aligned(8))) dither8[2]={ - 0x0602060206020602LL, - 0x0004000400040004LL,}; - -static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; -static uint64_t attribute_used __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; -static uint64_t attribute_used __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; -static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; -static uint64_t attribute_used __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL; -static uint64_t attribute_used __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL; - -static uint64_t attribute_used __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; -static uint64_t attribute_used __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; -static uint64_t attribute_used __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; - -#ifdef FAST_BGR2YV12 -static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL; -static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL; -static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL; -#else -static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL; -static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL; -static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL; -#endif -static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL; -static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL; -static const uint64_t w1111 attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL; -#endif - -// clipping helper table for C implementations: -static unsigned char clip_table[768]; - -static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b); - -extern const uint8_t dither_2x2_4[2][8]; -extern const uint8_t dither_2x2_8[2][8]; -extern const uint8_t dither_8x8_32[8][8]; -extern const uint8_t dither_8x8_73[8][8]; -extern const uint8_t dither_8x8_220[8][8]; - -#if defined(ARCH_X86) || defined(ARCH_X86_64) -void in_asm_used_var_warning_killer() -{ - volatile int i= bF8+bFC+w10+ - bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+ - M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; - if(i) i=0; -} -#endif - -static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, - int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) -{ - //FIXME Optimize (just quickly writen not opti..) - int i; - for(i=0; i<dstW; i++) - { - int val=1<<18; - int j; - for(j=0; j<lumFilterSize; j++) - val += lumSrc[j][i] * lumFilter[j]; - - dest[i]= MIN(MAX(val>>19, 0), 255); - } - - if(uDest != NULL) - for(i=0; i<chrDstW; i++) - { - int u=1<<18; - int v=1<<18; - int j; - for(j=0; j<chrFilterSize; j++) - { - u += chrSrc[j][i] * chrFilter[j]; - v += chrSrc[j][i + 2048] * chrFilter[j]; - } - - uDest[i]= MIN(MAX(u>>19, 0), 255); - vDest[i]= MIN(MAX(v>>19, 0), 255); - } -} - -static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, - int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) -{ - //FIXME Optimize (just quickly writen not opti..) - int i; - for(i=0; i<dstW; i++) - { - int val=1<<18; - int j; - for(j=0; j<lumFilterSize; j++) - val += lumSrc[j][i] * lumFilter[j]; - - dest[i]= MIN(MAX(val>>19, 0), 255); - } - - if(uDest == NULL) - return; - - if(dstFormat == IMGFMT_NV12) - for(i=0; i<chrDstW; i++) - { - int u=1<<18; - int v=1<<18; - int j; - for(j=0; j<chrFilterSize; j++) - { - u += chrSrc[j][i] * chrFilter[j]; - v += chrSrc[j][i + 2048] * chrFilter[j]; - } - - uDest[2*i]= MIN(MAX(u>>19, 0), 255); - uDest[2*i+1]= MIN(MAX(v>>19, 0), 255); - } - else - for(i=0; i<chrDstW; i++) - { - int u=1<<18; - int v=1<<18; - int j; - for(j=0; j<chrFilterSize; j++) - { - u += chrSrc[j][i] * chrFilter[j]; - v += chrSrc[j][i + 2048] * chrFilter[j]; - } - - uDest[2*i]= MIN(MAX(v>>19, 0), 255); - uDest[2*i+1]= MIN(MAX(u>>19, 0), 255); - } -} - -#define YSCALE_YUV_2_PACKEDX_C(type) \ - for(i=0; i<(dstW>>1); i++){\ - int j;\ - int Y1=1<<18;\ - int Y2=1<<18;\ - int U=1<<18;\ - int V=1<<18;\ - type *r, *b, *g;\ - const int i2= 2*i;\ - \ - for(j=0; j<lumFilterSize; j++)\ - {\ - Y1 += lumSrc[j][i2] * lumFilter[j];\ - Y2 += lumSrc[j][i2+1] * lumFilter[j];\ - }\ - for(j=0; j<chrFilterSize; j++)\ - {\ - U += chrSrc[j][i] * chrFilter[j];\ - V += chrSrc[j][i+2048] * chrFilter[j];\ - }\ - Y1>>=19;\ - Y2>>=19;\ - U >>=19;\ - V >>=19;\ - if((Y1|Y2|U|V)&256)\ - {\ - if(Y1>255) Y1=255;\ - else if(Y1<0)Y1=0;\ - if(Y2>255) Y2=255;\ - else if(Y2<0)Y2=0;\ - if(U>255) U=255;\ - else if(U<0) U=0;\ - if(V>255) V=255;\ - else if(V<0) V=0;\ - } - -#define YSCALE_YUV_2_RGBX_C(type) \ - YSCALE_YUV_2_PACKEDX_C(type)\ - r = c->table_rV[V];\ - g = c->table_gU[U] + c->table_gV[V];\ - b = c->table_bU[U];\ - -#define YSCALE_YUV_2_PACKED2_C \ - for(i=0; i<(dstW>>1); i++){\ - const int i2= 2*i;\ - int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19;\ - int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\ - int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19;\ - int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;\ - -#define YSCALE_YUV_2_RGB2_C(type) \ - YSCALE_YUV_2_PACKED2_C\ - type *r, *b, *g;\ - r = c->table_rV[V];\ - g = c->table_gU[U] + c->table_gV[V];\ - b = c->table_bU[U];\ - -#define YSCALE_YUV_2_PACKED1_C \ - for(i=0; i<(dstW>>1); i++){\ - const int i2= 2*i;\ - int Y1= buf0[i2 ]>>7;\ - int Y2= buf0[i2+1]>>7;\ - int U= (uvbuf1[i ])>>7;\ - int V= (uvbuf1[i+2048])>>7;\ - -#define YSCALE_YUV_2_RGB1_C(type) \ - YSCALE_YUV_2_PACKED1_C\ - type *r, *b, *g;\ - r = c->table_rV[V];\ - g = c->table_gU[U] + c->table_gV[V];\ - b = c->table_bU[U];\ - -#define YSCALE_YUV_2_PACKED1B_C \ - for(i=0; i<(dstW>>1); i++){\ - const int i2= 2*i;\ - int Y1= buf0[i2 ]>>7;\ - int Y2= buf0[i2+1]>>7;\ - int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\ - int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\ - -#define YSCALE_YUV_2_RGB1B_C(type) \ - YSCALE_YUV_2_PACKED1B_C\ - type *r, *b, *g;\ - r = c->table_rV[V];\ - g = c->table_gU[U] + c->table_gV[V];\ - b = c->table_bU[U];\ - -#define YSCALE_YUV_2_ANYRGB_C(func, func2)\ - switch(c->dstFormat)\ - {\ - case IMGFMT_BGR32:\ - case IMGFMT_RGB32:\ - func(uint32_t)\ - ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ - ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ - } \ - break;\ - case IMGFMT_RGB24:\ - func(uint8_t)\ - ((uint8_t*)dest)[0]= r[Y1];\ - ((uint8_t*)dest)[1]= g[Y1];\ - ((uint8_t*)dest)[2]= b[Y1];\ - ((uint8_t*)dest)[3]= r[Y2];\ - ((uint8_t*)dest)[4]= g[Y2];\ - ((uint8_t*)dest)[5]= b[Y2];\ - dest+=6;\ - }\ - break;\ - case IMGFMT_BGR24:\ - func(uint8_t)\ - ((uint8_t*)dest)[0]= b[Y1];\ - ((uint8_t*)dest)[1]= g[Y1];\ - ((uint8_t*)dest)[2]= r[Y1];\ - ((uint8_t*)dest)[3]= b[Y2];\ - ((uint8_t*)dest)[4]= g[Y2];\ - ((uint8_t*)dest)[5]= r[Y2];\ - dest+=6;\ - }\ - break;\ - case IMGFMT_RGB16:\ - case IMGFMT_BGR16:\ - {\ - const int dr1= dither_2x2_8[y&1 ][0];\ - const int dg1= dither_2x2_4[y&1 ][0];\ - const int db1= dither_2x2_8[(y&1)^1][0];\ - const int dr2= dither_2x2_8[y&1 ][1];\ - const int dg2= dither_2x2_4[y&1 ][1];\ - const int db2= dither_2x2_8[(y&1)^1][1];\ - func(uint16_t)\ - ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ - ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ - }\ - }\ - break;\ - case IMGFMT_RGB15:\ - case IMGFMT_BGR15:\ - {\ - const int dr1= dither_2x2_8[y&1 ][0];\ - const int dg1= dither_2x2_8[y&1 ][1];\ - const int db1= dither_2x2_8[(y&1)^1][0];\ - const int dr2= dither_2x2_8[y&1 ][1];\ - const int dg2= dither_2x2_8[y&1 ][0];\ - const int db2= dither_2x2_8[(y&1)^1][1];\ - func(uint16_t)\ - ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ - ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ - }\ - }\ - break;\ - case IMGFMT_RGB8:\ - case IMGFMT_BGR8:\ - {\ - const uint8_t * const d64= dither_8x8_73[y&7];\ - const uint8_t * const d32= dither_8x8_32[y&7];\ - func(uint8_t)\ - ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\ - ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\ - }\ - }\ - break;\ - case IMGFMT_RGB4:\ - case IMGFMT_BGR4:\ - {\ - const uint8_t * const d64= dither_8x8_73 [y&7];\ - const uint8_t * const d128=dither_8x8_220[y&7];\ - func(uint8_t)\ - ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\ - + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\ - }\ - }\ - break;\ - case IMGFMT_RG4B:\ - case IMGFMT_BG4B:\ - {\ - const uint8_t * const d64= dither_8x8_73 [y&7];\ - const uint8_t * const d128=dither_8x8_220[y&7];\ - func(uint8_t)\ - ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\ - ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\ - }\ - }\ - break;\ - case IMGFMT_RGB1:\ - case IMGFMT_BGR1:\ - {\ - const uint8_t * const d128=dither_8x8_220[y&7];\ - uint8_t *g= c->table_gU[128] + c->table_gV[128];\ - for(i=0; i<dstW-7; i+=8){\ - int acc;\ - acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\ - acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ - acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ - acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ - acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ - acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ - acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ - acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ - ((uint8_t*)dest)[0]= acc;\ - dest++;\ - }\ -\ -/*\ -((uint8_t*)dest)-= dstW>>4;\ -{\ - int acc=0;\ - int left=0;\ - static int top[1024];\ - static int last_new[1024][1024];\ - static int last_in3[1024][1024];\ - static int drift[1024][1024];\ - int topLeft=0;\ - int shift=0;\ - int count=0;\ - const uint8_t * const d128=dither_8x8_220[y&7];\ - int error_new=0;\ - int error_in3=0;\ - int f=0;\ - \ - for(i=dstW>>1; i<dstW; i++){\ - int in= ((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19);\ - int in2 = (76309 * (in - 16) + 32768) >> 16;\ - int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\ - int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\ - + (last_new[y][i] - in3)*f/256;\ - int new= old> 128 ? 255 : 0;\ -\ - error_new+= ABS(last_new[y][i] - new);\ - error_in3+= ABS(last_in3[y][i] - in3);\ - f= error_new - error_in3*4;\ - if(f<0) f=0;\ - if(f>256) f=256;\ -\ - topLeft= top[i];\ - left= top[i]= old - new;\ - last_new[y][i]= new;\ - last_in3[y][i]= in3;\ -\ - acc+= acc + (new&1);\ - if((i&7)==6){\ - ((uint8_t*)dest)[0]= acc;\ - ((uint8_t*)dest)++;\ - }\ - }\ -}\ -*/\ - }\ - break;\ - case IMGFMT_YUY2:\ - func2\ - ((uint8_t*)dest)[2*i2+0]= Y1;\ - ((uint8_t*)dest)[2*i2+1]= U;\ - ((uint8_t*)dest)[2*i2+2]= Y2;\ - ((uint8_t*)dest)[2*i2+3]= V;\ - } \ - break;\ - case IMGFMT_UYVY:\ - func2\ - ((uint8_t*)dest)[2*i2+0]= U;\ - ((uint8_t*)dest)[2*i2+1]= Y1;\ - ((uint8_t*)dest)[2*i2+2]= V;\ - ((uint8_t*)dest)[2*i2+3]= Y2;\ - } \ - break;\ - }\ - - -static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, - int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, int dstW, int y) -{ - int i; - switch(c->dstFormat) - { - case IMGFMT_RGB32: - case IMGFMT_BGR32: - YSCALE_YUV_2_RGBX_C(uint32_t) - ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1]; - ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2]; - } - break; - case IMGFMT_RGB24: - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[0]= r[Y1]; - ((uint8_t*)dest)[1]= g[Y1]; - ((uint8_t*)dest)[2]= b[Y1]; - ((uint8_t*)dest)[3]= r[Y2]; - ((uint8_t*)dest)[4]= g[Y2]; - ((uint8_t*)dest)[5]= b[Y2]; - dest+=6; - } - break; - case IMGFMT_BGR24: - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[0]= b[Y1]; - ((uint8_t*)dest)[1]= g[Y1]; - ((uint8_t*)dest)[2]= r[Y1]; - ((uint8_t*)dest)[3]= b[Y2]; - ((uint8_t*)dest)[4]= g[Y2]; - ((uint8_t*)dest)[5]= r[Y2]; - dest+=6; - } - break; - case IMGFMT_RGB16: - case IMGFMT_BGR16: - { - const int dr1= dither_2x2_8[y&1 ][0]; - const int dg1= dither_2x2_4[y&1 ][0]; - const int db1= dither_2x2_8[(y&1)^1][0]; - const int dr2= dither_2x2_8[y&1 ][1]; - const int dg2= dither_2x2_4[y&1 ][1]; - const int db2= dither_2x2_8[(y&1)^1][1]; - YSCALE_YUV_2_RGBX_C(uint16_t) - ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; - ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; - } - } - break; - case IMGFMT_RGB15: - case IMGFMT_BGR15: - { - const int dr1= dither_2x2_8[y&1 ][0]; - const int dg1= dither_2x2_8[y&1 ][1]; - const int db1= dither_2x2_8[(y&1)^1][0]; - const int dr2= dither_2x2_8[y&1 ][1]; - const int dg2= dither_2x2_8[y&1 ][0]; - const int db2= dither_2x2_8[(y&1)^1][1]; - YSCALE_YUV_2_RGBX_C(uint16_t) - ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; - ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; - } - } - break; - case IMGFMT_RGB8: - case IMGFMT_BGR8: - { - const uint8_t * const d64= dither_8x8_73[y&7]; - const uint8_t * const d32= dither_8x8_32[y&7]; - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]]; - ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]]; - } - } - break; - case IMGFMT_RGB4: - case IMGFMT_BGR4: - { - const uint8_t * const d64= dither_8x8_73 [y&7]; - const uint8_t * const d128=dither_8x8_220[y&7]; - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]] - +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4); - } - } - break; - case IMGFMT_RG4B: - case IMGFMT_BG4B: - { - const uint8_t * const d64= dither_8x8_73 [y&7]; - const uint8_t * const d128=dither_8x8_220[y&7]; - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]; - ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]]; - } - } - break; - case IMGFMT_RGB1: - case IMGFMT_BGR1: - { - const uint8_t * const d128=dither_8x8_220[y&7]; - uint8_t *g= c->table_gU[128] + c->table_gV[128]; - int acc=0; - for(i=0; i<dstW-1; i+=2){ - int j; - int Y1=1<<18; - int Y2=1<<18; - - for(j=0; j<lumFilterSize; j++) - { - Y1 += lumSrc[j][i] * lumFilter[j]; - Y2 += lumSrc[j][i+1] * lumFilter[j]; - } - Y1>>=19; - Y2>>=19; - if((Y1|Y2)&256) - { - if(Y1>255) Y1=255; - else if(Y1<0)Y1=0; - if(Y2>255) Y2=255; - else if(Y2<0)Y2=0; - } - acc+= acc + g[Y1+d128[(i+0)&7]]; - acc+= acc + g[Y2+d128[(i+1)&7]]; - if((i&7)==6){ - ((uint8_t*)dest)[0]= acc; - dest++; - } - } - } - break; - case IMGFMT_YUY2: - YSCALE_YUV_2_PACKEDX_C(void) - ((uint8_t*)dest)[2*i2+0]= Y1; - ((uint8_t*)dest)[2*i2+1]= U; - ((uint8_t*)dest)[2*i2+2]= Y2; - ((uint8_t*)dest)[2*i2+3]= V; - } - break; - case IMGFMT_UYVY: - YSCALE_YUV_2_PACKEDX_C(void) - ((uint8_t*)dest)[2*i2+0]= U; - ((uint8_t*)dest)[2*i2+1]= Y1; - ((uint8_t*)dest)[2*i2+2]= V; - ((uint8_t*)dest)[2*i2+3]= Y2; - } - break; - } -} - - -//Note: we have C, X86, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one -//Plain C versions -#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) -#define COMPILE_C -#endif - -#ifdef ARCH_POWERPC -#if defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT) -#define COMPILE_ALTIVEC -#endif //HAVE_ALTIVEC -#endif //ARCH_POWERPC - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - -#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) -#define COMPILE_MMX -#endif - -#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT) -#define COMPILE_MMX2 -#endif - -#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) -#define COMPILE_3DNOW -#endif -#endif //ARCH_X86 || ARCH_X86_64 - -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW - -#ifdef COMPILE_C -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#undef HAVE_ALTIVEC -#define RENAME(a) a ## _C -#include "swscale_template.c" -#endif - -#ifdef ARCH_POWERPC -#ifdef COMPILE_ALTIVEC -#undef RENAME -#define HAVE_ALTIVEC -#define RENAME(a) a ## _altivec -#include "swscale_template.c" -#endif -#endif //ARCH_POWERPC - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - -//X86 versions -/* -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#define ARCH_X86 -#define RENAME(a) a ## _X86 -#include "swscale_template.c" -*/ -//MMX versions -#ifdef COMPILE_MMX -#undef RENAME -#define HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#define RENAME(a) a ## _MMX -#include "swscale_template.c" -#endif - -//MMX2 versions -#ifdef COMPILE_MMX2 -#undef RENAME -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW -#define RENAME(a) a ## _MMX2 -#include "swscale_template.c" -#endif - -//3DNOW versions -#ifdef COMPILE_3DNOW -#undef RENAME -#define HAVE_MMX -#undef HAVE_MMX2 -#define HAVE_3DNOW -#define RENAME(a) a ## _3DNow -#include "swscale_template.c" -#endif - -#endif //ARCH_X86 || ARCH_X86_64 - -// minor note: the HAVE_xyz is messed up after that line so don't use it - -static double getSplineCoeff(double a, double b, double c, double d, double dist) -{ -// printf("%f %f %f %f %f\n", a,b,c,d,dist); - if(dist<=1.0) return ((d*dist + c)*dist + b)*dist +a; - else return getSplineCoeff( 0.0, - b+ 2.0*c + 3.0*d, - c + 3.0*d, - -b- 3.0*c - 6.0*d, - dist-1.0); -} - -static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, - int srcW, int dstW, int filterAlign, int one, int flags, - SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) -{ - int i; - int filterSize; - int filter2Size; - int minFilterSize; - double *filter=NULL; - double *filter2=NULL; -#if defined(ARCH_X86) || defined(ARCH_X86_64) - if(flags & SWS_CPU_CAPS_MMX) - asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) -#endif - - // Note the +1 is for the MMXscaler which reads over the end - *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t)); - - if(ABS(xInc - 0x10000) <10) // unscaled - { - int i; - filterSize= 1; - filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); - for(i=0; i<dstW*filterSize; i++) filter[i]=0; - - for(i=0; i<dstW; i++) - { - filter[i*filterSize]=1; - (*filterPos)[i]=i; - } - - } - else if(flags&SWS_POINT) // lame looking point sampling mode - { - int i; - int xDstInSrc; - filterSize= 1; - filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); - - xDstInSrc= xInc/2 - 0x8000; - for(i=0; i<dstW; i++) - { - int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; - - (*filterPos)[i]= xx; - filter[i]= 1.0; - xDstInSrc+= xInc; - } - } - else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale - { - int i; - int xDstInSrc; - if (flags&SWS_BICUBIC) filterSize= 4; - else if(flags&SWS_X ) filterSize= 4; - else filterSize= 2; // SWS_BILINEAR / SWS_AREA - filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); - - xDstInSrc= xInc/2 - 0x8000; - for(i=0; i<dstW; i++) - { - int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; - int j; - - (*filterPos)[i]= xx; - //Bilinear upscale / linear interpolate / Area averaging - for(j=0; j<filterSize; j++) - { - double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); - double coeff= 1.0 - d; - if(coeff<0) coeff=0; - filter[i*filterSize + j]= coeff; - xx++; - } - xDstInSrc+= xInc; - } - } - else - { - double xDstInSrc; - double sizeFactor, filterSizeInSrc; - const double xInc1= (double)xInc / (double)(1<<16); - - if (flags&SWS_BICUBIC) sizeFactor= 4.0; - else if(flags&SWS_X) sizeFactor= 8.0; - else if(flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear - else if(flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;) - else if(flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0; - else if(flags&SWS_SINC) sizeFactor= 20.0; // infinite ;) - else if(flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;) - else if(flags&SWS_BILINEAR) sizeFactor= 2.0; - else { - sizeFactor= 0.0; //GCC warning killer - ASSERT(0) - } - - if(xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale - else filterSizeInSrc= sizeFactor*srcW / (double)dstW; - - filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible - if(filterSize > srcW-2) filterSize=srcW-2; - - filter= (double*)memalign(16, dstW*sizeof(double)*filterSize); - - xDstInSrc= xInc1 / 2.0 - 0.5; - for(i=0; i<dstW; i++) - { - int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5); - int j; - (*filterPos)[i]= xx; - for(j=0; j<filterSize; j++) - { - double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor; - double coeff; - if(flags & SWS_BICUBIC) - { - double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0; - double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6; - - if(d<1.0) - coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B; - else if(d<2.0) - coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C; - else - coeff=0.0; - } -/* else if(flags & SWS_X) - { - double p= param ? param*0.01 : 0.3; - coeff = d ? sin(d*PI)/(d*PI) : 1.0; - coeff*= pow(2.0, - p*d*d); - }*/ - else if(flags & SWS_X) - { - double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; - - if(d<1.0) - coeff = cos(d*PI); - else - coeff=-1.0; - if(coeff<0.0) coeff= -pow(-coeff, A); - else coeff= pow( coeff, A); - coeff= coeff*0.5 + 0.5; - } - else if(flags & SWS_AREA) - { - double srcPixelSize= 1.0/xInc1; - if(d + srcPixelSize/2 < 0.5) coeff= 1.0; - else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5; - else coeff=0.0; - } - else if(flags & SWS_GAUSS) - { - double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; - coeff = pow(2.0, - p*d*d); - } - else if(flags & SWS_SINC) - { - coeff = d ? sin(d*PI)/(d*PI) : 1.0; - } - else if(flags & SWS_LANCZOS) - { - double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; - coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0; - if(d>p) coeff=0; - } - else if(flags & SWS_BILINEAR) - { - coeff= 1.0 - d; - if(coeff<0) coeff=0; - } - else if(flags & SWS_SPLINE) - { - double p=-2.196152422706632; - coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d); - } - else { - coeff= 0.0; //GCC warning killer - ASSERT(0) - } - - filter[i*filterSize + j]= coeff; - xx++; - } - xDstInSrc+= xInc1; - } - } - - /* apply src & dst Filter to filter -> filter2 - free(filter); - */ - ASSERT(filterSize>0) - filter2Size= filterSize; - if(srcFilter) filter2Size+= srcFilter->length - 1; - if(dstFilter) filter2Size+= dstFilter->length - 1; - ASSERT(filter2Size>0) - filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double)); - - for(i=0; i<dstW; i++) - { - int j; - SwsVector scaleFilter; - SwsVector *outVec; - - scaleFilter.coeff= filter + i*filterSize; - scaleFilter.length= filterSize; - - if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter); - else outVec= &scaleFilter; - - ASSERT(outVec->length == filter2Size) - //FIXME dstFilter - - for(j=0; j<outVec->length; j++) - { - filter2[i*filter2Size + j]= outVec->coeff[j]; - } - - (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2; - - if(outVec != &scaleFilter) sws_freeVec(outVec); - } - free(filter); filter=NULL; - - /* try to reduce the filter-size (step1 find size and shift left) */ - // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not) - minFilterSize= 0; - for(i=dstW-1; i>=0; i--) - { - int min= filter2Size; - int j; - double cutOff=0.0; - - /* get rid off near zero elements on the left by shifting left */ - for(j=0; j<filter2Size; j++) - { - int k; - cutOff += ABS(filter2[i*filter2Size]); - - if(cutOff > SWS_MAX_REDUCE_CUTOFF) break; - - /* preserve Monotonicity because the core can't handle the filter otherwise */ - if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break; - - // Move filter coeffs left - for(k=1; k<filter2Size; k++) - filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k]; - filter2[i*filter2Size + k - 1]= 0.0; - (*filterPos)[i]++; - } - - cutOff=0.0; - /* count near zeros on the right */ |