diff options
Diffstat (limited to 'libswscale/swscale_template.c')
-rw-r--r-- | libswscale/swscale_template.c | 147 |
1 files changed, 78 insertions, 69 deletions
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index ecf5c10131..4562866b7b 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -26,15 +26,6 @@ #undef PAVGB #undef PREFETCH #undef PREFETCHW -#undef EMMS -#undef SFENCE - -#if HAVE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif #if HAVE_AMD3DNOW #define PREFETCH "prefetch" @@ -48,12 +39,6 @@ #endif #if HAVE_MMX2 -#define SFENCE "sfence" -#else -#define SFENCE " # nop" -#endif - -#if HAVE_MMX2 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" #elif HAVE_AMD3DNOW #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" @@ -2258,13 +2243,34 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW #endif /* HAVE_ALTIVEC */ #endif /* HAVE_MMX */ } + +static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, + int dstWidth, uint8_t *src, int srcW, + int xInc) +{ + int i; + unsigned int xpos=0; + for (i=0;i<dstWidth;i++) + { + register unsigned int xx=xpos>>16; + register unsigned int xalpha=(xpos&0xFFFF)>>9; + dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; + xpos+=xInc; + } +} + // *** horizontal scale Y line to temp buffer static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, - int flags, int canMMX2BeUsed, int16_t *hLumFilter, - int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, - int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, - int32_t *mmx2FilterPos, uint32_t *pal, int isAlpha) + int flags, int16_t *hLumFilter, + int16_t *hLumFilterPos, int hLumFilterSize, + int srcFormat, uint8_t *formatConvBuffer, + uint32_t *pal, int isAlpha) { + int32_t *mmx2FilterPos = c->lumMmx2FilterPos; + int16_t *mmx2Filter = c->lumMmx2Filter; + int canMMX2BeUsed = c->canMMX2BeUsed; + void *funnyYCode = c->funnyYCode; + if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE) { RENAME(yuy2ToY)(formatConvBuffer, src, srcW, pal); @@ -2480,15 +2486,7 @@ FUNNY_Y_CODE } //if MMX2 can't be used #endif #else - int i; - unsigned int xpos=0; - for (i=0;i<dstWidth;i++) - { - register unsigned int xx=xpos>>16; - register unsigned int xalpha=(xpos&0xFFFF)>>9; - dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; - xpos+=xInc; - } + RENAME(hyscale_fast)(c, dst, dstWidth, src, srcW, xInc); #endif /* ARCH_X86 */ } @@ -2506,12 +2504,37 @@ FUNNY_Y_CODE } } +static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, + int dstWidth, uint8_t *src1, + uint8_t *src2, int srcW, int xInc) +{ + int i; + unsigned int xpos=0; + for (i=0;i<dstWidth;i++) + { + register unsigned int xx=xpos>>16; + register unsigned int xalpha=(xpos&0xFFFF)>>9; + dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); + dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); + /* slower + dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; + dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; + */ + xpos+=xInc; + } +} + inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2, - int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, - int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, - int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, - int32_t *mmx2FilterPos, uint32_t *pal) + int srcW, int xInc, int flags, int16_t *hChrFilter, + int16_t *hChrFilterPos, int hChrFilterSize, + int srcFormat, uint8_t *formatConvBuffer, + uint32_t *pal) { + int32_t *mmx2FilterPos = c->chrMmx2FilterPos; + int16_t *mmx2Filter = c->chrMmx2Filter; + int canMMX2BeUsed = c->canMMX2BeUsed; + void *funnyUVCode = c->funnyUVCode; + if (srcFormat==PIX_FMT_YUYV422) { RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); @@ -2769,20 +2792,7 @@ FUNNY_UV_CODE } //if MMX2 can't be used #endif #else - int i; - unsigned int xpos=0; - for (i=0;i<dstWidth;i++) - { - register unsigned int xx=xpos>>16; - register unsigned int xalpha=(xpos&0xFFFF)>>9; - dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); - dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); - /* slower - dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; - dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; - */ - xpos+=xInc; - } + RENAME(hcscale_fast)(c, dst, dstWidth, src1, src2, srcW, xInc); #endif /* ARCH_X86 */ } if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ @@ -2817,7 +2827,6 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s const int dstFormat= c->dstFormat; const int srcFormat= c->srcFormat; const int flags= c->flags; - const int canMMX2BeUsed= c->canMMX2BeUsed; int16_t *vLumFilterPos= c->vLumFilterPos; int16_t *vChrFilterPos= c->vChrFilterPos; int16_t *hLumFilterPos= c->hLumFilterPos; @@ -2838,8 +2847,6 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; - uint8_t *funnyYCode= c->funnyYCode; - uint8_t *funnyUVCode= c->funnyUVCode; uint8_t *formatConvBuffer= c->formatConvBuffer; const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); @@ -2942,14 +2949,14 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s assert(lastInLumBuf + 1 - srcSliceY >= 0); //printf("%d %d\n", lumBufIndex, vLumBufSize); RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, - flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, - funnyYCode, c->srcFormat, formatConvBuffer, - c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0); + flags, hLumFilter, hLumFilterPos, hLumFilterSize, + c->srcFormat, formatConvBuffer, + pal, 0); if (CONFIG_SWSCALE_ALPHA && alpPixBuf) RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc, - flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, - funnyYCode, c->srcFormat, formatConvBuffer, - c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1); + flags, hLumFilter, hLumFilterPos, hLumFilterSize, + c->srcFormat, formatConvBuffer, + pal, 1); lastInLumBuf++; } while(lastInChrBuf < lastChrSrcY) @@ -2964,9 +2971,9 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s if (!(isGray(srcFormat) || isGray(dstFormat))) RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, - flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, - funnyUVCode, c->srcFormat, formatConvBuffer, - c->chrMmx2Filter, c->chrMmx2FilterPos, pal); + flags, hChrFilter, hChrFilterPos, hChrFilterSize, + c->srcFormat, formatConvBuffer, + pal); lastInChrBuf++; } //wrap buf index around to stay inside the ring buffer @@ -2990,14 +2997,14 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); assert(lastInLumBuf + 1 - srcSliceY >= 0); RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, - flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, - funnyYCode, c->srcFormat, formatConvBuffer, - c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0); + flags, hLumFilter, hLumFilterPos, hLumFilterSize, + c->srcFormat, formatConvBuffer, + pal, 0); if (CONFIG_SWSCALE_ALPHA && alpPixBuf) RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc, - flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, - funnyYCode, c->srcFormat, formatConvBuffer, - c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1); + flags, hLumFilter, hLumFilterPos, hLumFilterSize, + c->srcFormat, formatConvBuffer, + pal, 1); lastInLumBuf++; } while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) @@ -3011,9 +3018,9 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s if (!(isGray(srcFormat) || isGray(dstFormat))) RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, - flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, - funnyUVCode, c->srcFormat, formatConvBuffer, - c->chrMmx2Filter, c->chrMmx2FilterPos, pal); + flags, hChrFilter, hChrFilterPos, hChrFilterSize, + c->srcFormat, formatConvBuffer, + pal); lastInChrBuf++; } //wrap buf index around to stay inside the ring buffer @@ -3209,8 +3216,10 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); #if HAVE_MMX - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); + if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory"); + /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ + if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory"); + else __asm__ volatile("emms" :::"memory"); #endif /* store changed local vars back in the context */ c->dstY= dstY; |