summary | refs | log | tree | commit | diff | stats
path: root/postproc
diff options
context:
space:
mode:
author diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2006-02-11 14:16:10 +0000
committer diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2006-02-11 14:16:10 +0000
commit 844c1f8f8a7d95e7726fd3c7e6426ab410a0a1fe (patch)
tree 77e94198157a2817344ac5ba0657e16d84f4a762 /postproc
parent b5e3a24fbb052f669e93b1a234b1dcc2946b6f1b (diff)
download mpv-844c1f8f8a7d95e7726fd3c7e6426ab410a0a1fe.tar.bz2
mpv-844c1f8f8a7d95e7726fd3c7e6426ab410a0a1fe.tar.xz
Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
once when the scaler is initialized, instead of building and freeing them
over and over. This gives massive performance improvements.

patch by Alan Curry, pacman*at*TheWorld*dot*com

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@17589 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- postproc/swscale.c 25
-rw-r--r-- postproc/swscale_internal.h 1
-rw-r--r-- postproc/yuv2rgb_altivec.c 28
3 files changed, 28 insertions, 26 deletions
diff --git a/postproc/swscale.c b/postproc/swscale.c
index e4537f7bf2..6f9c203a2a 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
srcFilter->chrV, dstFilter->chrV, c->param);
+
+#ifdef HAVE_ALTIVEC
+ c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
+ c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH);
+
+ for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
+ int j;
+ short *p = (short *)&c->vYCoeffsBank[i];
+ for (j=0;j<8;j++)
+ p[j] = c->vLumFilter[i];
+ }
+
+ for (i=0;i<c->vChrFilterSize*c->dstH;i++) {
+ int j;
+ short *p = (short *)&c->vCCoeffsBank[i];
+ for (j=0;j<8;j++)
+ p[j] = c->vChrFilter[i];
+ }
+#endif
}
// Calculate Buffer Sizes so that they won't run out while handling these damn slices
@@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){
c->hLumFilter = NULL;
if(c->hChrFilter) free(c->hChrFilter);
c->hChrFilter = NULL;
+#ifdef HAVE_ALTIVEC
+ if(c->vYCoeffsBank) free(c->vYCoeffsBank);
+ c->vYCoeffsBank = NULL;
+ if(c->vCCoeffsBank) free(c->vCCoeffsBank);
+ c->vCCoeffsBank = NULL;
+#endif
if(c->vLumFilterPos) free(c->vLumFilterPos);
c->vLumFilterPos = NULL;
diff --git a/postproc/swscale_internal.h b/postproc/swscale_internal.h
index c6611da509..b4e1dbeea1 100644
--- a/postproc/swscale_internal.h
+++ b/postproc/swscale_internal.h
@@ -154,6 +154,7 @@ typedef struct SwsContext{
vector signed short CGV;
vector signed short OY;
vector unsigned short CSHIFT;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
diff --git a/postproc/yuv2rgb_altivec.c b/postproc/yuv2rgb_altivec.c
index 2d2f7766b2..dee68b2f2e 100644
--- a/postproc/yuv2rgb_altivec.c
+++ b/postproc/yuv2rgb_altivec.c
@@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c,
uint8_t *dest, int dstW, int dstY)
{
int i,j;
- short tmp __attribute__((aligned (16)));
- int16_t *p;
short *f;
vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
vector signed short R0,G0,B0,R1,G1,B1;
@@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c,
vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0);
unsigned long scratch[16] __attribute__ ((aligned (16)));
- vector signed short *vYCoeffsBank, *vCCoeffsBank;
-
vector signed short *YCoeffs, *CCoeffs;
- vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
- vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
-
- for (i=0;i<lumFilterSize*c->dstH;i++) {
- tmp = c->vLumFilter[i];
- p = &vYCoeffsBank[i];
- for (j=0;j<8;j++)
- p[j] = tmp;
- }
-
- for (i=0;i<chrFilterSize*c->dstH;i++) {
- tmp = c->vChrFilter[i];
- p = &vCCoeffsBank[i];
- for (j=0;j<8;j++)
- p[j] = tmp;
- }
-
- YCoeffs = vYCoeffsBank+dstY*lumFilterSize;
- CCoeffs = vCCoeffsBank+dstY*chrFilterSize;
+ YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
+ CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
out = (vector unsigned char *)dest;
@@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c,
memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
}
- if (vYCoeffsBank) free (vYCoeffsBank);
- if (vCCoeffsBank) free (vCCoeffsBank);
-
}