diff options
author | diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2006-02-08 08:20:40 +0000 |
---|---|---|
committer | diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2006-02-08 08:20:40 +0000 |
commit | 1806f8776f86feac7f95cabca9dd7b4f2f243bf4 (patch) | |
tree | e749b26c4384d3650fdc1bd86638ebbaf0e41e61 /postproc | |
parent | 7d56335977eb85573aa57367924fd97ba239be5c (diff) | |
download | mpv-1806f8776f86feac7f95cabca9dd7b4f2f243bf4.tar.bz2 mpv-1806f8776f86feac7f95cabca9dd7b4f2f243bf4.tar.xz |
AltiVec operations need to have memory aligned on 16-byte boundaries.
patch by Alan Curry, pacman at world dot std dot com
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@17559 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- | postproc/swscale.c | 8 | ||||
-rw-r--r-- | postproc/yuv2rgb_altivec.c | 7 |
2 files changed, 10 insertions, 5 deletions
```diff
diff --git a/postproc/swscale.c b/postproc/swscale.c
index fbdedaecd3..2b0d585b1a 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -1166,7 +1166,8 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
 	}
 	// Note the +1 is for the MMXscaler which reads over the end
-	*outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
+	/* align at 16 for AltiVec (needed by hScale_altivec_real) */
+	*outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
 	memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
 	/* Normalize & Store in outFilter */
@@ -2132,10 +2133,11 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
 	c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
 	c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
 	//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
+	/* align at 16 bytes for AltiVec */
 	for(i=0; i<c->vLumBufSize; i++)
-		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
+		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(16, 4000);
 	for(i=0; i<c->vChrBufSize; i++)
-		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
+		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(16, 8000);
 	//try to avoid drawing green stuff between the right end and the stride end
 	for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
diff --git a/postproc/yuv2rgb_altivec.c b/postproc/yuv2rgb_altivec.c
index 16acc42564..69b5b302da 100644
--- a/postproc/yuv2rgb_altivec.c
+++ b/postproc/yuv2rgb_altivec.c
@@ -68,6 +68,9 @@
 #include <inttypes.h>
 #include <assert.h>
 #include "config.h"
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
@@ -788,8 +791,8 @@ altivec_yuv2packedX (SwsContext *c,
   vector signed short *YCoeffs, *CCoeffs;
-  vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*c->dstH);
-  vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*c->dstH);
+  vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
+  vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
   for (i=0;i<lumFilterSize*c->dstH;i++) {
     tmp = c->vLumFilter[i];
```