summary | refs | log | tree | commit | diff | stats
path: root/postproc
diff options
context:
space:
mode:
author: michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-10-30 22:03:25 +0000
committer: michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-10-30 22:03:25 +0000
commit: 478d5d3a7e415526a30335a9049e5ae125b08225 (patch)
tree: 42bbfc90d4edcacf957d55df8c1929441e973f83 /postproc
parent: 83ce6ce362dc3a0775f91f4c79c5d4770f6bb1d2 (diff)
download: mpv-478d5d3a7e415526a30335a9049e5ae125b08225.tar.bz2
mpv-478d5d3a7e415526a30335a9049e5ae125b08225.tar.xz
c optimizations
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2576 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- postproc/swscale.c 85
-rw-r--r-- postproc/swscale_template.c 85
2 files changed, 136 insertions, 34 deletions
diff --git a/postproc/swscale.c b/postproc/swscale.c
index 9749f66b93..fd385b4c7a 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -788,45 +788,96 @@ FULL_YSCALEYUV2RGB
//FIXME unroll C loop and dont recalculate UV
asm volatile ("\n\t"::: "memory");
- if(dstbpp==32 || dstbpp==24)
+ if(dstbpp==32)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
- dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
- dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
- dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
- dest+=dstbpp>>3;
+
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
+ dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
+ dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
+ dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
+
+ dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
+ dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
+ dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
+ }
+ }
+ if(dstbpp==24)
+ {
+ for(i=0; i<dstw-1; i+=2){
+ // vertical linear interpolation && yuv2rgb in a single step:
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
+ int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
+ int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
+ dest[0]=clip_table[((Y1 + Cb) >>13)];
+ dest[1]=clip_table[((Y1 + Cg) >>13)];
+ dest[2]=clip_table[((Y1 + Cr) >>13)];
+
+ dest[3]=clip_table[((Y2 + Cb) >>13)];
+ dest[4]=clip_table[((Y2 + Cg) >>13)];
+ dest[5]=clip_table[((Y2 + Cr) >>13)];
+ dest+=6;
}
}
else if(dstbpp==16)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
((uint16_t*)dest)[i] =
- (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
- ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
- ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
+ (clip_table[(Y1 + Cb) >>13]>>3) |
+ ((clip_table[(Y1 + Cg) >>13]<<3)&0x07E0) |
+ ((clip_table[(Y1 + Cr) >>13]<<8)&0xF800);
+
+ ((uint16_t*)dest)[i+1] =
+ (clip_table[(Y2 + Cb) >>13]>>3) |
+ ((clip_table[(Y2 + Cg) >>13]<<3)&0x07E0) |
+ ((clip_table[(Y2 + Cr) >>13]<<8)&0xF800);
}
}
else if(dstbpp==15)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
((uint16_t*)dest)[i] =
- (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
- ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
- ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
+ (clip_table[(Y1 + Cb) >>13]>>3) |
+ ((clip_table[(Y1 + Cg) >>13]<<2)&0x03E0) |
+ ((clip_table[(Y1 + Cr) >>13]<<7)&0x7C00);
+ ((uint16_t*)dest)[i+1] =
+ (clip_table[(Y2 + Cb) >>13]>>3) |
+ ((clip_table[(Y2 + Cg) >>13]<<2)&0x03E0) |
+ ((clip_table[(Y2 + Cr) >>13]<<7)&0x7C00);
}
}
#endif
diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c
index 9749f66b93..fd385b4c7a 100644
--- a/postproc/swscale_template.c
+++ b/postproc/swscale_template.c
@@ -788,45 +788,96 @@ FULL_YSCALEYUV2RGB
//FIXME unroll C loop and dont recalculate UV
asm volatile ("\n\t"::: "memory");
- if(dstbpp==32 || dstbpp==24)
+ if(dstbpp==32)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
- dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
- dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
- dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
- dest+=dstbpp>>3;
+
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
+ dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
+ dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
+ dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
+
+ dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
+ dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
+ dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
+ }
+ }
+ if(dstbpp==24)
+ {
+ for(i=0; i<dstw-1; i+=2){
+ // vertical linear interpolation && yuv2rgb in a single step:
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
+ int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
+ int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
+ dest[0]=clip_table[((Y1 + Cb) >>13)];
+ dest[1]=clip_table[((Y1 + Cg) >>13)];
+ dest[2]=clip_table[((Y1 + Cr) >>13)];
+
+ dest[3]=clip_table[((Y2 + Cb) >>13)];
+ dest[4]=clip_table[((Y2 + Cg) >>13)];
+ dest[5]=clip_table[((Y2 + Cr) >>13)];
+ dest+=6;
}
}
else if(dstbpp==16)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
((uint16_t*)dest)[i] =
- (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
- ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
- ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
+ (clip_table[(Y1 + Cb) >>13]>>3) |
+ ((clip_table[(Y1 + Cg) >>13]<<3)&0x07E0) |
+ ((clip_table[(Y1 + Cr) >>13]<<8)&0xF800);
+
+ ((uint16_t*)dest)[i+1] =
+ (clip_table[(Y2 + Cb) >>13]>>3) |
+ ((clip_table[(Y2 + Cg) >>13]<<3)&0x07E0) |
+ ((clip_table[(Y2 + Cr) >>13]<<8)&0xF800);
}
}
else if(dstbpp==15)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
+ int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
((uint16_t*)dest)[i] =
- (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
- ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
- ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
+ (clip_table[(Y1 + Cb) >>13]>>3) |
+ ((clip_table[(Y1 + Cg) >>13]<<2)&0x03E0) |
+ ((clip_table[(Y1 + Cr) >>13]<<7)&0x7C00);
+ ((uint16_t*)dest)[i+1] =
+ (clip_table[(Y2 + Cb) >>13]>>3) |
+ ((clip_table[(Y2 + Cg) >>13]<<2)&0x03E0) |
+ ((clip_table[(Y2 + Cr) >>13]<<7)&0x7C00);
}
}
#endif