From 39595a9f2ebe3daf21063824d29174716c11788a Mon Sep 17 00:00:00 2001 From: michael Date: Sat, 24 Nov 2001 01:38:30 +0000 Subject: faster dering git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3094 b3059339-0415-0410-9bf9-f77b7e298cf2 --- postproc/postprocess.c | 176 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 119 insertions(+), 57 deletions(-) (limited to 'postproc/postprocess.c') diff --git a/postproc/postprocess.c b/postproc/postprocess.c index d590b01a46..d0ae70b81e 100644 --- a/postproc/postprocess.c +++ b/postproc/postprocess.c @@ -47,7 +47,6 @@ c = checked against the other implementations (-vo md5) /* TODO: -verify that everything workes as it should (how?) reduce the time wasted on the mem transfer implement everything in C at least (done at the moment but ...) unroll stuff if instructions depend too much on the prior one @@ -62,7 +61,8 @@ border remover optimize c versions try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks smart blur -commandline option for the deblock thresholds +commandline option for the deblock / dering thresholds +memcpy chrominance if no chroma filtering is done ... */ @@ -162,6 +162,7 @@ static uint8_t tempBlocks[8*16*2]; //used for the horizontal code int hFlatnessThreshold= 56 - 16; int vFlatnessThreshold= 56 - 16; +int deringThreshold= 20; //amount of "black" u r willing to loose to get a brightness corrected picture double maxClippedThreshold= 0.01; @@ -310,28 +311,26 @@ asm volatile( "paddb %%mm2, %%mm0 \n\t" " \n\t" +#ifdef HAVE_MMX2 + "pxor %%mm7, %%mm7 \n\t" + "psadbw %%mm7, %%mm0 \n\t" +#else "movq %%mm0, %%mm1 \n\t" "psrlw $8, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t" -#ifdef HAVE_MMX2 - "pshufw $0xF9, %%mm0, %%mm1 \n\t" - "paddb %%mm1, %%mm0 \n\t" - "pshufw $0xFE, %%mm0, %%mm1 \n\t" -#else "movq %%mm0, %%mm1 \n\t" "psrlq $16, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "psrlq $32, %%mm0 \n\t" -#endif "paddb %%mm1, %%mm0 \n\t" +#endif "movd %%mm0, %0 \n\t" : "=r" (numEq) : "r" (src), "r" (stride) - : "%eax", "%ebx" + : "%ebx" ); - - numEq= (256 - numEq) &0xFF; + numEq= (-numEq) &0xFF; #else for(y=0; y>4; +#ifdef DEBUG_DERING_THRESHOLD + asm volatile("emms\n\t":); + { + static long long numPixels=0; + if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++; +// if((max-min)<20 || (max-min)*QP<200) +// if((max-min)*QP < 500) +// if(max-min worstDiff) + { + worstDiff= absDiff; + worstQP= QP; + worstRange= max-min; + } + errorSum+= error; + + if(1024LL*1024LL*1024LL % numSkiped == 0) + { + printf( "sum:%1.3f, skip:%d, wQP:%d, " + "wRange:%d, wDiff:%d, relSkip:%1.3f\n", + (float)errorSum/numSkiped, numSkiped, worstQP, worstRange, + worstDiff, (float)numSkiped/numPixels); + } + } + } +#endif if (*p + 2*QP < f) *p= *p + 2*QP; else if(*p - 2*QP > f) *p= *p - 2*QP; else *p=f; } } } - +#ifdef DEBUG_DERING_THRESHOLD + if(max-min < 20) + { + for(y=1; y<9; y++) + { + int x; + int t = 0; + p= src + stride*y; + for(x=1; x<9; x++) + { + p++; + *p = MIN(*p + 20, 255); + } + } +// src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; + } +#endif #endif } -- cgit v1.2.3