summary refs log tree commit diff stats
path: root/postproc
diff options
context:
space:
mode:
author michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-10-19 13:41:38 +0000
committer michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-10-19 13:41:38 +0000
commit 13994ecabb0e5e40159f3f1161de4567cce38ce0 (patch)
tree 450df93ee52e7bad77200b8d32115a8ec5a00d43 /postproc
parent e890543a4c1752a302974cfcb3e6141b2e5ef7e8 (diff)
download mpv-13994ecabb0e5e40159f3f1161de4567cce38ce0.tar.bz2
mpv-13994ecabb0e5e40159f3f1161de4567cce38ce0.tar.xz
fixed a bug in the tmp buffer
fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2286 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- postproc/postprocess.c 54
-rw-r--r-- postproc/postprocess.h 3
-rw-r--r-- postproc/postprocess_template.c 54
3 files changed, 103 insertions, 8 deletions
diff --git a/postproc/postprocess.c b/postproc/postprocess.c
index 33ebf42c34..f558a1e9f6 100644
--- a/postproc/postprocess.c
+++ b/postproc/postprocess.c
@@ -122,7 +122,7 @@ static uint64_t temp3=0;
static uint64_t temp4=0;
static uint64_t temp5=0;
static uint64_t pQPb=0;
-static uint8_t tempBlock[16*16];
+static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
int maxAllowedY=255;
//FIXME can never make a movie's black brighter (anyone needs that?)
-int minAllowedY=0;
+int minAllowedY=16;
#ifdef TIMING
static inline long long rdtsc()
@@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
static uint8_t *tempDst= NULL;
static uint8_t *tempSrc= NULL;
+ /* Temporary buffers for handling the last block */
+ static uint8_t *tempDstBlock= NULL;
+ static uint8_t *tempSrcBlock= NULL;
+
+ uint8_t *dstBlockPtrBackup;
+ uint8_t *srcBlockPtrBackup;
+
#ifdef TIMING
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
sumTime= rdtsc();
@@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
{
tempDst= (uint8_t*)memalign(8, 1024*24);
tempSrc= (uint8_t*)memalign(8, 1024*24);
+ tempDstBlock= (uint8_t*)memalign(8, 1024*24);
+ tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
}
if(!yHistogram)
@@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
int i;
yHistogram= (uint64_t*)malloc(8*256);
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
+
+ if(mode & FULL_Y_RANGE)
+ {
+ maxAllowedY=255;
+ minAllowedY=0;
+ }
}
if(!isColor)
@@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
srcBlock= tempSrc;
}
+ // From this point on it is guranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we'll might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
+ //can we mess with a 8x16 block, if not use a temp buffer, yes again
+ if(x+7 >= width)
+ {
+ int i;
+ dstBlockPtrBackup= dstBlock;
+ srcBlockPtrBackup= srcBlock;
+
+ for(i=0;i<BLOCK_SIZE*2; i++)
+ {
+ memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
+ memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
+ }
+
+ dstBlock= tempDstBlock;
+ srcBlock= tempSrcBlock;
+ }
+
blockCopy(dstBlock + dstStride*5, dstStride,
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
@@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
}
/* check if we have a previous block to deblock it with dstBlock */
- if(x - 8 >= 0 && x<width)
+ if(x - 8 >= 0)
{
#ifdef MORE_TIMING
T0= rdtsc();
@@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
dering(dstBlock - stride*9 + width-9, stride, QP);
//FIXME dering filter will not be applied to last block (bottom right)
+ /* did we use a tmp-block buffer */
+ if(x+7 >= width)
+ {
+ int i;
+ dstBlock= dstBlockPtrBackup;
+ srcBlock= srcBlockPtrBackup;
+
+ for(i=0;i<BLOCK_SIZE*2; i++)
+ {
+ memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
+ }
+ }
+
dstBlock+=8;
srcBlock+=8;
}
/* did we use a tmp buffer */
- if(y+15 > height)
+ if(y+15 >= height)
{
uint8_t *dstBlock= &(dst[y*dstStride]);
memcpy(dstBlock, tempDst, dstStride*(height-y) );
diff --git a/postproc/postprocess.h b/postproc/postprocess.h
index e7eb248512..20880a9874 100644
--- a/postproc/postprocess.h
+++ b/postproc/postprocess.h
@@ -46,6 +46,9 @@
#define H_RK1_FILTER 0x1000 // 4096 (not implemented yet)
#define H_X1_FILTER 0x2000 // 8192
+// select between full y range (255-0) or standart one (
+#define FULL_Y_RANGE 0x8000 // 32768
+
//Deinterlacing Filters
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
diff --git a/postproc/postprocess_template.c b/postproc/postprocess_template.c
index 33ebf42c34..f558a1e9f6 100644
--- a/postproc/postprocess_template.c
+++ b/postproc/postprocess_template.c
@@ -122,7 +122,7 @@ static uint64_t temp3=0;
static uint64_t temp4=0;
static uint64_t temp5=0;
static uint64_t pQPb=0;
-static uint8_t tempBlock[16*16];
+static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
int maxAllowedY=255;
//FIXME can never make a movie's black brighter (anyone needs that?)
-int minAllowedY=0;
+int minAllowedY=16;
#ifdef TIMING
static inline long long rdtsc()
@@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
static uint8_t *tempDst= NULL;
static uint8_t *tempSrc= NULL;
+ /* Temporary buffers for handling the last block */
+ static uint8_t *tempDstBlock= NULL;
+ static uint8_t *tempSrcBlock= NULL;
+
+ uint8_t *dstBlockPtrBackup;
+ uint8_t *srcBlockPtrBackup;
+
#ifdef TIMING
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
sumTime= rdtsc();
@@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
{
tempDst= (uint8_t*)memalign(8, 1024*24);
tempSrc= (uint8_t*)memalign(8, 1024*24);
+ tempDstBlock= (uint8_t*)memalign(8, 1024*24);
+ tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
}
if(!yHistogram)
@@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
int i;
yHistogram= (uint64_t*)malloc(8*256);
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
+
+ if(mode & FULL_Y_RANGE)
+ {
+ maxAllowedY=255;
+ minAllowedY=0;
+ }
}
if(!isColor)
@@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
srcBlock= tempSrc;
}
+ // From this point on it is guranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we'll might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
+ //can we mess with a 8x16 block, if not use a temp buffer, yes again
+ if(x+7 >= width)
+ {
+ int i;
+ dstBlockPtrBackup= dstBlock;
+ srcBlockPtrBackup= srcBlock;
+
+ for(i=0;i<BLOCK_SIZE*2; i++)
+ {
+ memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
+ memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
+ }
+
+ dstBlock= tempDstBlock;
+ srcBlock= tempSrcBlock;
+ }
+
blockCopy(dstBlock + dstStride*5, dstStride,
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
@@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
}
/* check if we have a previous block to deblock it with dstBlock */
- if(x - 8 >= 0 && x<width)
+ if(x - 8 >= 0)
{
#ifdef MORE_TIMING
T0= rdtsc();
@@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
dering(dstBlock - stride*9 + width-9, stride, QP);
//FIXME dering filter will not be applied to last block (bottom right)
+ /* did we use a tmp-block buffer */
+ if(x+7 >= width)
+ {
+ int i;
+ dstBlock= dstBlockPtrBackup;
+ srcBlock= srcBlockPtrBackup;
+
+ for(i=0;i<BLOCK_SIZE*2; i++)
+ {
+ memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
+ }
+ }
+
dstBlock+=8;
srcBlock+=8;
}
/* did we use a tmp buffer */
- if(y+15 > height)
+ if(y+15 >= height)
{
uint8_t *dstBlock= &(dst[y*dstStride]);
memcpy(dstBlock, tempDst, dstStride*(height-y) );