summary | refs | log | tree | commit | diff | stats
path: root/postproc/postprocess_template.c
diff options
context:
space:
mode:
author michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2002-10-28 19:31:04 +0000
committer michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2002-10-28 19:31:04 +0000
commit 2d28fdd64cf1aef1a80d87c124f9f60a277391b0 (patch)
tree 85d7368aa74cbfbd85c5e7e23e8ca820751fa22c /postproc/postprocess_template.c
parent a64160a3bdc601dc45d4ebf47ebbc7faaf4f8095 (diff)
download mpv-2d28fdd64cf1aef1a80d87c124f9f60a277391b0.tar.bz2
mpv-2d28fdd64cf1aef1a80d87c124f9f60a277391b0.tar.xz
postprocessing cleanup:
remove opendivx #ifdefs; remove rk1 filter; remove unused / obsolete stuff; add -1,4,2,4,-1 deinterlacing filter (ffmpeg uses that); threadsafe / no more non-const globals; some optimizations; different strides for Y,U,V possible; remove ebx usage (someone really should fix gcc, this is really lame); change the dering filter slightly (tell me if it's worse for any files). git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@7947 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc/postprocess_template.c')
-rw-r--r-- postproc/postprocess_template.c 954
1 files changed, 431 insertions, 523 deletions
diff --git a/postproc/postprocess_template.c b/postproc/postprocess_template.c
index dbb6dd5f42..6cd35b0752 100644
--- a/postproc/postprocess_template.c
+++ b/postproc/postprocess_template.c
@@ -45,23 +45,19 @@
//FIXME? |255-0| = 1 (shouldnt be a problem ...)
+#ifdef HAVE_MMX
/**
* Check if the middle 8x8 Block in the given 8x16 block is flat
*/
-static inline int RENAME(isVertDC)(uint8_t src[], int stride){
+static inline int RENAME(isVertDC)(uint8_t src[], int stride, PPContext *c){
int numEq= 0;
-#ifndef HAVE_MMX
- int y;
-#endif
src+= stride*4; // src points to begin of the 8x8 Block
-#ifdef HAVE_MMX
asm volatile(
"leal (%1, %2), %%eax \n\t"
- "leal (%%eax, %2, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9
-// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
- "movq "MANGLE(mmxDCOffset)", %%mm7 \n\t" // mm7 = 0x7F
- "movq "MANGLE(mmxDCThreshold)", %%mm6 \n\t" // mm6 = 0x7D
+// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
+ "movq %3, %%mm7 \n\t" // mm7 = 0x7F
+ "movq %4, %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t"
"movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
@@ -79,6 +75,8 @@ asm volatile(
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t"
+
+ "leal (%%eax, %2, 4), %%eax \n\t"
"movq (%1, %2, 4), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
@@ -86,19 +84,19 @@ asm volatile(
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "movq (%%ebx), %%mm1 \n\t"
+ "movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t"
- "movq (%%ebx, %2), %%mm2 \n\t"
+ "movq (%%eax, %2), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "movq (%%ebx, %2, 2), %%mm1 \n\t"
+ "movq (%%eax, %2, 2), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
@@ -121,49 +119,20 @@ asm volatile(
#endif
"movd %%mm0, %0 \n\t"
: "=r" (numEq)
- : "r" (src), "r" (stride)
- : "%eax", "%ebx"
+ : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold)
+ : "%eax"
);
numEq= (-numEq) &0xFF;
-
-#else
- for(y=0; y<BLOCK_SIZE-1; y++)
- {
- if(((src[0] - src[0+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[1] - src[1+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[2] - src[2+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[3] - src[3+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[4] - src[4+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[5] - src[5+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[6] - src[6+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- if(((src[7] - src[7+stride] + dcOffset)&0xFFFF) < dcThreshold) numEq++;
- src+= stride;
- }
-#endif
-/* if(abs(numEq - asmEq) > 0)
- {
- printf("\nasm:%d c:%d\n", asmEq, numEq);
- for(int y=0; y<8; y++)
- {
- for(int x=0; x<8; x++)
- {
- printf("%d ", temp[x + y*stride]);
- }
- printf("\n");
- }
- }
-*/
-// for(int i=0; i<numEq/8; i++) src[i]=255;
- return (numEq > vFlatnessThreshold) ? 1 : 0;
+ return numEq > c->ppMode.flatnessThreshold;
}
+#endif
-static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP)
+static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c)
{
#ifdef HAVE_MMX
int isOk;
src+= stride*3;
asm volatile(
-// "int $3 \n\t"
"movq (%1, %2), %%mm0 \n\t"
"movq (%1, %2, 8), %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
@@ -171,55 +140,39 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP)
"psubusb %%mm2, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t" // ABS Diff
- "movq "MANGLE(pQPb)", %%mm7 \n\t" // QP,..., QP
+ "movq %3, %%mm7 \n\t" // QP,..., QP
"paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
"psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0
- "pcmpeqd "MANGLE(b00)", %%mm0 \n\t"
- "psrlq $16, %%mm0 \n\t"
- "pcmpeqd "MANGLE(bFF)", %%mm0 \n\t"
-// "movd %%mm0, (%1, %2, 4)\n\t"
+ "packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
: "=r" (isOk)
- : "r" (src), "r" (stride)
+ : "r" (src), "r" (stride), "m" (c->pQPb)
);
- return isOk;
+ return isOk==0;
#else
-
- int isOk2= 1;
int x;
+ const int QP= c->QP;
src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++)
{
- if(abs((int)src[x + stride] - (int)src[x + (stride<<3)]) > 2*QP) isOk2=0;
+ if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
}
-/* if(isOk && !isOk2 || !isOk && isOk2)
- {
- printf("\nasm:%d c:%d QP:%d\n", isOk, isOk2, QP);
- for(int y=0; y<9; y++)
- {
- for(int x=0; x<8; x++)
- {
- printf("%d ", src[x + y*stride]);
- }
- printf("\n");
- }
- } */
- return isOk2;
+ return 1;
#endif
-
}
/**
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
*/
-static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
+static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
asm volatile( //"movv %0 %1 %2\n\t"
- "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
+ "movq %2, %%mm0 \n\t" // QP,..., QP
+ "pxor %%mm4, %%mm4 \n\t"
"movq (%0), %%mm6 \n\t"
"movq (%0, %1), %%mm5 \n\t"
@@ -229,7 +182,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"psubusb %%mm1, %%mm2 \n\t"
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
- "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
+ "pcmpeqb %%mm4, %%mm2 \n\t" // diff <= QP -> FF
"pand %%mm2, %%mm6 \n\t"
"pandn %%mm1, %%mm2 \n\t"
@@ -237,8 +190,8 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"movq (%0, %1, 8), %%mm5 \n\t"
"leal (%0, %1, 4), %%eax \n\t"
- "leal (%0, %1, 8), %%ebx \n\t"
- "subl %1, %%ebx \n\t"
+ "leal (%0, %1, 8), %%ecx \n\t"
+ "subl %1, %%ecx \n\t"
"addl %1, %0 \n\t" // %0 points to line 1 not 0
"movq (%0, %1, 8), %%mm7 \n\t"
"movq %%mm5, %%mm1 \n\t"
@@ -247,7 +200,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"psubusb %%mm1, %%mm2 \n\t"
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
- "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
+ "pcmpeqb %%mm4, %%mm2 \n\t" // diff <= QP -> FF
"pand %%mm2, %%mm7 \n\t"
"pandn %%mm1, %%mm2 \n\t"
@@ -255,7 +208,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
// 1 2 3 4 5 6 7 8
- // %0 %0+%1 %0+2%1 eax %0+4%1 eax+2%1 ebx eax+4%1
+ // %0 %0+%1 %0+2%1 eax %0+4%1 eax+2%1 ecx eax+4%1
// 6 4 2 2 1 1
// 6 4 4 2
// 6 8 2
@@ -286,7 +239,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"movq %%mm3, (%0,%1) \n\t" // X
// mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
PAVGB(%%mm4, %%mm6) //11 /2
- "movq (%%ebx), %%mm0 \n\t" // 1
+ "movq (%%ecx), %%mm0 \n\t" // 1
PAVGB((%%eax, %1, 2), %%mm0) // 11/2
"movq %%mm0, %%mm3 \n\t" // 11/2
PAVGB(%%mm1, %%mm0) // 2 11/4
@@ -296,7 +249,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"movq %%mm0, (%0, %1, 2) \n\t" // X
// mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
"movq (%%eax, %1, 4), %%mm0 \n\t" // 1
- PAVGB((%%ebx), %%mm0) // 11 /2
+ PAVGB((%%ecx), %%mm0) // 11 /2
PAVGB(%%mm0, %%mm6) //11 11 /4
PAVGB(%%mm1, %%mm4) // 11 /2
PAVGB(%%mm2, %%mm1) // 11 /2
@@ -323,12 +276,12 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
PAVGB(%%mm0, %%mm1) // 11224222 /16
"movq %%mm1, (%%eax, %1, 2) \n\t" // X
// mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
- PAVGB((%%ebx), %%mm2) // 112 4 /8
+ PAVGB((%%ecx), %%mm2) // 112 4 /8
"movq (%%eax, %1, 4), %%mm0 \n\t" // 1
PAVGB(%%mm0, %%mm6) // 1 1 /2
PAVGB(%%mm7, %%mm6) // 1 12 /4
PAVGB(%%mm2, %%mm6) // 1122424 /4
- "movq %%mm6, (%%ebx) \n\t" // X
+ "movq %%mm6, (%%ecx) \n\t" // X
// mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
PAVGB(%%mm7, %%mm5) // 11 2 /4
PAVGB(%%mm7, %%mm5) // 11 6 /8
@@ -339,8 +292,8 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"subl %1, %0 \n\t"
:
- : "r" (src), "r" (stride)
- : "%eax", "%ebx"
+ : "r" (src), "r" (stride), "m" (c->pQPb)
+ : "%eax", "%ecx"
);
#else
const int l1= stride;
@@ -356,8 +309,8 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++)
{
- const int first= ABS(src[0] - src[l1]) < QP ? src[0] : src[l1];
- const int last= ABS(src[l8] - src[l9]) < QP ? src[l9] : src[l8];
+ const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
+ const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
int sums[9];
sums[0] = first + src[l1];
@@ -381,10 +334,10 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
src++;
}
-
#endif
}
+#if 0
/**
* Experimental implementation of the filter (Algorithm 1) described in a paper from Ramkishor & Karandikar
* values are correctly clipped (MMX2)
@@ -405,9 +358,9 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
"pxor %%mm7, %%mm7 \n\t" // 0
"movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%ecx \n\t"
// 0 1 2 3 4 5 6 7 8 9
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
"movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
"movq %%mm0, %%mm1 \n\t" // QP,..., QP
"paddusb "MANGLE(b02)", %%mm0 \n\t"
@@ -415,7 +368,7 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
"pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4
"paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ...
"movq (%0, %1, 4), %%mm2 \n\t" // line 4
- "movq (%%ebx), %%mm3 \n\t" // line 5
+ "movq (%%ecx), %%mm3 \n\t" // line 5
"movq %%mm2, %%mm4 \n\t" // line 4
"pcmpeqb %%mm5, %%mm5 \n\t" // -1
"pxor %%mm2, %%mm5 \n\t" // -line 4 - 1
@@ -433,11 +386,11 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
// "psubb %%mm6, %%mm2 \n\t"
"movq %%mm2, (%0,%1, 4) \n\t"
- "movq (%%ebx), %%mm2 \n\t"
+ "movq (%%ecx), %%mm2 \n\t"
// "paddb %%mm6, %%mm2 \n\t" // line 5 + 0x80
"psubb %%mm5, %%mm2 \n\t"
// "psubb %%mm6, %%mm2 \n\t"
- "movq %%mm2, (%%ebx) \n\t"
+ "movq %%mm2, (%%ecx) \n\t"
"paddb %%mm6, %%mm5 \n\t"
"psrlw $2, %%mm5 \n\t"
@@ -450,15 +403,15 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
"psubb %%mm6, %%mm2 \n\t"
"movq %%mm2, (%%eax, %1, 2) \n\t"
- "movq (%%ebx, %1), %%mm2 \n\t"
+ "movq (%%ecx, %1), %%mm2 \n\t"
"paddb %%mm6, %%mm2 \n\t" // line 6 + 0x80
"psubsb %%mm5, %%mm2 \n\t"
"psubb %%mm6, %%mm2 \n\t"
- "movq %%mm2, (%%ebx, %1) \n\t"
+ "movq %%mm2, (%%ecx, %1) \n\t"
:
: "r" (src), "r" (stride)
- : "%eax", "%ebx"
+ : "%eax", "%ecx"
);
#else
const int l1= stride;
@@ -488,6 +441,7 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
#endif
}
+#endif
/**
* Experimental Filter 1
@@ -496,7 +450,7 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
* can only smooth blocks at the expected locations (it cant smooth them if they did move)
* MMX2 version does correct clipping C version doesnt
*/
-static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
+static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
@@ -504,17 +458,17 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
asm volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%ecx \n\t"
// 0 1 2 3 4 5 6 7 8 9
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
"movq (%%eax, %1, 2), %%mm0 \n\t" // line 3
"movq (%0, %1, 4), %%mm1 \n\t" // line 4
"movq %%mm1, %%mm2 \n\t" // line 4
"psubusb %%mm0, %%mm1 \n\t"
"psubusb %%mm2, %%mm0 \n\t"
"por %%mm1, %%mm0 \n\t" // |l2 - l3|
- "movq (%%ebx), %%mm3 \n\t" // line 5
- "movq (%%ebx, %1), %%mm4 \n\t" // line 6
+ "movq (%%ecx), %%mm3 \n\t" // line 5
+ "movq (%%ecx, %1), %%mm4 \n\t" // line 6
"movq %%mm3, %%mm5 \n\t" // line 5
"psubusb %%mm4, %%mm3 \n\t"
"psubusb %%mm5, %%mm4 \n\t"
@@ -528,7 +482,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
"por %%mm5, %%mm4 \n\t" // |l4 - l5|
"psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
"movq %%mm4, %%mm3 \n\t" // d
- "movq "MANGLE(pQPb)", %%mm0 \n\t"
+ "movq %2, %%mm0 \n\t"
"paddusb %%mm0, %%mm0 \n\t"
"psubusb %%mm0, %%mm4 \n\t"
"pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0
@@ -546,11 +500,11 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
"pxor %%mm2, %%mm0 \n\t"
"movq %%mm0, (%0, %1, 4) \n\t" // line 4
- "movq (%%ebx), %%mm0 \n\t" // line 5
+ "movq (%%ecx), %%mm0 \n\t" // line 5
"pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
"paddusb %%mm3, %%mm0 \n\t"
"pxor %%mm2, %%mm0 \n\t"
- "movq %%mm0, (%%ebx) \n\t" // line 5
+ "movq %%mm0, (%%ecx) \n\t" // line 5
PAVGB(%%mm7, %%mm1) // d/4
@@ -560,11 +514,11 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
"pxor %%mm2, %%mm0 \n\t"
"movq %%mm0, (%%eax, %1, 2) \n\t" // line 3
- "movq (%%ebx, %1), %%mm0 \n\t" // line 6
+ "movq (%%ecx, %1), %%mm0 \n\t" // line 6
"pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
"paddusb %%mm1, %%mm0 \n\t"
"pxor %%mm2, %%mm0 \n\t"
- "movq %%mm0, (%%ebx, %1) \n\t" // line 6
+ "movq %%mm0, (%%ecx, %1) \n\t" // line 6
PAVGB(%%mm7, %%mm1) // d/8
@@ -574,15 +528,15 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
"pxor %%mm2, %%mm0 \n\t"
"movq %%mm0, (%%eax, %1) \n\t" // line 2
- "movq (%%ebx, %1, 2), %%mm0 \n\t" // line 7
+ "movq (%%ecx, %1, 2), %%mm0 \n\t" // line 7
"pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
"paddusb %%mm1, %%mm0 \n\t"
"pxor %%mm2, %%mm0 \n\t"
- "movq %%mm0, (%%ebx, %1, 2) \n\t" // line 7
+ "movq %%mm0, (%%ecx, %1, 2) \n\t" // line 7
:
- : "r" (src), "r" (stride)
- : "%eax", "%ebx"
+ : "r" (src), "r" (stride), "m" (co->pQPb)
+ : "%eax", "%ecx"
);
#else
@@ -607,7 +561,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
int d= ABS(b) - ((ABS(a) + ABS(c))>>1);
d= MAX(d, 0);
- if(d < QP*2)
+ if(d < co->QP*2)
{
int v = d * SIGN(-b);
@@ -621,39 +575,10 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
}
src++;
}
- /*
- const int l1= stride;
- const int l2= stride + l1;
- const int l3= stride + l2;
- const int l4= stride + l3;
- const int l5= stride + l4;
- const int l6= stride + l5;
- const int l7= stride + l6;
- const int l8= stride + l7;
- const int l9= stride + l8;
- for(int x=0; x<BLOCK_SIZE; x++)
- {
- int v2= src[l2];
- int v3= src[l3];
- int v4= src[l4];
- int v5= src[l5];
- int v6= src[l6];
- int v7= src[l7];
-
- if(ABS(v4-v5)<QP && ABS(v4-v5) - (ABS(v3-v4) + ABS(v5-v6))>0 )
- {
- src[l3] = (6*v2 + 4*v3 + 3*v4 + 2*v5 + v6 )/16;
- src[l4] = (3*v2 + 3*v3 + 4*v4 + 3*v5 + 2*v6 + v7 )/16;
- src[l5] = (1*v2 + 2*v3 + 3*v4 + 4*v5 + 3*v6 + 3*v7)/16;
- src[l6] = ( 1*v3 + 2*v4 + 3*v5 + 4*v6 + 6*v7)/16;
- }
- src++;
- }
-*/
#endif
}
-static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
+static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
/*
@@ -676,10 +601,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
#if 0 //sligtly more accurate and slightly slower
"pxor %%mm7, %%mm7 \n\t" // 0
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%ecx \n\t"
// 0 1 2 3 4 5 6 7
-// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1
+// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1
"movq (%0, %1, 2), %%mm0 \n\t" // l2
@@ -708,7 +633,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4
PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8
- "movq (%%ebx), %%mm2 \n\t" // l5
+ "movq (%%ecx), %%mm2 \n\t" // l5
"movq %%mm3, %%mm5 \n\t" // l3
PAVGB(%%mm7, %%mm3) // ~l3/2
PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4
@@ -721,13 +646,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0
- "movq (%%ebx, %1), %%mm6 \n\t" // l6
+ "movq (%%ecx, %1), %%mm6 \n\t" // l6
"movq %%mm6, %%mm5 \n\t" // l6
PAVGB(%%mm7, %%mm6) // ~l6/2
PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4
PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8
- "movq (%%ebx, %1, 2), %%mm5 \n\t" // l7
+ "movq (%%ecx, %1, 2), %%mm5 \n\t" // l7
"movq %%mm2, %%mm4 \n\t" // l5
PAVGB(%%mm7, %%mm2) // ~l5/2
PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4
@@ -741,7 +666,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8
- "movq "MANGLE(pQPb)", %%mm4 \n\t" // QP //FIXME QP+1 ?
+ "movq %2, %%mm4 \n\t" // QP //FIXME QP+1 ?
"paddusb "MANGLE(b01)", %%mm4 \n\t"
"pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP
"psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
@@ -783,8 +708,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"leal (%0, %1), %%eax \n\t"
"pcmpeqb %%mm6, %%mm6 \n\t" // -1
// 0 1 2 3 4 5 6 7
-// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1
+// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1
"movq (%%eax, %1, 2), %%mm1 \n\t" // l3
@@ -798,7 +723,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"pxor %%mm6, %%mm2 \n\t" // -l5-1
"movq %%mm2, %%mm5 \n\t" // -l5-1
"movq "MANGLE(b80)", %%mm4 \n\t" // 128
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%ecx \n\t"
PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2
PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128
PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128
@@ -815,8 +740,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128
// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
- PAVGB((%%ebx, %1), %%mm5) // (l6-l5+256)/2
- "movq (%%ebx, %1, 2), %%mm1 \n\t" // l7
+ PAVGB((%%ecx, %1), %%mm5) // (l6-l5+256)/2
+ "movq (%%ecx, %1, 2), %%mm1 \n\t" // l7
"pxor %%mm6, %%mm1 \n\t" // -l7-1
PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2
"movq "MANGLE(b80)", %%mm2 \n\t" // 128
@@ -836,7 +761,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
"movq "MANGLE(b00)", %%mm7 \n\t" // 0
- "movq "MANGLE(pQPb)", %%mm2 \n\t" // QP
+ "movq %2, %%mm2 \n\t" // QP
PAVGB(%%mm6, %%mm2) // 128 + QP/2
"psubb %%mm6, %%mm2 \n\t"
@@ -877,8 +802,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"movq %%mm2, (%0, %1, 4) \n\t"
:
- : "r" (src), "r" (stride)
- : "%eax", "%ebx"
+ : "r" (src), "r" (stride), "m" (c->pQPb)
+ : "%eax", "%ecx"
);
/*
@@ -951,10 +876,12 @@ src-=8;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%edx \n\t"
+ "leal -40(%%esp), %%ecx \n\t" // make space for 4 8-byte vars
+ "andl $0xFFFFFFF8, %%ecx \n\t" // align
// 0 1 2 3 4 5 6 7
-// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1
+// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1
"movq (%0), %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
@@ -992,8 +919,8 @@ src-=8;
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
"psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
- "movq %%mm0, "MANGLE(temp0)" \n\t" // 2L0 - 5L1 + 5L2 - 2L3
- "movq %%mm1, "MANGLE(temp1)" \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+ "movq %%mm0, (%%ecx) \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+ "movq %%mm1, 8(%%ecx) \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq (%0, %1, 4), %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
@@ -1002,8 +929,8 @@ src-=8;
"psubw %%mm0, %%mm2 \n\t" // L3 - L4
"psubw %%mm1, %%mm3 \n\t" // H3 - H4
- "movq %%mm2, "MANGLE(temp2)" \n\t" // L3 - L4
- "movq %%mm3, "MANGLE(temp3)" \n\t" // H3 - H4
+ "movq %%mm2, 16(%%ecx) \n\t" // L3 - L4
+ "movq %%mm3, 24(%%ecx) \n\t" // H3 - H4
"paddw %%mm4, %%mm4 \n\t" // 2L2
"paddw %%mm5, %%mm5 \n\t" // 2H2
"psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
@@ -1014,7 +941,7 @@ src-=8;
"psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4
"psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4
//50 opcodes so far
- "movq (%%ebx), %%mm2 \n\t"
+ "movq (%%edx), %%mm2 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" // L5
"punpckhbw %%mm7, %%mm3 \n\t" // H5
@@ -1023,10 +950,10 @@ src-=8;
"psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5
"psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5
- "movq (%%ebx, %1), %%mm6 \n\t"
+ "movq (%%edx, %1), %%mm6 \n\t"
"punpcklbw %%mm7, %%mm6 \n\t" // L6
"psubw %%mm6, %%mm2 \n\t" // L5 - L6
- "movq (%%ebx, %1), %%mm6 \n\t"
+ "movq (%%edx, %1), %%mm6 \n\t"
"punpckhbw %%mm7, %%mm6 \n\t" // H6
"psubw %%mm6, %%mm3 \n\t" // H5 - H6
@@ -1040,7 +967,7 @@ src-=8;
"psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6
"psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6
- "movq (%%ebx, %1, 2), %%mm2 \n\t"
+ "movq (%%edx, %1, 2), %%mm2 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" // L7
"punpckhbw %%mm7, %%mm3 \n\t" // H7
@@ -1050,8 +977,8 @@ src-=8;
"psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
"psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
- "movq "MANGLE(temp0)", %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
- "movq "MANGLE(temp1)", %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+ "movq (%%ecx), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+ "movq 8(%%ecx), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
#ifdef HAVE_MMX2
"movq %%mm7, %%mm6 \n\t" // 0
@@ -1106,8 +1033,6 @@ src-=8;
"psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5|
// 100 opcodes
"movd %2, %%mm2 \n\t" // QP
- "punpcklwd %%mm2, %%mm2 \n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
"psllw $3, %%mm2 \n\t" // 8QP
"movq %%mm2, %%mm3 \n\t" // 8QP
"pcmpgtw %%mm4, %%mm2 \n\t"
@@ -1129,18 +1054,8 @@ src-=8;
"psrlw $6, %%mm4 \n\t"
"psrlw $6, %%mm5 \n\t"
-/*
- "movq w06, %%mm2 \n\t" // 6
- "paddw %%mm2, %%mm4 \n\t"
- "paddw %%mm2, %%mm5 \n\t"
- "movq w1400, %%mm2 \n\t" // 1400h = 5120 = 5/64*2^16
-//FIXME if *5/64 is supposed to be /13 then we should use 5041 instead of 5120
- "pmulhw %%mm2, %%mm4 \n\t" // hd/13
- "pmulhw %%mm2, %%mm5 \n\t" // ld/13
-*/
-
- "movq "MANGLE(temp2)", %%mm0 \n\t" // L3 - L4
- "movq "MANGLE(temp3)", %%mm1 \n\t" // H3 - H4
+ "movq 16(%%ecx), %%mm0 \n\t" // L3 - L4
+ "movq 24(%%ecx), %%mm1 \n\t" // H3 - H4
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
@@ -1183,8 +1098,8 @@ src-=8;
"movq %%mm0, (%0, %1, 4) \n\t"
:
- : "r" (src), "r" (stride), "r" (QP)
- : "%eax", "%ebx"
+ : "r" (src), "r" (stride), "m" (c->pQPb)
+ : "%eax", "%edx", "%ecx"
);
#else
const int l1= stride;
@@ -1201,7 +1116,7 @@ src-=8;
for(x=0; x<BLOCK_SIZE; x++)
{
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
- if(ABS(middleEnergy) < 8*QP)
+ if(ABS(middleEnergy) < 8*c->QP)
{
const int q=(src[l4] - src[l5])/2;
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
@@ -1232,21 +1147,25 @@ src-=8;
#endif
}
-static inline void RENAME(dering)(uint8_t src[], int stride, int QP)
+static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile(
- "movq "MANGLE(pQPb)", %%mm0 \n\t"
- "paddusb %%mm0, %%mm0 \n\t"
- "movq %%mm0, "MANGLE(pQPb2)" \n\t"
+ "pxor %%mm6, %%mm6 \n\t"
+ "pcmpeqb %%mm7, %%mm7 \n\t"
+ "movq %2, %%mm0 \n\t"
+ "punpcklbw %%mm6, %%mm0 \n\t"
+ "psrlw $1, %%mm0 \n\t"
+ "psubw %%mm7, %%mm0 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "movq %%mm0, %3 \n\t"
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%edx \n\t"
+
// 0 1 2 3 4 5 6 7 8 9
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
- "pcmpeqb %%mm7, %%mm7 \n\t"
- "pxor %%mm6, %%mm6 \n\t"
#undef FIND_MIN_MAX
#ifdef HAVE_MMX2
#define FIND_MIN_MAX(addr)\
@@ -1267,9 +1186,9 @@ FIND_MIN_MAX((%%eax))
FIND_MIN_MAX((%%eax, %1))
FIND_MIN_MAX((%%eax, %1, 2))
FIND_MIN_MAX((%0, %1, 4))
-FIND_MIN_MAX((%%ebx))
-FIND_MIN_MAX((%%ebx, %1))
-FIND_MIN_MAX((%%ebx, %1, 2))
+FIND_MIN_MAX((%%edx))
+FIND_MIN_MAX((%%edx, %1))
+FIND_MIN_MAX((%%edx, %1, 2))
FIND_MIN_MAX((%0, %1, 8))
"movq %%mm7, %%mm4 \n\t"
@@ -1322,11 +1241,13 @@ FIND_MIN_MAX((%0, %1, 8))
"movd %%mm6, %%ecx \n\t"
"cmpb "MANGLE(deringThreshold)", %%cl \n\t"
" jb 1f \n\t"
+ "leal -24(%%esp), %%ecx \n\t"
+ "andl $0xFFFFFFF8, %%ecx \n\t"
PAVGB(%%mm0, %%mm7) // a=(max + min)/2
"punpcklbw %%mm7, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t"
- "movq %%mm7, "MANGLE(temp0)" \n\t"
+ "movq %%mm7, (%%ecx) \n\t"
"movq (%0), %%mm0 \n\t" // L10
"movq %%mm0, %%mm1 \n\t" // L10
@@ -1390,8 +1311,8 @@ FIND_MIN_MAX((%0, %1, 8))
PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\
PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\
PAVGB(lx, pplx) \
- "movq " #lx ", "MANGLE(temp1)" \n\t"\
- "movq "MANGLE(temp0)", " #lx " \n\t"\
+ "movq " #lx ", 8(%%ecx) \n\t"\
+ "movq (%%ecx), " #lx " \n\t"\
"psubusb " #lx ", " #t1 " \n\t"\
"psubusb " #lx ", " #t0 " \n\t"\
"psubusb " #lx ", " #sx " \n\t"\
@@ -1405,8 +1326,8 @@ FIND_MIN_MAX((%0, %1, 8))
PAVGB(plx, pplx) /* filtered */\
"movq " #dst ", " #t0 " \n\t" /* dst */\
"movq " #t0 ", " #t1 " \n\t" /* dst */\
- "psubusb "MANGLE(pQPb2)", " #t0 " \n\t"\
- "paddusb "MANGLE(pQPb2)", " #t1 " \n\t"\
+ "psubusb %3, " #t0 " \n\t"\
+ "paddusb %3, " #t1 " \n\t"\
PMAXUB(t0, pplx)\
PMINUB(t1, pplx, t0)\
"paddb " #sx ", " #ppsx " \n\t"\
@@ -1418,7 +1339,7 @@ FIND_MIN_MAX((%0, %1, 8))
"pandn " #dst ", " #ppsx " \n\t"\
"por " #pplx ", " #ppsx " \n\t"\
"movq " #ppsx ", " #dst " \n\t"\
- "movq "MANGLE(temp1)", " #lx " \n\t"
+ "movq 8(%%ecx), " #lx " \n\t"
/*
0000000
@@ -1439,15 +1360,15 @@ FIND_MIN_MAX((%0, %1, 8))
DERING_CORE((%%eax),(%%eax, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
DERING_CORE((%%eax, %1),(%%eax, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
DERING_CORE((%%eax, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 4),(%%ebx) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%ebx),(%%ebx, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%ebx, %1), (%%ebx, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%%ebx, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 8),(%%ebx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 4),(%%edx) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%edx),(%%edx, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%edx, %1), (%%edx, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%%edx, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 8),(%%edx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
"1: \n\t"
- : : "r" (src), "r" (stride), "r" (QP)
- : "%eax", "%ebx", "%ecx"
+ : : "r" (src), "r" (stride), "m" (c->pQPb), "m"(c->pQPb2)
+ : "%eax", "%edx", "%ecx"
);
#else
int y;
@@ -1456,6 +1377,7 @@ DERING_CORE((%0, %1, 8),(%%ebx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm
int avg;
uint8_t *p;
int s[10];
+ const int QP2= c->QP/2 + 1;
for(y=1; y<9; y++)
{
@@ -1468,30 +1390,41 @@ DERING_CORE((%0, %1, 8),(%%ebx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm
if(*p < min) min= *p;
}
}
- avg= (min + max + 1)/2;
+ avg= (min + max + 1)>>1;
if(max - min <deringThreshold) return;
for(y=0; y<10; y++)
{
- int x;
int t = 0;
- p= src + stride*y;
- for(x=0; x<10; x++)
- {
- if(*p > avg) t |= (1<<x);
- p++;
- }
+
+ if(src[stride*y + 0] > avg) t+= 1;
+ if(src[stride*y + 1] > avg) t+= 2;
+ if(src[stride*y + 2] > avg) t+= 4;
+ if(src[stride*y + 3] > avg) t+= 8;
+ if(src[stride*y + 4] > avg) t+= 16;
+ if(src[stride*y + 5] > avg) t+= 32;
+ if(src[stride*y + 6] > avg) t+= 64;
+ if(src[stride*y + 7] > avg) t+= 128;
+ if(src[stride*y + 8] > avg) t+= 256;
+ if(src[stride*y + 9] > avg) t+= 512;
+
t |= (~t)<<16;
t &= (t<<1) & (t>>1);
s[y] = t;
}
-
+
for(y=1; y<9; y++)
{
- int x;
int t = s[y-1] & s[y] & s[y+1];
t|= t>>16;
+ s[y-1]= t;
+ }
+
+ for(y=1; y<9; y++)
+ {
+ int x;
+ int t = s[y-1];
p= src + stride*y;
for(x=1; x<9; x++)
@@ -1544,8 +1477,8 @@ DERING_CORE((%0, %1, 8),(%%ebx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm
}
}
#endif
- if (*p + 2*QP < f) *p= *p + 2*QP;
- else if(*p - 2*QP > f) *p= *p - 2*QP;
+ if (*p + QP2 < f) *p= *p + QP2;
+ else if(*p - QP2 > f) *p= *p - QP2;
else *p=f;
}
}
@@ -1582,9 +1515,9 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
src+= 4*stride;
asm volatile(
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
+ "leal (%%eax, %1, 4), %%ecx \n\t"
// 0 1 2 3 4 5 6 7 8 9
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
+// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
"movq (%0), %%mm0 \n\t"
"movq (%%eax, %1), %%mm1 \n\t"
@@ -1593,15 +1526,15 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
"movq (%0, %1, 4), %%mm0 \n\t"
PAVGB(%%mm0, %%mm1)
"movq %%mm1, (%%eax, %1, 2) \n\t"
- "movq (%%ebx, %1), %%mm1 \n\t"
+ "movq (%%ecx, %1), %%mm1 \n\t"
PAVGB(%%mm1, %%mm0)
- "movq %%mm0, (%%ebx) \n\t"
+ "movq %%mm0, (%%ecx) \n\t"
"movq (%0, %1, 8), %%mm0 \n\t"
PAVGB(%%mm0, %%mm1)
- "movq %%mm1, (%%ebx, %1, 2) \n\t"
+ "movq %%mm1, (%%ecx, %1, 2) \n\t"
: : "r" (src), "r" (stride)
- : "%eax", "%ebx"
+ : "%eax", "%ecx"
);
#else
int x;
@@ -1631,12 +1564,12 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
src+= stride*3;
asm volatile(
"leal (%0, %1), %%eax \n\t"
- "leal (%%eax, %1, 4), %%ebx \n\t"
- "leal (%%ebx, %1, 4), %%ecx \n\t"
+ "leal (%%eax, %1, 4), %%edx \n\t"
+ "leal (%%edx, %1, 4), %%ecx \n\t"
"addl %1, %%ecx \n\t"
"pxor %%mm7, %%mm7 \n\t"
// 0 1 2 3 4 5 6 7 8 9 10
-// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 ecx
+// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx
#define DEINT_CUBIC(a,b,c,d,e)\
"movq " #a ", %%mm0 \n\t"\
@@ -1660,13 +1593,13 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
"packuswb %%mm3, %%mm1 \n\t"\
"movq %%mm1, " #c " \n\t"
-DEINT_CUBIC((%0), (%%eax, %1), (%%eax, %1, 2), (%0, %1, 4), (%%ebx, %1))
-DEINT_CUBIC((%%eax, %1), (%0, %1, 4), (%%ebx), (%%ebx, %1), (%0, %1, 8))
-DEINT_CUBIC((%0,