summary refs log tree commit diff stats
path: root/postproc
diff options
context:
space:
mode:
author michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-10-23 10:29:48 +0000
committer michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> 2001-10-23 10:29:48 +0000
commit 2c4c72959daf97d974d1238a60a28dc6abd8556f (patch)
tree 97826499e99064b757f97c8e7f3a38562c7a3471 /postproc
parent bd07786382b38d7637ae0db0342c1c7bc20905d2 (diff)
download mpv-2c4c72959daf97d974d1238a60a28dc6abd8556f.tar.bz2
mpv-2c4c72959daf97d974d1238a60a28dc6abd8556f.tar.xz
optimizations (+2% speedup)
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2414 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'postproc')
-rw-r--r-- postproc/postprocess.c 45
-rw-r--r-- postproc/postprocess_template.c 45
2 files changed, 32 insertions, 58 deletions
diff --git a/postproc/postprocess.c b/postproc/postprocess.c
index df2cd3cb3b..b32d3fe032 100644
--- a/postproc/postprocess.c
+++ b/postproc/postprocess.c
@@ -214,54 +214,50 @@ static inline int isVertDC(uint8_t src[], int stride){
int y;
src+= stride*4; // src points to begin of the 8x8 Block
#ifdef HAVE_MMX
- asm volatile(
- "pushl %1\n\t"
+asm volatile(
+ "leal (%1, %2), %%eax \n\t"
+ "leal (%%eax, %2, 4), %%ebx \n\t"
+// 0 1 2 3 4 5 6 7 8 9
+// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
"movq b7E, %%mm7 \n\t" // mm7 = 0x7F
"movq b7C, %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
"paddb %%mm7, %%mm0 \n\t"
"pcmpgtb %%mm6, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
+ "movq (%%eax,%2), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%eax, %2, 2), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
+ "movq (%1, %2, 4), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%ebx), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
+ "movq (%%ebx, %2), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%ebx, %2, 2), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
@@ -277,17 +273,12 @@ static inline int isVertDC(uint8_t src[], int stride){
"movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "popl %1\n\t"
"movd %%mm0, %0 \n\t"
: "=r" (numEq)
: "r" (src), "r" (stride)
);
-// printf("%d\n", numEq);
- numEq= (256 - (numEq & 0xFF)) &0xFF;
-// int asmEq= numEq;
-// numEq=0;
-// uint8_t *temp= src;
+ numEq= (256 - numEq) &0xFF;
#else
for(y=0; y<BLOCK_SIZE-1; y++)
@@ -2491,8 +2482,6 @@ static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int sr
{
#ifdef HAVE_MMX
asm volatile(
- "pushl %0 \n\t"
- "pushl %1 \n\t"
"leal (%2,%2), %%eax \n\t"
"leal (%3,%3), %%ebx \n\t"
"movq packedYOffset, %%mm2 \n\t"
@@ -2534,11 +2523,9 @@ SCALED_CPY
"addl %%ebx, %1 \n\t"
SCALED_CPY
- "popl %1 \n\t"
- "popl %0 \n\t"
- : : "r" (src),
- "r" (dst),
- "r" (srcStride),
+ : "+r"(src),
+ "+r"(dst)
+ :"r" (srcStride),
"r" (dstStride)
: "%eax", "%ebx"
);
diff --git a/postproc/postprocess_template.c b/postproc/postprocess_template.c
index df2cd3cb3b..b32d3fe032 100644
--- a/postproc/postprocess_template.c
+++ b/postproc/postprocess_template.c
@@ -214,54 +214,50 @@ static inline int isVertDC(uint8_t src[], int stride){
int y;
src+= stride*4; // src points to begin of the 8x8 Block
#ifdef HAVE_MMX
- asm volatile(
- "pushl %1\n\t"
+asm volatile(
+ "leal (%1, %2), %%eax \n\t"
+ "leal (%%eax, %2, 4), %%ebx \n\t"
+// 0 1 2 3 4 5 6 7 8 9
+// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
"movq b7E, %%mm7 \n\t" // mm7 = 0x7F
"movq b7C, %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
"paddb %%mm7, %%mm0 \n\t"
"pcmpgtb %%mm6, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
+ "movq (%%eax,%2), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%eax, %2, 2), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
+ "movq (%1, %2, 4), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%ebx), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
"paddb %%mm2, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
+ "movq (%%ebx, %2), %%mm2 \n\t"
"psubb %%mm2, %%mm1 \n\t"
"paddb %%mm7, %%mm1 \n\t"
"pcmpgtb %%mm6, %%mm1 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "addl %2, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
+ "movq (%%ebx, %2, 2), %%mm1 \n\t"
"psubb %%mm1, %%mm2 \n\t"
"paddb %%mm7, %%mm2 \n\t"
"pcmpgtb %%mm6, %%mm2 \n\t"
@@ -277,17 +273,12 @@ static inline int isVertDC(uint8_t src[], int stride){
"movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t"
"paddb %%mm1, %%mm0 \n\t"
- "popl %1\n\t"
"movd %%mm0, %0 \n\t"
: "=r" (numEq)
: "r" (src), "r" (stride)
);
-// printf("%d\n", numEq);
- numEq= (256 - (numEq & 0xFF)) &0xFF;
-// int asmEq= numEq;
-// numEq=0;
-// uint8_t *temp= src;
+ numEq= (256 - numEq) &0xFF;
#else
for(y=0; y<BLOCK_SIZE-1; y++)
@@ -2491,8 +2482,6 @@ static inline void blockCopy(uint8_t dst[], int dstStride, uint8_t src[], int sr
{
#ifdef HAVE_MMX
asm volatile(
- "pushl %0 \n\t"
- "pushl %1 \n\t"
"leal (%2,%2), %%eax \n\t"
"leal (%3,%3), %%ebx \n\t"
"movq packedYOffset, %%mm2 \n\t"
@@ -2534,11 +2523,9 @@ SCALED_CPY
"addl %%ebx, %1 \n\t"
SCALED_CPY
- "popl %1 \n\t"
- "popl %0 \n\t"
- : : "r" (src),
- "r" (dst),
- "r" (srcStride),
+ : "+r"(src),
+ "+r"(dst)
+ :"r" (srcStride),
"r" (dstStride)
: "%eax", "%ebx"
);