summary | refs | log | tree | commit | diff | stats
path: root/libmpcodecs
diff options
context:
space:
mode:
author: rfelker <rfelker@b3059339-0415-0410-9bf9-f77b7e298cf2> 2003-08-31 21:12:44 +0000
committer: rfelker <rfelker@b3059339-0415-0410-9bf9-f77b7e298cf2> 2003-08-31 21:12:44 +0000
commit: fc253c8c757f13064fb074d7382a973f73a54a97 (patch)
tree: 469000f5c42d648d4b8d13e1c8c035ac1c4a048b /libmpcodecs
parent: 4736af8f1acd2de465f7f2b8b9f8f0314bd1e837 (diff)
download: mpv-fc253c8c757f13064fb074d7382a973f73a54a97.tar.bz2
mpv-fc253c8c757f13064fb074d7382a973f73a54a97.tar.xz
more mmx and fix 100l sig11 in the previous mmx commit
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@10748 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libmpcodecs')
-rw-r--r-- libmpcodecs/pullup.c 107
1 files changed, 104 insertions, 3 deletions
diff --git a/libmpcodecs/pullup.c b/libmpcodecs/pullup.c
index b994556901..5468f5aa22 100644
--- a/libmpcodecs/pullup.c
+++ b/libmpcodecs/pullup.c
@@ -45,12 +45,102 @@ static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
"paddd %%mm4, %%mm3 \n\t"
"movd %%mm3, %%eax \n\t"
"psrlq $32, %%mm3 \n\t"
- "movd %%mm3, %%ebx \n\t"
- "addl %%ebx, %%eax \n\t"
+ "movd %%mm3, %%edx \n\t"
+ "addl %%edx, %%eax \n\t"
+ "emms \n\t"
+ : "=a" (ret)
+ : "S" (a), "D" (b), "a" (s)
+ : "%edx"
+ );
+ return ret;
+}
+
+static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
+{
+ int ret;
+ asm volatile (
+ "movl $8, %%ecx \n\t"
+ "pxor %%mm6, %%mm6 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ "subl %%eax, %%edi \n\t"
+
+ ".balign 16 \n\t"
+ "2: \n\t"
+
+ "movq (%%esi), %%mm0 \n\t"
+ "movq (%%edi), %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "movq (%%edi,%%eax), %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm0, %%mm0 \n\t"
+ "paddw %%mm2, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "psubusw %%mm1, %%mm0 \n\t"
+ "psubusw %%mm2, %%mm1 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "paddw %%mm1, %%mm6 \n\t"
+
+ "movq (%%esi), %%mm0 \n\t"
+ "movq (%%edi), %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm0 \n\t"
+ "movq (%%edi,%%eax), %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm0, %%mm0 \n\t"
+ "paddw %%mm2, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "psubusw %%mm1, %%mm0 \n\t"
+ "psubusw %%mm2, %%mm1 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "paddw %%mm1, %%mm6 \n\t"
+
+ "movq (%%edi,%%eax), %%mm0 \n\t"
+ "movq (%%esi), %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "movq (%%esi,%%eax), %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm0, %%mm0 \n\t"
+ "paddw %%mm2, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "psubusw %%mm1, %%mm0 \n\t"
+ "psubusw %%mm2, %%mm1 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "paddw %%mm1, %%mm6 \n\t"
+
+ "movq (%%edi,%%eax), %%mm0 \n\t"
+ "movq (%%esi), %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm0 \n\t"
+ "movq (%%esi,%%eax), %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm2 \n\t"
+ "paddw %%mm0, %%mm0 \n\t"
+ "paddw %%mm2, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "psubusw %%mm1, %%mm0 \n\t"
+ "psubusw %%mm2, %%mm1 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+ "paddw %%mm1, %%mm6 \n\t"
+
+ "addl %%eax, %%esi \n\t"
+ "addl %%eax, %%edi \n\t"
+ "decl %%ecx \n\t"
+ "jnz 2b \n\t"
+
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklwd %%mm7, %%mm6 \n\t"
+ "punpckhwd %%mm7, %%mm5 \n\t"
+ "paddd %%mm6, %%mm5 \n\t"
+ "movd %%mm5, %%eax \n\t"
+ "psrlq $32, %%mm5 \n\t"
+ "movd %%mm5, %%edx \n\t"
+ "addl %%edx, %%eax \n\t"
"emms \n\t"
: "=a" (ret)
: "S" (a), "D" (b), "a" (s)
+ : "%edx"
);
return ret;
}
@@ -80,6 +170,14 @@ static int licomb_y(unsigned char *a, unsigned char *b, int s)
return diff;
}
+/*
+ * Debug helper: evaluates licomb_y and licomb_y_mmx on the same
+ * arguments, prints both values when they differ, and returns the
+ * licomb_y_mmx result.
+ */
+static int licomb_y_test(unsigned char *a, unsigned char *b, int s)
+{
+ const int expected = licomb_y(a, b, s);
+ const int actual = licomb_y_mmx(a, b, s);
+ if (expected != actual) {
+ printf("%d != %d\n", expected, actual);
+ }
+ return actual;
+}
+
@@ -552,7 +650,10 @@ void pullup_init_context(struct pullup_context *c)
c->diff = diff_y;
c->licomb = licomb_y;
#ifdef HAVE_MMX
- if (c->cpu & PULLUP_CPU_MMX) c->diff = diff_y_mmx;
+ if (c->cpu & PULLUP_CPU_MMX) {
+ c->diff = diff_y_mmx;
+ c->licomb = licomb_y_mmx;
+ }
#endif
break;
#if 0