summaryrefslogtreecommitdiffstats
path: root/libmpcodecs/vf_ilpack.c
diff options
context:
space:
mode:
authorrfelker <rfelker@b3059339-0415-0410-9bf9-f77b7e298cf2>2003-12-15 04:07:17 +0000
committerrfelker <rfelker@b3059339-0415-0410-9bf9-f77b7e298cf2>2003-12-15 04:07:17 +0000
commitedf2a8bb43c6d5b58a28d60ee27d8764df8a64e1 (patch)
tree1f709a56fc12d065bb348346d33019a51e769c84 /libmpcodecs/vf_ilpack.c
parent76bfd0eb9f0bb7155472a0802ebbe9da9c5a6873 (diff)
downloadmpv-edf2a8bb43c6d5b58a28d60ee27d8764df8a64e1.tar.bz2
mpv-edf2a8bb43c6d5b58a28d60ee27d8764df8a64e1.tar.xz
mmx optimizations
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@11646 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libmpcodecs/vf_ilpack.c')
-rw-r--r--libmpcodecs/vf_ilpack.c256
1 files changed, 255 insertions, 1 deletions
diff --git a/libmpcodecs/vf_ilpack.c b/libmpcodecs/vf_ilpack.c
index 068135d2d1..1d9a7341f6 100644
--- a/libmpcodecs/vf_ilpack.c
+++ b/libmpcodecs/vf_ilpack.c
@@ -109,6 +109,256 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
);
pack_nn_C(dst, y, u, v, (w&7));
}
+/*
+ * MMX implementation of pack_li_0 (installed in open() when
+ * gCpuCaps.hasMMX is set). Interleaves one run of luma bytes with
+ * interpolated chroma into 4-byte groups laid out as Y, U, Y, V.
+ *
+ * dst    output buffer; 32 bytes are written per loop iteration
+ * y      luma input; 16 bytes are read per loop iteration
+ * u, v   chroma inputs; 8 bytes of each are read per loop iteration
+ * w      sample count; the asm loop runs w/16 times and the remaining
+ *        w&15 are handed to pack_li_0_C
+ * us, vs byte offsets (presumably the chroma line strides - confirm
+ *        against the caller); the second chroma sample is fetched at
+ *        u + 2*us and v + 2*vs
+ *
+ * Every chroma byte written is (7*c0 + c1) >> 3, where c0 is the byte at
+ * u (or v) and c1 is the byte at u + 2*us (or v + 2*vs).
+ *
+ * The asm is 32-bit x86 only: it uses pushl/popl and %ebp as a scratch
+ * register.
+ *
+ * NOTE(review): esi, edi, eax, ebx, ecx and edx are all modified by the
+ * asm but are declared as input-only operands and the output list is
+ * empty. GCC's extended-asm rules do not allow that; they should be
+ * read-write operands. As written, the tail call to pack_li_0_C passes
+ * the original dst/y/u/v, i.e. it is asked to process the FIRST w&15
+ * samples rather than the ones after the last full 16-sample group.
+ * Confirm and fix.
+ * NOTE(review): the loop tests its counter only at the bottom, so with
+ * w < 16 (ecx == 0 on entry) decl wraps and the loop does not stop after
+ * one pass.
+ * NOTE(review): vs is read from 4 bytes past &us, which assumes vs is
+ * stored directly after us in memory (presumably relying on the 32-bit
+ * stack argument layout - verify).
+ * NOTE(review): .Lli0 is a named label; if the compiler ever emits this
+ * asm twice the assembler would see a duplicate symbol.
+ */
+static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
+ unsigned char *u, unsigned char *v, int w, int us, int vs)
+{
+ asm volatile (""
+ "pushl %%ebp \n\t" /* ebp is used below to hold the v offset */
+ "movl 4(%%edx), %%ebp \n\t" /* ebp = word after us (expected: vs) */
+ "movl (%%edx), %%edx \n\t" /* edx = us (edx held &us on entry) */
+ "pxor %%mm0, %%mm0 \n\t"
+ /* mm0 = 0: supplies the zero half for every unpack below */
+ ".balign 16 \n\t"
+ ".Lli0: \n\t"
+ "movq (%%esi), %%mm1 \n\t"
+ "movq (%%esi), %%mm2 \n\t"
+ "punpcklbw %%mm0, %%mm1 \n\t"
+ "punpckhbw %%mm0, %%mm2 \n\t"
+ /* luma 0..7 widened to words; now the low 4 U and V bytes: mm4/mm6 = c1, mm3/mm5 = c0 */
+ "movq (%%eax,%%edx,2), %%mm4 \n\t"
+ "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "punpcklbw %%mm0, %%mm4 \n\t"
+ "punpcklbw %%mm0, %%mm6 \n\t"
+ "movq (%%eax), %%mm3 \n\t"
+ "movq (%%ebx), %%mm5 \n\t"
+ "punpcklbw %%mm0, %%mm3 \n\t"
+ "punpcklbw %%mm0, %%mm5 \n\t"
+ "paddw %%mm3, %%mm4 \n\t" /* seven additions of c0: mm4 = c1 + 7*c0 */
+ "paddw %%mm5, %%mm6 \n\t" /* likewise for v:      mm6 = c1 + 7*c0 */
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t" /* divide the weighted sum by 8 */
+ "psrlw $3, %%mm6 \n\t"
+ "movq %%mm4, %%mm3 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklwd %%mm0, %%mm3 \n\t"
+ "punpckhwd %%mm0, %%mm4 \n\t"
+ "punpcklwd %%mm0, %%mm5 \n\t"
+ "punpckhwd %%mm0, %%mm6 \n\t"
+ "pslld $8, %%mm3 \n\t" /* u goes to byte 1 of each 4-byte group */
+ "pslld $8, %%mm4 \n\t"
+ "pslld $24, %%mm5 \n\t" /* v goes to byte 3 of each 4-byte group */
+ "pslld $24, %%mm6 \n\t"
+ /* luma already occupies bytes 0 and 2; OR the chroma in */
+ "por %%mm3, %%mm1 \n\t"
+ "por %%mm4, %%mm2 \n\t"
+ "por %%mm5, %%mm1 \n\t"
+ "por %%mm6, %%mm2 \n\t"
+ /* store output bytes 0..15 */
+ "movq %%mm1, (%%edi) \n\t"
+ "movq %%mm2, 8(%%edi) \n\t"
+ /* second half: luma 8..15 */
+ "movq 8(%%esi), %%mm1 \n\t"
+ "movq 8(%%esi), %%mm2 \n\t"
+ "punpcklbw %%mm0, %%mm1 \n\t"
+ "punpckhbw %%mm0, %%mm2 \n\t"
+ /* same chroma qwords reloaded, this time taking the high 4 bytes */
+ "movq (%%eax,%%edx,2), %%mm4 \n\t"
+ "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "punpckhbw %%mm0, %%mm4 \n\t"
+ "punpckhbw %%mm0, %%mm6 \n\t"
+ "movq (%%eax), %%mm3 \n\t"
+ "movq (%%ebx), %%mm5 \n\t"
+ "punpckhbw %%mm0, %%mm3 \n\t"
+ "punpckhbw %%mm0, %%mm5 \n\t"
+ "paddw %%mm3, %%mm4 \n\t" /* again c1 + 7*c0 */
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t"
+ "psrlw $3, %%mm6 \n\t"
+ "movq %%mm4, %%mm3 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklwd %%mm0, %%mm3 \n\t"
+ "punpckhwd %%mm0, %%mm4 \n\t"
+ "punpcklwd %%mm0, %%mm5 \n\t"
+ "punpckhwd %%mm0, %%mm6 \n\t"
+ "pslld $8, %%mm3 \n\t"
+ "pslld $8, %%mm4 \n\t"
+ "pslld $24, %%mm5 \n\t"
+ "pslld $24, %%mm6 \n\t"
+
+ "por %%mm3, %%mm1 \n\t"
+ "por %%mm4, %%mm2 \n\t"
+ "por %%mm5, %%mm1 \n\t"
+ "por %%mm6, %%mm2 \n\t"
+ /* advance inputs: 16 luma bytes, 8 bytes of each chroma plane */
+ "addl $16, %%esi \n\t"
+ "addl $8, %%eax \n\t"
+ "addl $8, %%ebx \n\t"
+ /* store output bytes 16..31, then advance the output pointer */
+ "movq %%mm1, 16(%%edi) \n\t"
+ "movq %%mm2, 24(%%edi) \n\t"
+ "addl $32, %%edi \n\t"
+ /* bottom-tested loop: ecx was loaded with w/16 */
+ "decl %%ecx \n\t"
+ "jnz .Lli0 \n\t"
+ "emms \n\t"
+ "popl %%ebp \n\t"
+ :
+ : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
+ : "memory"
+ );
+ pack_li_0_C(dst, y, u, v, (w&15), us, vs);
+}
+/*
+ * MMX implementation of pack_li_1 (installed in open() when
+ * gCpuCaps.hasMMX is set). Interleaves one run of luma bytes with
+ * interpolated chroma into 4-byte groups laid out as Y, U, Y, V.
+ *
+ * dst    output buffer; 32 bytes are written per loop iteration
+ * y      luma input; 16 bytes are read per loop iteration
+ * u, v   chroma inputs; 8 bytes of each are read per loop iteration
+ * w      sample count; the asm loop runs w/16 times and the remaining
+ *        w&15 are handed to pack_li_1_C
+ * us, vs byte offsets (presumably the chroma line strides - confirm
+ *        against the caller); the second chroma sample is fetched at
+ *        u + 2*us and v + 2*vs
+ *
+ * Every chroma byte written is (5*c0 + 3*c1) >> 3, where c0 is the byte
+ * at u (or v) and c1 is the byte at u + 2*us (or v + 2*vs).
+ *
+ * The asm is 32-bit x86 only: it uses pushl/popl and %ebp as a scratch
+ * register.
+ *
+ * NOTE(review): esi, edi, eax, ebx, ecx and edx are all modified by the
+ * asm but are declared as input-only operands and the output list is
+ * empty. GCC's extended-asm rules do not allow that; they should be
+ * read-write operands. As written, the tail call to pack_li_1_C passes
+ * the original dst/y/u/v, i.e. it is asked to process the FIRST w&15
+ * samples rather than the ones after the last full 16-sample group.
+ * Confirm and fix.
+ * NOTE(review): the loop tests its counter only at the bottom, so with
+ * w < 16 (ecx == 0 on entry) decl wraps and the loop does not stop after
+ * one pass.
+ * NOTE(review): vs is read from 4 bytes past &us, which assumes vs is
+ * stored directly after us in memory (presumably relying on the 32-bit
+ * stack argument layout - verify).
+ * NOTE(review): .Lli1 is a named label; if the compiler ever emits this
+ * asm twice the assembler would see a duplicate symbol.
+ */
+static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
+ unsigned char *u, unsigned char *v, int w, int us, int vs)
+{
+ asm volatile (""
+ "pushl %%ebp \n\t" /* ebp is used below to hold the v offset */
+ "movl 4(%%edx), %%ebp \n\t" /* ebp = word after us (expected: vs) */
+ "movl (%%edx), %%edx \n\t" /* edx = us (edx held &us on entry) */
+ "pxor %%mm0, %%mm0 \n\t"
+ /* mm0 = 0: supplies the zero half for every unpack below */
+ ".balign 16 \n\t"
+ ".Lli1: \n\t"
+ "movq (%%esi), %%mm1 \n\t"
+ "movq (%%esi), %%mm2 \n\t"
+ "punpcklbw %%mm0, %%mm1 \n\t"
+ "punpckhbw %%mm0, %%mm2 \n\t"
+ /* luma 0..7 widened to words; now the low 4 U and V bytes: mm4/mm6 = c1, mm3/mm5 = c0 */
+ "movq (%%eax,%%edx,2), %%mm4 \n\t"
+ "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "punpcklbw %%mm0, %%mm4 \n\t"
+ "punpcklbw %%mm0, %%mm6 \n\t"
+ "movq (%%eax), %%mm3 \n\t"
+ "movq (%%ebx), %%mm5 \n\t"
+ "punpcklbw %%mm0, %%mm3 \n\t"
+ "punpcklbw %%mm0, %%mm5 \n\t"
+ "movq %%mm4, %%mm7 \n\t" /* mm7 is scratch for tripling c1 */
+ "paddw %%mm4, %%mm4 \n\t"
+ "paddw %%mm7, %%mm4 \n\t" /* mm4 = 3*c1 (u) */
+ "movq %%mm6, %%mm7 \n\t"
+ "paddw %%mm6, %%mm6 \n\t"
+ "paddw %%mm7, %%mm6 \n\t" /* mm6 = 3*c1 (v) */
+ "paddw %%mm3, %%mm4 \n\t" /* five additions of c0: 3*c1 + 5*c0 */
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t" /* divide the weighted sum by 8 */
+ "psrlw $3, %%mm6 \n\t"
+ "movq %%mm4, %%mm3 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklwd %%mm0, %%mm3 \n\t"
+ "punpckhwd %%mm0, %%mm4 \n\t"
+ "punpcklwd %%mm0, %%mm5 \n\t"
+ "punpckhwd %%mm0, %%mm6 \n\t"
+ "pslld $8, %%mm3 \n\t" /* u goes to byte 1 of each 4-byte group */
+ "pslld $8, %%mm4 \n\t"
+ "pslld $24, %%mm5 \n\t" /* v goes to byte 3 of each 4-byte group */
+ "pslld $24, %%mm6 \n\t"
+ /* luma already occupies bytes 0 and 2; OR the chroma in */
+ "por %%mm3, %%mm1 \n\t"
+ "por %%mm4, %%mm2 \n\t"
+ "por %%mm5, %%mm1 \n\t"
+ "por %%mm6, %%mm2 \n\t"
+ /* store output bytes 0..15 */
+ "movq %%mm1, (%%edi) \n\t"
+ "movq %%mm2, 8(%%edi) \n\t"
+ /* second half: luma 8..15 */
+ "movq 8(%%esi), %%mm1 \n\t"
+ "movq 8(%%esi), %%mm2 \n\t"
+ "punpcklbw %%mm0, %%mm1 \n\t"
+ "punpckhbw %%mm0, %%mm2 \n\t"
+ /* same chroma qwords reloaded, this time taking the high 4 bytes */
+ "movq (%%eax,%%edx,2), %%mm4 \n\t"
+ "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "punpckhbw %%mm0, %%mm4 \n\t"
+ "punpckhbw %%mm0, %%mm6 \n\t"
+ "movq (%%eax), %%mm3 \n\t"
+ "movq (%%ebx), %%mm5 \n\t"
+ "punpckhbw %%mm0, %%mm3 \n\t"
+ "punpckhbw %%mm0, %%mm5 \n\t"
+ "movq %%mm4, %%mm7 \n\t"
+ "paddw %%mm4, %%mm4 \n\t"
+ "paddw %%mm7, %%mm4 \n\t" /* mm4 = 3*c1 (u) */
+ "movq %%mm6, %%mm7 \n\t"
+ "paddw %%mm6, %%mm6 \n\t"
+ "paddw %%mm7, %%mm6 \n\t" /* mm6 = 3*c1 (v) */
+ "paddw %%mm3, %%mm4 \n\t" /* again 3*c1 + 5*c0 */
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t"
+ "psrlw $3, %%mm6 \n\t"
+ "movq %%mm4, %%mm3 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklwd %%mm0, %%mm3 \n\t"
+ "punpckhwd %%mm0, %%mm4 \n\t"
+ "punpcklwd %%mm0, %%mm5 \n\t"
+ "punpckhwd %%mm0, %%mm6 \n\t"
+ "pslld $8, %%mm3 \n\t"
+ "pslld $8, %%mm4 \n\t"
+ "pslld $24, %%mm5 \n\t"
+ "pslld $24, %%mm6 \n\t"
+
+ "por %%mm3, %%mm1 \n\t"
+ "por %%mm4, %%mm2 \n\t"
+ "por %%mm5, %%mm1 \n\t"
+ "por %%mm6, %%mm2 \n\t"
+ /* advance inputs: 16 luma bytes, 8 bytes of each chroma plane */
+ "addl $16, %%esi \n\t"
+ "addl $8, %%eax \n\t"
+ "addl $8, %%ebx \n\t"
+ /* store output bytes 16..31, then advance the output pointer */
+ "movq %%mm1, 16(%%edi) \n\t"
+ "movq %%mm2, 24(%%edi) \n\t"
+ "addl $32, %%edi \n\t"
+ /* bottom-tested loop: ecx was loaded with w/16 */
+ "decl %%ecx \n\t"
+ "jnz .Lli1 \n\t"
+ "emms \n\t"
+ "popl %%ebp \n\t"
+ :
+ : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
+ : "memory"
+ );
+ pack_li_1_C(dst, y, u, v, (w&15), us, vs);
+}
#endif
static pack_func_t *pack_nn;
@@ -199,7 +449,11 @@ static int open(vf_instance_t *vf, char* args)
pack_li_0 = pack_li_0_C;
pack_li_1 = pack_li_1_C;
#ifdef HAVE_MMX
- if(gCpuCaps.hasMMX) pack_nn = (pack_func_t *)pack_nn_MMX;
+ if(gCpuCaps.hasMMX) {
+ pack_nn = (pack_func_t *)pack_nn_MMX;
+ pack_li_0 = pack_li_0_MMX;
+ pack_li_1 = pack_li_1_MMX;
+ }
#endif
switch(vf->priv->mode) {