diff options
author | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-10-30 22:35:02 +0000 |
---|---|---|
committer | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-10-30 22:35:02 +0000 |
commit | 028ee068e45f4e8d63a185efcb0bc1026447cc89 (patch) | |
tree | 148dd06f00300e1ac84ecef7480e4b64c05227a6 /libvo | |
parent | 7627e4061575ca0b47cfd72d4b292ef835f36f30 (diff) | |
download | mpv-028ee068e45f4e8d63a185efcb0bc1026447cc89.tar.bz2 mpv-028ee068e45f4e8d63a185efcb0bc1026447cc89.tar.xz |
slow MMX & not-so-slow asm versions (commented out)
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2579 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo')
-rw-r--r-- | libvo/osd.c | 77 | ||||
-rw-r--r-- | libvo/osd_template.c | 77 |
2 files changed, 154 insertions, 0 deletions
diff --git a/libvo/osd.c b/libvo/osd.c index a870733edc..d27ab86c7e 100644 --- a/libvo/osd.c +++ b/libvo/osd.c @@ -79,6 +79,76 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i int y; for(y=0;y<h;y++){ register int x; +// printf("%d, %d, %d\n", (int)src&31, (int)srca%31, (int)dstbase&31); +#ifdef HAVE_MMXFIXME +/* asm( + "pxor %%mm7, %%mm7 \n\t" + "xorl %%eax, %%eax \n\t" + "pcmpeqb %%mm6, %%mm6 \n\t" // F..F + "1: \n\t" + "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase + "movq %%mm0, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000 + "paddb %%mm6, %%mm2 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A + "punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B + "pmullw %%mm2, %%mm0 \n\t" + "pmullw %%mm3, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000 + "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB + "paddb %%mm2, %%mm0 \n\t" + "movq %%mm0, (%0, %%eax, 4) \n\t" + "addl $2, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "r" (w) + : "%eax" + );*/ + asm( + "xorl %%eax, %%eax \n\t" + "xorl %%ebx, %%ebx \n\t" + "xorl %%edx, %%edx \n\t" + "1: \n\t" + "movb (%1, %%eax), %%bl \n\t" + "cmpb $0, %%bl \n\t" + " jz 2f \n\t" + "movzxb (%2, %%eax), %%edx \n\t" + "shll $8, %%edx \n\t" + "decb %%bl \n\t" + "movzxb (%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, (%0, %%eax, 4) \n\t" + + "movzxb 1(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 1(%0, %%eax, 4) \n\t" + + "movzxb 2(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 
2(%0, %%eax, 4) \n\t" + + "2: \n\t" + "addl $1, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) + : "%eax", "%ebx", "%ecx", "%edx" + ); +#else //HAVE_MMX for(x=0;x<w;x++){ if(srca[x]){ #ifdef FAST_OSD @@ -90,10 +160,17 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i #endif } } +#endif // !HAVE_MMX src+=srcstride; srca+=srcstride; dstbase+=dststride; } +#ifdef HAVE_3DNOW + asm("femms\n\t"); +#elif defined (HAVE_MMX) + asm("emms\n\t"); +#endif + return; } diff --git a/libvo/osd_template.c b/libvo/osd_template.c index a870733edc..d27ab86c7e 100644 --- a/libvo/osd_template.c +++ b/libvo/osd_template.c @@ -79,6 +79,76 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i int y; for(y=0;y<h;y++){ register int x; +// printf("%d, %d, %d\n", (int)src&31, (int)srca%31, (int)dstbase&31); +#ifdef HAVE_MMXFIXME +/* asm( + "pxor %%mm7, %%mm7 \n\t" + "xorl %%eax, %%eax \n\t" + "pcmpeqb %%mm6, %%mm6 \n\t" // F..F + "1: \n\t" + "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase + "movq %%mm0, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000 + "paddb %%mm6, %%mm2 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A + "punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B + "pmullw %%mm2, %%mm0 \n\t" + "pmullw %%mm3, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000 + "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB + "paddb %%mm2, %%mm0 \n\t" + "movq %%mm0, (%0, %%eax, 4) \n\t" + "addl $2, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "r" (w) + : "%eax" + );*/ + asm( + "xorl %%eax, %%eax 
\n\t" + "xorl %%ebx, %%ebx \n\t" + "xorl %%edx, %%edx \n\t" + "1: \n\t" + "movb (%1, %%eax), %%bl \n\t" + "cmpb $0, %%bl \n\t" + " jz 2f \n\t" + "movzxb (%2, %%eax), %%edx \n\t" + "shll $8, %%edx \n\t" + "decb %%bl \n\t" + "movzxb (%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, (%0, %%eax, 4) \n\t" + + "movzxb 1(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 1(%0, %%eax, 4) \n\t" + + "movzxb 2(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 2(%0, %%eax, 4) \n\t" + + "2: \n\t" + "addl $1, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) + : "%eax", "%ebx", "%ecx", "%edx" + ); +#else //HAVE_MMX for(x=0;x<w;x++){ if(srca[x]){ #ifdef FAST_OSD @@ -90,10 +160,17 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i #endif } } +#endif // !HAVE_MMX src+=srcstride; srca+=srcstride; dstbase+=dststride; } +#ifdef HAVE_3DNOW + asm("femms\n\t"); +#elif defined (HAVE_MMX) + asm("emms\n\t"); +#endif + return; } |