From 6778e88bf506e3797d4ba576929435c1d08ecfba Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 11 Nov 2001 22:14:13 +0000 Subject: p2/p3 bgr32 version (20%faster) yv12 and yuy2 versions in mmx git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2847 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libvo/osd.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 158 insertions(+), 3 deletions(-) (limited to 'libvo/osd.c') diff --git a/libvo/osd.c b/libvo/osd.c index fe3fa9b89d..74574822fa 100644 --- a/libvo/osd.c +++ b/libvo/osd.c @@ -1,5 +1,6 @@ // Generic alpha renderers for all YUV modes and RGB depths. // These are "reference implementations", should be optimized later (MMX, etc) +// Optimized by Nick and Michael //#define FAST_OSD //#define FAST_OSD_TABLE @@ -9,14 +10,60 @@ #include "../mmx_defs.h" //#define ENABLE_PROFILE #include "../my_profile.h" +#include + +#ifndef HAVE_3DNOW +static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; +#endif void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; -#ifdef FAST_OSD +#if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; #endif +PROFILE_START(); for(y=0;y>8)+src[x]; #endif } +#endif src+=srcstride; srca+=srcstride; dstbase+=dststride; } +#ifdef HAVE_MMX + asm volatile(EMMS:::"memory"); +#endif +PROFILE_END("vo_draw_alpha_yv12"); return; } void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; -#ifdef FAST_OSD +#if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; #endif +PROFILE_START(); for(y=0;y>8)+src[x]; #endif } - src+=srcstride; +#endif + src+=srcstride; srca+=srcstride; dstbase+=dststride; } +#ifdef HAVE_MMX + asm volatile(EMMS:::"memory"); +#endif +PROFILE_END("vo_draw_alpha_yuy2"); return; } @@ -167,6 +263,7 @@ PROFILE_START(); register int x; #ifdef ARCH_X86 #ifdef HAVE_MMX +#ifdef HAVE_3DNOW asm volatile( PREFETCHW" %0\n\t" PREFETCH" %1\n\t" @@ -203,6 +300,64 @@ PROFILE_START(); "movq %%mm0, %0\n\t" :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); } +#else //this is faster for intels crap + asm volatile( + PREFETCHW" %0\n\t" + PREFETCH" %1\n\t" + PREFETCH" %2\n\t" + "pxor %%mm7, %%mm7\n\t" + "pcmpeqb %%mm5, %%mm5\n\t" // F..F + "movq %%mm5, %%mm4\n\t" + "psllw $8, %%mm5\n\t" //FF00FF00FF00 + "psrlw $8, %%mm4\n\t" //00FF00FF00FF + ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); + for(x=0;x