/*
 * Alpha renderer template: defines five static inline functions through the
 * RENAME() macro (RENAME itself is not defined in this text and is presumably
 * supplied by the including file -- TODO confirm):
 *
 *   vo_draw_alpha_yv12, vo_draw_alpha_yuy2, vo_draw_alpha_uyvy,
 *   vo_draw_alpha_rgb24, vo_draw_alpha_rgb32
 *
 * All five share one signature:
 *   w, h       - size of the area to process; under FAST_OSD, w is halved on
 *                entry (yv12/yuy2 only when HAVE_MMX is not defined, uyvy
 *                whenever FAST_OSD is defined)
 *   src        - source values added to the destination
 *   srca       - per-pixel multiplier applied to the existing destination
 *   srcstride  - row advance applied to BOTH src and srca after each row
 *   dstbase    - destination buffer, modified in place
 *   dststride  - row advance applied to dstbase after each row
 *
 * Blend arithmetic that is still readable in this copy:
 *   rgb24: dst[i] = ((dst[i]*srca[x])>>8) + src[x] for i = 1 and 2 (the
 *          preceding statement, presumably dst[0], has lost its left-hand
 *          side), then dst += 3.
 *   rgb32: the same expression for bytes 4*x+1 and 4*x+2 (the byte before
 *          them is in a damaged span); on WORDS_BIGENDIAN dstbase is first
 *          advanced by one byte.
 *   yuy2:  dstbase[2*x+1] = ((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128
 *   uyvy:  dstbase[2*x]   = ((((signed)dstbase[2*x]-128)*srca[x])>>8)+128
 *          (these bytes are presumably the chroma samples, scaled around
 *          the 128 midpoint -- confirm against the pixel format).
 *
 * Instruction selection macros:
 *   HAVE_3DNOW : PREFETCH "prefetch",    PREFETCHW "prefetchw",  PAVGB "pavgusb"
 *   HAVE_MMX2  : PREFETCH "prefetchnta", PREFETCHW "prefetcht0", PAVGB "pavgb"
 *   otherwise  : PREFETCH/PREFETCHW expand to an assembler comment (" # nop")
 *                and PAVGB is left undefined.
 *   EMMS is "femms" with HAVE_3DNOW, "emms" otherwise. Under HAVE_MMX it is
 *   issued at the end of yv12, yuy2, rgb24 and rgb32; uyvy has no visible
 *   MMX code and no EMMS.
 *
 * MMX constants loaded by the per-function prologues (HAVE_MMX only):
 *   yv12 : mm5 = FF00 in every 16-bit word, mm4 = 00FF in every word,
 *          mm7 = all ones
 *   yuy2 : mm7 = 0, mm6 = all ones, mm5 = FF00 words, mm4 = 00FF words
 *   rgb24: mm7 = 0, mm6 = all ones
 *   rgb32: with HAVE_3DNOW mm7 = 0 and mm6 = all ones; otherwise mm7 = 0,
 *          mm5 = FF00 words, mm4 = 00FF words
 *
 * NOTE(review): this copy of the file looks damaged and will not compile
 * as-is:
 *   - All line breaks are gone, so the leading line comment swallows the rest
 *     of its physical line and the preprocessor directives no longer start
 *     a line.
 *   - Every row loop reads "for(y=0;y>8)+src[x];". The text between a less-than
 *     sign and the following greater-than sign appears to have been stripped,
 *     which removes the loop conditions, the per-pixel loops and the whole
 *     inner MMX/3DNow! assembly. Nothing is visible that uses PAVGB, PREFETCH
 *     or the prologue registers, and the remaining #endif/brace nesting is
 *     unbalanced.
 *   Restore the file from the pristine upstream source before changing any
 *   code here; do not hand-reconstruct the missing assembly.
 */
// Generic alpha renderers for all YUV modes and RGB depths. // Optimized by Nick and Michael // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL #undef PREFETCH #undef EMMS #undef PREFETCHW #undef PAVGB #ifdef HAVE_3DNOW #define PREFETCH "prefetch" #define PREFETCHW "prefetchw" #define PAVGB "pavgusb" #elif defined ( HAVE_MMX2 ) #define PREFETCH "prefetchnta" #define PREFETCHW "prefetcht0" #define PAVGB "pavgb" #else #define PREFETCH " # nop" #define PREFETCHW " # nop" #endif #ifdef HAVE_3DNOW /* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ #define EMMS "femms" #else #define EMMS "emms" #endif static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; #if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; #endif #ifdef HAVE_MMX __asm__ volatile( "pcmpeqb %%mm5, %%mm5\n\t" // F..F "movq %%mm5, %%mm4\n\t" "movq %%mm5, %%mm7\n\t" "psllw $8, %%mm5\n\t" //FF00FF00FF00 "psrlw $8, %%mm4\n\t" //00FF00FF00FF ::); #endif for(y=0;y>8)+src[x]; #endif } #endif src+=srcstride; srca+=srcstride; dstbase+=dststride; } #ifdef HAVE_MMX __asm__ volatile(EMMS:::"memory"); #endif return; } static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; #if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; #endif #ifdef HAVE_MMX __asm__ volatile( "pxor %%mm7, %%mm7\n\t" "pcmpeqb %%mm5, %%mm5\n\t" // F..F "movq %%mm5, %%mm6\n\t" "movq %%mm5, %%mm4\n\t" "psllw $8, %%mm5\n\t" //FF00FF00FF00 "psrlw $8, %%mm4\n\t" //00FF00FF00FF ::); #endif for(y=0;y>8)+src[x]; dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128; } #endif } #endif src+=srcstride; srca+=srcstride; dstbase+=dststride; } #ifdef HAVE_MMX __asm__ volatile(EMMS:::"memory"); #endif return; } static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, 
int srcstride, unsigned char* dstbase,int dststride){ int y; #if defined(FAST_OSD) w=w>>1; #endif for(y=0;y>8)+src[x]; dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128; } #endif } src+=srcstride; srca+=srcstride; dstbase+=dststride; } } static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; #ifdef HAVE_MMX __asm__ volatile( "pxor %%mm7, %%mm7\n\t" "pcmpeqb %%mm6, %%mm6\n\t" // F..F ::); #endif for(y=0;y>8)+src[x]; dst[1]=((dst[1]*srca[x])>>8)+src[x]; dst[2]=((dst[2]*srca[x])>>8)+src[x]; #endif } dst+=3; // 24bpp } #endif /* arch_x86 */ src+=srcstride; srca+=srcstride; dstbase+=dststride; } #ifdef HAVE_MMX __asm__ volatile(EMMS:::"memory"); #endif return; } static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; #ifdef WORDS_BIGENDIAN dstbase++; #endif #ifdef HAVE_MMX #ifdef HAVE_3DNOW __asm__ volatile( "pxor %%mm7, %%mm7\n\t" "pcmpeqb %%mm6, %%mm6\n\t" // F..F ::); #else /* HAVE_3DNOW */ __asm__ volatile( "pxor %%mm7, %%mm7\n\t" "pcmpeqb %%mm5, %%mm5\n\t" // F..F "movq %%mm5, %%mm4\n\t" "psllw $8, %%mm5\n\t" //FF00FF00FF00 "psrlw $8, %%mm4\n\t" //00FF00FF00FF ::); #endif /* HAVE_3DNOW */ #endif /* HAVE_MMX */ for(y=0;y>8)+src[x]; dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; #endif } } #endif /* arch_x86 */ src+=srcstride; srca+=srcstride; dstbase+=dststride; } #ifdef HAVE_MMX __asm__ volatile(EMMS:::"memory"); #endif return; }