summaryrefslogtreecommitdiffstats
path: root/libvo
diff options
context:
space:
mode:
authornick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2001-11-11 11:18:50 +0000
committernick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2001-11-11 11:18:50 +0000
commitf4730e10c1abb403e793d1679b2d957cdc759fe9 (patch)
treed5c2bb8c54b37c32a3bca932113de6c23620f4f1 /libvo
parentd18d0f5a5d0519f582f6002536544aa0b1059413 (diff)
downloadmpv-f4730e10c1abb403e793d1679b2d957cdc759fe9.tar.bz2
mpv-f4730e10c1abb403e793d1679b2d957cdc759fe9.tar.xz
Extract parallelism from OSD stuff + MMX2 optimization.
I've found that mplayer's measuring is not precise :( Here my test with using RDTSC: Old stuff: rd_tsc: 774377 rd_tsc: 765985 rd_tsc: 265309 New CPU optimized stuff: rd_tsc: 661154 rd_tsc: 641317 rd_tsc: 222448 New MMX2 optimized stuff: rd_tsc: 269544 rd_tsc: 329189 rd_tsc: 173110 git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2824 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo')
-rw-r--r--libvo/osd.c73
-rw-r--r--libvo/osd_template.c73
2 files changed, 82 insertions, 64 deletions
diff --git a/libvo/osd.c b/libvo/osd.c
index b44c56b676..fb01444f3e 100644
--- a/libvo/osd.c
+++ b/libvo/osd.c
@@ -76,12 +76,25 @@ void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, i
return;
}
+#ifdef PROFILE_ME
+static inline unsigned long long int read_tsc( void )
+{
+ unsigned long long int retval;
+ __asm __volatile ("rdtsc":"=A"(retval)::"memory");
+ return retval;
+}
+#endif
+
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
int y;
+#ifdef PROFILE_ME
+unsigned long long v1,v2;
+v1 = read_tsc();
+#endif
for(y=0;y<h;y++){
register int x;
#ifdef ARCH_X86
-#if 0 /*def HAVE_MMX2*/
+#ifdef HAVE_MMX2
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"xorl %%eax, %%eax \n\t"
@@ -117,41 +130,33 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
: "%eax"
);
#else /* 0 HAVE_MMX2*/
- asm volatile(
- "xorl %%eax, %%eax \n\t"
- "xorl %%ebx, %%ebx \n\t"
- "xorl %%edx, %%edx \n\t"
- ".balign 16\n\t"
- "1: \n\t"
- "movb (%1, %%eax), %%bl \n\t"
- "cmpb $0, %%bl \n\t"
- " jz 2f \n\t"
- "movzbl (%2, %%eax), %%edx \n\t"
- "shll $8, %%edx \n\t"
- "decb %%bl \n\t"
- "movzbl (%0, %%eax, 4), %%ecx \n\t"
- "imull %%ebx, %%ecx \n\t"
- "addl %%edx, %%ecx \n\t"
- "movb %%ch, (%0, %%eax, 4) \n\t"
+ for(x=0;x<w;x++){
+ if(srca[x]){
+ asm volatile(
+ "movzbl (%0), %%ecx\n\t"
+ "movzbl 1(%0), %%eax\n\t"
+ "movzbl 2(%0), %%edx\n\t"
- "movzbl 1(%0, %%eax, 4), %%ecx \n\t"
- "imull %%ebx, %%ecx \n\t"
- "addl %%edx, %%ecx \n\t"
- "movb %%ch, 1(%0, %%eax, 4) \n\t"
+ "imull %1, %%ecx\n\t"
+ "imull %1, %%eax\n\t"
+ "imull %1, %%edx\n\t"
- "movzbl 2(%0, %%eax, 4), %%ecx \n\t"
- "imull %%ebx, %%ecx \n\t"
- "addl %%edx, %%ecx \n\t"
- "movb %%ch, 2(%0, %%eax, 4) \n\t"
+ "addl %2, %%ecx\n\t"
+ "addl %2, %%eax\n\t"
+ "addl %2, %%edx\n\t"
- "2: \n\t"
- "addl $1, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
- " jb 1b \n\t"
+ "movb %%ch, (%0)\n\t"
+ "movb %%ah, 1(%0)\n\t"
+ "movb %%dh, 2(%0)\n\t"
- :: "r" (dstbase), "r" (srca), "r" (src), "m" (w)
- : "%eax", "%ebx", "%ecx", "%edx"
+ :
+ :"r" (&dstbase[4*x]),
+ "r" ((unsigned)srca[x]),
+ "r" (((unsigned)src[x])<<8)
+ :"%eax", "%ecx", "%edx"
);
+ }
+ }
#endif /* 0 HAVE_MMX*/
#else /*non x86 arch*/
for(x=0;x<w;x++){
@@ -170,10 +175,14 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
srca+=srcstride;
dstbase+=dststride;
}
-#if 0 /*def HAVE_MMX2*/
+#ifdef HAVE_MMX2
asm volatile(SFENCE:::"memory");
asm volatile(EMMS:::"memory");
#endif
+#ifdef PROFILE_ME
+v2 = read_tsc();
+printf("rd_tsc: %llu\n\t",v2-v1);
+#endif
return;
}
diff --git a/libvo/osd_template.c b/libvo/osd_template.c
index b44c56b676..fb01444f3e 100644
--- a/libvo/osd_template.c
+++ b/libvo/osd_template.c
@@ -76,12 +76,25 @@ void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, i
return;
}
+#ifdef PROFILE_ME
+static inline unsigned long long int read_tsc( void )
+{
+ unsigned long long int retval;
+ __asm __volatile ("rdtsc":"=A"(retval)::"memory");
+ return retval;
+}
+#endif
+
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
int y;
+#ifdef PROFILE_ME
+unsigned long long v1,v2;
+v1 = read_tsc();
+#endif
for(y=0;y<h;y++){
register int x;
#ifdef ARCH_X86
-#if 0 /*def HAVE_MMX2*/
+#ifdef HAVE_MMX2
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"xorl %%eax, %%eax \n\t"
@@ -117,41 +130,33 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
: "%eax"
);
#else /* 0 HAVE_MMX2*/
- asm volatile(
- "xorl %%eax, %%eax \n\t"
- "xorl %%ebx, %%ebx \n\t"
- "xorl %%edx, %%edx \n\t"
- ".balign 16\n\t"
- "1: \n\t"
- "movb (%1, %%eax), %%bl \n\t"
- "cmpb $0, %%bl \n\t"
- " jz 2f \n\t"
- "movzbl (%2, %%eax), %%edx \n\t"
- "shll $8, %%edx \n\t"
- "decb %%bl \n\t"
- "movzbl (%0, %%eax, 4), %%ecx \n\t"
- "imull %%ebx, %%ecx \n\t"
- "addl %%edx, %%ecx \n\t"
- "movb %%ch, (%0, %%eax, 4) \n\t"
+ for(x=0;x<w;x++){
+ if(srca[x]){
+ asm volatile(
+ "movzbl (%0), %%ecx\n\t"
+ "movzbl 1(%0), %%eax\n\t"
+ "movzbl 2(%0), %%edx\n\t"
- "movzbl 1(%0, %%eax, 4), %%ecx \n\t"
- "imull %%ebx, %%ecx \n\t"
- "addl %%edx, %%ecx \n\t"
- "movb %%ch, 1(%0, %%eax, 4) \n\t"
+ "imull %1, %%ecx\n\t"
+ "imull %1, %%eax\n\t"
+ "imull %1, %%edx\n\t"
- "movzbl 2(%0, %%eax, 4), %%ecx \n\t"
- "imull %%ebx, %%ecx \n\t"
- "addl %%edx, %%ecx \n\t"
- "movb %%ch, 2(%0, %%eax, 4) \n\t"
+ "addl %2, %%ecx\n\t"
+ "addl %2, %%eax\n\t"
+ "addl %2, %%edx\n\t"
- "2: \n\t"
- "addl $1, %%eax \n\t"
- "cmpl %3, %%eax \n\t"
- " jb 1b \n\t"
+ "movb %%ch, (%0)\n\t"
+ "movb %%ah, 1(%0)\n\t"
+ "movb %%dh, 2(%0)\n\t"
- :: "r" (dstbase), "r" (srca), "r" (src), "m" (w)
- : "%eax", "%ebx", "%ecx", "%edx"
+ :
+ :"r" (&dstbase[4*x]),
+ "r" ((unsigned)srca[x]),
+ "r" (((unsigned)src[x])<<8)
+ :"%eax", "%ecx", "%edx"
);
+ }
+ }
#endif /* 0 HAVE_MMX*/
#else /*non x86 arch*/
for(x=0;x<w;x++){
@@ -170,10 +175,14 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
srca+=srcstride;
dstbase+=dststride;
}
-#if 0 /*def HAVE_MMX2*/
+#ifdef HAVE_MMX2
asm volatile(SFENCE:::"memory");
asm volatile(EMMS:::"memory");
#endif
+#ifdef PROFILE_ME
+v2 = read_tsc();
+printf("rd_tsc: %llu\n\t",v2-v1);
+#endif
return;
}