From b69c835945f353d3b3dec4f92cac1d12a9435118 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 19 Apr 2014 15:47:10 +0200 Subject: vf_ilpack: remove inline asm This makes it multiple times slower. However, the output format (packed YUV) isn't handled efficiently by anything to begin with, and I have no clue we even have this filter. I guess it's one of these filters which find some use sometimes, but are not of higher importance, which justifies removing the faster inline asm. --- video/filter/vf_ilpack.c | 281 +---------------------------------------------- 1 file changed, 4 insertions(+), 277 deletions(-) (limited to 'video') diff --git a/video/filter/vf_ilpack.c b/video/filter/vf_ilpack.c index 2ae129f6de..e27b846735 100644 --- a/video/filter/vf_ilpack.c +++ b/video/filter/vf_ilpack.c @@ -20,16 +20,15 @@ #include #include #include +#include #include "config.h" #include "common/msg.h" -#include "common/cpudetect.h" #include "options/m_option.h" #include "video/img_format.h" #include "video/mp_image.h" #include "vf.h" -#include "libavutil/attributes.h" typedef void (pack_func_t)(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int us, int vs); @@ -39,7 +38,7 @@ struct vf_priv_s { pack_func_t *pack[2]; }; -static void pack_nn_C(unsigned char *dst, unsigned char *y, +static void pack_nn(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int av_unused us, int av_unused vs) { @@ -52,7 +51,7 @@ static void pack_nn_C(unsigned char *dst, unsigned char *y, } } -static void pack_li_0_C(unsigned char *dst, unsigned char *y, +static void pack_li_0(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int us, int vs) { int j; @@ -65,7 +64,7 @@ static void pack_li_0_C(unsigned char *dst, unsigned char *y, } } -static void pack_li_1_C(unsigned char *dst, unsigned char *y, +static void pack_li_1(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int us, int vs) { int j; @@ -78,265 +77,6 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y, } } -#if HAVE_MMX -static void pack_nn_MMX(unsigned char *dst, unsigned char *y, - unsigned char *u, unsigned char *v, int w, - int av_unused us, int av_unused vs) -{ - __asm__ volatile ("" - ".align 4 \n\t" - "1: \n\t" - "movq (%0), %%mm1 \n\t" - "movq (%0), %%mm2 \n\t" - "movq (%1), %%mm4 \n\t" - "movq (%2), %%mm6 \n\t" - "punpcklbw %%mm6, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm1 \n\t" - "punpckhbw %%mm4, %%mm2 \n\t" - - "add $8, %0 \n\t" - "add $4, %1 \n\t" - "add $4, %2 \n\t" - "movq %%mm1, (%3) \n\t" - "movq %%mm2, 8(%3) \n\t" - "add $16, %3 \n\t" - "decl %4 \n\t" - "jnz 1b \n\t" - "emms \n\t" - : - : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) - : "memory" - ); - pack_nn_C(dst, y, u, v, (w&7), 0, 0); -} - -#if HAVE_EBX_AVAILABLE -static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, - unsigned char *u, unsigned char *v, int w, int us, int vs) -{ - __asm__ volatile ("" - "push %%"REG_BP" \n\t" -#if ARCH_X86_64 - "mov %6, %%"REG_BP" \n\t" -#else - "movl 4(%%"REG_d"), %%"REG_BP" \n\t" - "movl (%%"REG_d"), %%"REG_d" \n\t" -#endif - "pxor %%mm0, %%mm0 \n\t" - - ".align 4 \n\t" - ".Lli0: \n\t" - "movq (%%"REG_S"), %%mm1 \n\t" - "movq (%%"REG_S"), %%mm2 \n\t" - - "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" - "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" - "punpcklbw %%mm0, %%mm4 \n\t" - "punpcklbw %%mm0, %%mm6 \n\t" - "movq (%%"REG_a"), %%mm3 \n\t" - "movq (%%"REG_b"), %%mm5 \n\t" - "punpcklbw %%mm0, %%mm3 \n\t" - "punpcklbw %%mm0, %%mm5 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "psrlw $3, %%mm4 \n\t" - "psrlw $3, %%mm6 \n\t" - "packuswb %%mm4, %%mm4 \n\t" - "packuswb %%mm6, %%mm6 \n\t" - "punpcklbw %%mm6, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm1 \n\t" - "punpckhbw %%mm4, %%mm2 \n\t" - - "movq %%mm1, (%%"REG_D") \n\t" - "movq %%mm2, 8(%%"REG_D") \n\t" - - "movq 8(%%"REG_S"), %%mm1 \n\t" - "movq 8(%%"REG_S"), %%mm2 \n\t" - - "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" - "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" - "punpckhbw %%mm0, %%mm4 \n\t" - "punpckhbw %%mm0, %%mm6 \n\t" - "movq (%%"REG_a"), %%mm3 \n\t" - "movq (%%"REG_b"), %%mm5 \n\t" - "punpckhbw %%mm0, %%mm3 \n\t" - "punpckhbw %%mm0, %%mm5 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "psrlw $3, %%mm4 \n\t" - "psrlw $3, %%mm6 \n\t" - "packuswb %%mm4, %%mm4 \n\t" - "packuswb %%mm6, %%mm6 \n\t" - "punpcklbw %%mm6, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm1 \n\t" - "punpckhbw %%mm4, %%mm2 \n\t" - - "add $16, %%"REG_S" \n\t" - "add $8, %%"REG_a" \n\t" - "add $8, %%"REG_b" \n\t" - - "movq %%mm1, 16(%%"REG_D") \n\t" - "movq %%mm2, 24(%%"REG_D") \n\t" - "add $32, %%"REG_D" \n\t" - - "decl %%ecx \n\t" - "jnz .Lli0 \n\t" - "emms \n\t" - "pop %%"REG_BP" \n\t" - : - : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), -#if ARCH_X86_64 - "d" ((x86_reg)us), "r" ((x86_reg)vs) -#else - "d" (&us) -#endif - : "memory" - ); - pack_li_0_C(dst, y, u, v, (w&15), us, vs); -} - -static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, - unsigned char *u, unsigned char *v, int w, int us, int vs) -{ - __asm__ volatile ("" - "push %%"REG_BP" \n\t" -#if ARCH_X86_64 - "mov %6, %%"REG_BP" \n\t" -#else - "movl 4(%%"REG_d"), %%"REG_BP" \n\t" - "movl (%%"REG_d"), %%"REG_d" \n\t" -#endif - "pxor %%mm0, %%mm0 \n\t" - - ".align 4 \n\t" - ".Lli1: \n\t" - "movq (%%"REG_S"), %%mm1 \n\t" - "movq (%%"REG_S"), %%mm2 \n\t" - - "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" - "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" - "punpcklbw %%mm0, %%mm4 \n\t" - "punpcklbw %%mm0, %%mm6 \n\t" - "movq (%%"REG_a"), %%mm3 \n\t" - "movq (%%"REG_b"), %%mm5 \n\t" - "punpcklbw %%mm0, %%mm3 \n\t" - "punpcklbw %%mm0, %%mm5 \n\t" - "movq %%mm4, %%mm7 \n\t" - "paddw %%mm4, %%mm4 \n\t" - "paddw %%mm7, %%mm4 \n\t" - "movq %%mm6, %%mm7 \n\t" - "paddw %%mm6, %%mm6 \n\t" - "paddw %%mm7, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "psrlw $3, %%mm4 \n\t" - "psrlw $3, %%mm6 \n\t" - "packuswb %%mm4, %%mm4 \n\t" - "packuswb %%mm6, %%mm6 \n\t" - "punpcklbw %%mm6, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm1 \n\t" - "punpckhbw %%mm4, %%mm2 \n\t" - - "movq %%mm1, (%%"REG_D") \n\t" - "movq %%mm2, 8(%%"REG_D") \n\t" - - "movq 8(%%"REG_S"), %%mm1 \n\t" - "movq 8(%%"REG_S"), %%mm2 \n\t" - - "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" - "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" - "punpckhbw %%mm0, %%mm4 \n\t" - "punpckhbw %%mm0, %%mm6 \n\t" - "movq (%%"REG_a"), %%mm3 \n\t" - "movq (%%"REG_b"), %%mm5 \n\t" - "punpckhbw %%mm0, %%mm3 \n\t" - "punpckhbw %%mm0, %%mm5 \n\t" - "movq %%mm4, %%mm7 \n\t" - "paddw %%mm4, %%mm4 \n\t" - "paddw %%mm7, %%mm4 \n\t" - "movq %%mm6, %%mm7 \n\t" - "paddw %%mm6, %%mm6 \n\t" - "paddw %%mm7, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "paddw %%mm3, %%mm4 \n\t" - "paddw %%mm5, %%mm6 \n\t" - "psrlw $3, %%mm4 \n\t" - "psrlw $3, %%mm6 \n\t" - "packuswb %%mm4, %%mm4 \n\t" - "packuswb %%mm6, %%mm6 \n\t" - "punpcklbw %%mm6, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm1 \n\t" - "punpckhbw %%mm4, %%mm2 \n\t" - - "add $16, %%"REG_S" \n\t" - "add $8, %%"REG_a" \n\t" - "add $8, %%"REG_b" \n\t" - - "movq %%mm1, 16(%%"REG_D") \n\t" - "movq %%mm2, 24(%%"REG_D") \n\t" - "add $32, %%"REG_D" \n\t" - - "decl %%ecx \n\t" - "jnz .Lli1 \n\t" - "emms \n\t" - "pop %%"REG_BP" \n\t" - : - : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), -#if ARCH_X86_64 - "d" ((x86_reg)us), "r" ((x86_reg)vs) -#else - "d" (&us) -#endif - : "memory" - ); - pack_li_1_C(dst, y, u, v, (w&15), us, vs); -} -#endif /* HAVE_EBX_AVAILABLE */ -#endif - -static pack_func_t *pack_nn; -static pack_func_t *pack_li_0; -static pack_func_t *pack_li_1; - static void ilpack(unsigned char *dst, unsigned char *src[3], int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2]) { @@ -410,19 +150,6 @@ static int vf_open(vf_instance_t *vf) vf->query_format=query_format; vf->filter=filter; - pack_nn = pack_nn_C; - pack_li_0 = pack_li_0_C; - pack_li_1 = pack_li_1_C; -#if HAVE_MMX - if(gCpuCaps.hasMMX) { - pack_nn = pack_nn_MMX; -#if HAVE_EBX_AVAILABLE - pack_li_0 = pack_li_0_MMX; - pack_li_1 = pack_li_1_MMX; -#endif - } -#endif - switch(vf->priv->mode) { case 0: vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; -- cgit v1.2.3