From d0ebecb1c4d2e70d65b91a596acfcffb0e4bc4b0 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 19 Apr 2014 15:53:33 +0200 Subject: vf_pullup: remove inline asm No change in speed (or even slightly faster, though I tested with progressive solid color video only), and normally we use libavfilter's vf_pullup anyway. --- video/filter/pullup.c | 206 ----------------------------------------------- video/filter/pullup.h | 6 -- video/filter/vf_pullup.c | 6 -- 3 files changed, 218 deletions(-) (limited to 'video') diff --git a/video/filter/pullup.c b/video/filter/pullup.c index 45aa74eedd..864cc90ba4 100644 --- a/video/filter/pullup.c +++ b/video/filter/pullup.c @@ -21,197 +21,9 @@ #include #include "config.h" #include "pullup.h" -#include "common/cpudetect.h" #include "common/common.h" - -#if ARCH_X86 -#if HAVE_MMX -static int diff_y_mmx(unsigned char *a, unsigned char *b, int s) -{ - int ret; - __asm__ volatile ( - "movl $4, %%ecx \n\t" - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm7, %%mm7 \n\t" - - "1: \n\t" - - "movq (%%"REG_S"), %%mm0 \n\t" - "movq (%%"REG_S"), %%mm2 \n\t" - "add %%"REG_a", %%"REG_S" \n\t" - "movq (%%"REG_D"), %%mm1 \n\t" - "add %%"REG_a", %%"REG_D" \n\t" - "psubusb %%mm1, %%mm2 \n\t" - "psubusb %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm0 \n\t" - "movq %%mm1, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm3 \n\t" - "paddw %%mm0, %%mm4 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "paddw %%mm3, %%mm4 \n\t" - - "decl %%ecx \n\t" - "jnz 1b \n\t" - - "movq %%mm4, %%mm3 \n\t" - "punpcklwd %%mm7, %%mm4 \n\t" - "punpckhwd %%mm7, %%mm3 \n\t" - "paddd %%mm4, %%mm3 \n\t" - "movd %%mm3, %%eax \n\t" - "psrlq $32, %%mm3 \n\t" - "movd %%mm3, %%edx \n\t" - "addl %%edx, %%eax \n\t" - "emms \n\t" - : "=a" (ret) - : "S" (a), "D" (b), "a" (s) - : "%ecx", "%edx" - ); - return ret; -} - -static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s) -{ - int ret; - __asm__ 
volatile ( - "movl $4, %%ecx \n\t" - "pxor %%mm6, %%mm6 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "sub %%"REG_a", %%"REG_D" \n\t" - - "2: \n\t" - - "movq (%%"REG_D"), %%mm0 \n\t" - "movq (%%"REG_D"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "movq (%%"REG_D",%%"REG_a"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "paddw %%mm0, %%mm0 \n\t" - "paddw %%mm2, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "psubusw %%mm1, %%mm0 \n\t" - "psubusw %%mm2, %%mm1 \n\t" - "paddw %%mm0, %%mm6 \n\t" - "paddw %%mm1, %%mm6 \n\t" - - "movq (%%"REG_S"), %%mm0 \n\t" - "movq (%%"REG_D"), %%mm1 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "movq (%%"REG_D",%%"REG_a"), %%mm2 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "paddw %%mm0, %%mm0 \n\t" - "paddw %%mm2, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "psubusw %%mm1, %%mm0 \n\t" - "psubusw %%mm2, %%mm1 \n\t" - "paddw %%mm0, %%mm6 \n\t" - "paddw %%mm1, %%mm6 \n\t" - - "movq (%%"REG_D",%%"REG_a"), %%mm0 \n\t" - "movq (%%"REG_S"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "paddw %%mm0, %%mm0 \n\t" - "paddw %%mm2, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "psubusw %%mm1, %%mm0 \n\t" - "psubusw %%mm2, %%mm1 \n\t" - "paddw %%mm0, %%mm6 \n\t" - "paddw %%mm1, %%mm6 \n\t" - - "movq (%%"REG_D",%%"REG_a"), %%mm0 \n\t" - "movq (%%"REG_S"), %%mm1 \n\t" - "punpckhbw %%mm7, %%mm0 \n\t" - "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "paddw %%mm0, %%mm0 \n\t" - "paddw %%mm2, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "psubusw %%mm1, %%mm0 \n\t" - "psubusw %%mm2, %%mm1 \n\t" - "paddw %%mm0, %%mm6 \n\t" - "paddw %%mm1, %%mm6 \n\t" - - "add %%"REG_a", %%"REG_S" \n\t" - "add %%"REG_a", %%"REG_D" \n\t" - "decl %%ecx \n\t" - "jnz 2b \n\t" - - "movq %%mm6, %%mm5 \n\t" - "punpcklwd %%mm7, %%mm6 \n\t" - "punpckhwd %%mm7, %%mm5 \n\t" - "paddd %%mm6, 
%%mm5 \n\t" - "movd %%mm5, %%eax \n\t" - "psrlq $32, %%mm5 \n\t" - "movd %%mm5, %%edx \n\t" - "addl %%edx, %%eax \n\t" - - "emms \n\t" - : "=a" (ret) - : "S" (a), "D" (b), "a" (s) - : "%ecx", "%edx" - ); - return ret; -} - -static int var_y_mmx(unsigned char *a, unsigned char *b, int s) -{ - int ret; - __asm__ volatile ( - "movl $3, %%ecx \n\t" - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm7, %%mm7 \n\t" - - "1: \n\t" - - "movq (%%"REG_S"), %%mm0 \n\t" - "movq (%%"REG_S"), %%mm2 \n\t" - "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t" - "add %%"REG_a", %%"REG_S" \n\t" - "psubusb %%mm1, %%mm2 \n\t" - "psubusb %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm0 \n\t" - "movq %%mm1, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm3 \n\t" - "paddw %%mm0, %%mm4 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "paddw %%mm3, %%mm4 \n\t" - - "decl %%ecx \n\t" - "jnz 1b \n\t" - - "movq %%mm4, %%mm3 \n\t" - "punpcklwd %%mm7, %%mm4 \n\t" - "punpckhwd %%mm7, %%mm3 \n\t" - "paddd %%mm4, %%mm3 \n\t" - "movd %%mm3, %%eax \n\t" - "psrlq $32, %%mm3 \n\t" - "movd %%mm3, %%edx \n\t" - "addl %%edx, %%eax \n\t" - "emms \n\t" - : "=a" (ret) - : "S" (a), "a" (s) - : "%ecx", "%edx" - ); - return 4*ret; -} -#endif -#endif - #define ABS(a) (((a)^((a)>>31))-((a)>>31)) static int diff_y(unsigned char *a, unsigned char *b, int s) @@ -778,25 +590,7 @@ void pullup_init_context(struct pullup_context *c) c->diff = diff_y; c->comb = licomb_y; c->var = var_y; -#if ARCH_X86 -#if HAVE_MMX - if (c->cpu & PULLUP_CPU_MMX) { - c->diff = diff_y_mmx; - c->comb = licomb_y_mmx; - c->var = var_y_mmx; - } -#endif -#endif - /* c->comb = qpcomb_y; */ break; -#if 0 - case PULLUP_FMT_YUY2: - c->diff = diff_yuy2; - break; - case PULLUP_FMT_RGB32: - c->diff = diff_rgb32; - break; -#endif } } diff --git a/video/filter/pullup.h b/video/filter/pullup.h index da94539e9a..8bf060f88d 100644 --- a/video/filter/pullup.h +++ b/video/filter/pullup.h @@ 
-19,11 +19,6 @@ #ifndef MPLAYER_PULLUP_H #define MPLAYER_PULLUP_H -#define PULLUP_CPU_MMX 1 -#define PULLUP_CPU_MMX2 2 -#define PULLUP_CPU_SSE 16 -#define PULLUP_CPU_SSE2 32 - #define PULLUP_FMT_Y 1 #define PULLUP_FMT_YUY2 2 #define PULLUP_FMT_UYVY 3 @@ -65,7 +60,6 @@ struct pullup_context int format; int nplanes; int *bpp, *w, *h, *stride, *background; - unsigned int cpu; int junk_left, junk_right, junk_top, junk_bottom; int verbose; int metric_plane; diff --git a/video/filter/vf_pullup.c b/video/filter/vf_pullup.c index cf61d5a20a..7e04722f1c 100644 --- a/video/filter/vf_pullup.c +++ b/video/filter/vf_pullup.c @@ -22,7 +22,6 @@ #include "config.h" #include "common/msg.h" -#include "common/cpudetect.h" #include "options/m_option.h" #include "video/img_format.h" @@ -85,11 +84,6 @@ static void init_pullup(struct vf_instance *vf, mp_image_t *mpi) c->stride[3] = c->w[3]; c->background[1] = c->background[2] = 128; - if (gCpuCaps.hasMMX) c->cpu |= PULLUP_CPU_MMX; - if (gCpuCaps.hasMMX2) c->cpu |= PULLUP_CPU_MMX2; - if (gCpuCaps.hasSSE) c->cpu |= PULLUP_CPU_SSE; - if (gCpuCaps.hasSSE2) c->cpu |= PULLUP_CPU_SSE2; - pullup_init_context(c); vf->priv->init = 1; -- cgit v1.2.3