summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2014-04-19 15:47:10 +0200
committerwm4 <wm4@nowhere>2014-04-19 15:48:30 +0200
commitb69c835945f353d3b3dec4f92cac1d12a9435118 (patch)
tree735630d0312fded127b2f6f166195c9719456512 /video
parentc4f5dc0d53f24f004bf9ad5a6b55bd8d5a81685f (diff)
downloadmpv-b69c835945f353d3b3dec4f92cac1d12a9435118.tar.bz2
mpv-b69c835945f353d3b3dec4f92cac1d12a9435118.tar.xz
vf_ilpack: remove inline asm
This makes it multiple times slower. However, the output format (packed YUV) isn't handled efficiently by anything to begin with, and I have no clue we even have this filter. I guess it's one of these filters which find some use sometimes, but are not of higher importance, which justifies removing the faster inline asm.
Diffstat (limited to 'video')
-rw-r--r--video/filter/vf_ilpack.c281
1 files changed, 4 insertions, 277 deletions
diff --git a/video/filter/vf_ilpack.c b/video/filter/vf_ilpack.c
index 2ae129f6de..e27b846735 100644
--- a/video/filter/vf_ilpack.c
+++ b/video/filter/vf_ilpack.c
@@ -20,16 +20,15 @@
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
+#include <libavutil/attributes.h>
#include "config.h"
#include "common/msg.h"
-#include "common/cpudetect.h"
#include "options/m_option.h"
#include "video/img_format.h"
#include "video/mp_image.h"
#include "vf.h"
-#include "libavutil/attributes.h"
typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs);
@@ -39,7 +38,7 @@ struct vf_priv_s {
pack_func_t *pack[2];
};
-static void pack_nn_C(unsigned char *dst, unsigned char *y,
+static void pack_nn(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w,
int av_unused us, int av_unused vs)
{
@@ -52,7 +51,7 @@ static void pack_nn_C(unsigned char *dst, unsigned char *y,
}
}
-static void pack_li_0_C(unsigned char *dst, unsigned char *y,
+static void pack_li_0(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs)
{
int j;
@@ -65,7 +64,7 @@ static void pack_li_0_C(unsigned char *dst, unsigned char *y,
}
}
-static void pack_li_1_C(unsigned char *dst, unsigned char *y,
+static void pack_li_1(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs)
{
int j;
@@ -78,265 +77,6 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y,
}
}
-#if HAVE_MMX
-static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
- unsigned char *u, unsigned char *v, int w,
- int av_unused us, int av_unused vs)
-{
- __asm__ volatile (""
- ".align 4 \n\t"
- "1: \n\t"
- "movq (%0), %%mm1 \n\t"
- "movq (%0), %%mm2 \n\t"
- "movq (%1), %%mm4 \n\t"
- "movq (%2), %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm1 \n\t"
- "punpckhbw %%mm4, %%mm2 \n\t"
-
- "add $8, %0 \n\t"
- "add $4, %1 \n\t"
- "add $4, %2 \n\t"
- "movq %%mm1, (%3) \n\t"
- "movq %%mm2, 8(%3) \n\t"
- "add $16, %3 \n\t"
- "decl %4 \n\t"
- "jnz 1b \n\t"
- "emms \n\t"
- :
- : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
- : "memory"
- );
- pack_nn_C(dst, y, u, v, (w&7), 0, 0);
-}
-
-#if HAVE_EBX_AVAILABLE
-static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
- unsigned char *u, unsigned char *v, int w, int us, int vs)
-{
- __asm__ volatile (""
- "push %%"REG_BP" \n\t"
-#if ARCH_X86_64
- "mov %6, %%"REG_BP" \n\t"
-#else
- "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
- "movl (%%"REG_d"), %%"REG_d" \n\t"
-#endif
- "pxor %%mm0, %%mm0 \n\t"
-
- ".align 4 \n\t"
- ".Lli0: \n\t"
- "movq (%%"REG_S"), %%mm1 \n\t"
- "movq (%%"REG_S"), %%mm2 \n\t"
-
- "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
- "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
- "punpcklbw %%mm0, %%mm4 \n\t"
- "punpcklbw %%mm0, %%mm6 \n\t"
- "movq (%%"REG_a"), %%mm3 \n\t"
- "movq (%%"REG_b"), %%mm5 \n\t"
- "punpcklbw %%mm0, %%mm3 \n\t"
- "punpcklbw %%mm0, %%mm5 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "psrlw $3, %%mm4 \n\t"
- "psrlw $3, %%mm6 \n\t"
- "packuswb %%mm4, %%mm4 \n\t"
- "packuswb %%mm6, %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm1 \n\t"
- "punpckhbw %%mm4, %%mm2 \n\t"
-
- "movq %%mm1, (%%"REG_D") \n\t"
- "movq %%mm2, 8(%%"REG_D") \n\t"
-
- "movq 8(%%"REG_S"), %%mm1 \n\t"
- "movq 8(%%"REG_S"), %%mm2 \n\t"
-
- "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
- "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
- "punpckhbw %%mm0, %%mm4 \n\t"
- "punpckhbw %%mm0, %%mm6 \n\t"
- "movq (%%"REG_a"), %%mm3 \n\t"
- "movq (%%"REG_b"), %%mm5 \n\t"
- "punpckhbw %%mm0, %%mm3 \n\t"
- "punpckhbw %%mm0, %%mm5 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "psrlw $3, %%mm4 \n\t"
- "psrlw $3, %%mm6 \n\t"
- "packuswb %%mm4, %%mm4 \n\t"
- "packuswb %%mm6, %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm1 \n\t"
- "punpckhbw %%mm4, %%mm2 \n\t"
-
- "add $16, %%"REG_S" \n\t"
- "add $8, %%"REG_a" \n\t"
- "add $8, %%"REG_b" \n\t"
-
- "movq %%mm1, 16(%%"REG_D") \n\t"
- "movq %%mm2, 24(%%"REG_D") \n\t"
- "add $32, %%"REG_D" \n\t"
-
- "decl %%ecx \n\t"
- "jnz .Lli0 \n\t"
- "emms \n\t"
- "pop %%"REG_BP" \n\t"
- :
- : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
-#if ARCH_X86_64
- "d" ((x86_reg)us), "r" ((x86_reg)vs)
-#else
- "d" (&us)
-#endif
- : "memory"
- );
- pack_li_0_C(dst, y, u, v, (w&15), us, vs);
-}
-
-static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
- unsigned char *u, unsigned char *v, int w, int us, int vs)
-{
- __asm__ volatile (""
- "push %%"REG_BP" \n\t"
-#if ARCH_X86_64
- "mov %6, %%"REG_BP" \n\t"
-#else
- "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
- "movl (%%"REG_d"), %%"REG_d" \n\t"
-#endif
- "pxor %%mm0, %%mm0 \n\t"
-
- ".align 4 \n\t"
- ".Lli1: \n\t"
- "movq (%%"REG_S"), %%mm1 \n\t"
- "movq (%%"REG_S"), %%mm2 \n\t"
-
- "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
- "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
- "punpcklbw %%mm0, %%mm4 \n\t"
- "punpcklbw %%mm0, %%mm6 \n\t"
- "movq (%%"REG_a"), %%mm3 \n\t"
- "movq (%%"REG_b"), %%mm5 \n\t"
- "punpcklbw %%mm0, %%mm3 \n\t"
- "punpcklbw %%mm0, %%mm5 \n\t"
- "movq %%mm4, %%mm7 \n\t"
- "paddw %%mm4, %%mm4 \n\t"
- "paddw %%mm7, %%mm4 \n\t"
- "movq %%mm6, %%mm7 \n\t"
- "paddw %%mm6, %%mm6 \n\t"
- "paddw %%mm7, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "psrlw $3, %%mm4 \n\t"
- "psrlw $3, %%mm6 \n\t"
- "packuswb %%mm4, %%mm4 \n\t"
- "packuswb %%mm6, %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm1 \n\t"
- "punpckhbw %%mm4, %%mm2 \n\t"
-
- "movq %%mm1, (%%"REG_D") \n\t"
- "movq %%mm2, 8(%%"REG_D") \n\t"
-
- "movq 8(%%"REG_S"), %%mm1 \n\t"
- "movq 8(%%"REG_S"), %%mm2 \n\t"
-
- "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
- "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
- "punpckhbw %%mm0, %%mm4 \n\t"
- "punpckhbw %%mm0, %%mm6 \n\t"
- "movq (%%"REG_a"), %%mm3 \n\t"
- "movq (%%"REG_b"), %%mm5 \n\t"
- "punpckhbw %%mm0, %%mm3 \n\t"
- "punpckhbw %%mm0, %%mm5 \n\t"
- "movq %%mm4, %%mm7 \n\t"
- "paddw %%mm4, %%mm4 \n\t"
- "paddw %%mm7, %%mm4 \n\t"
- "movq %%mm6, %%mm7 \n\t"
- "paddw %%mm6, %%mm6 \n\t"
- "paddw %%mm7, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "paddw %%mm3, %%mm4 \n\t"
- "paddw %%mm5, %%mm6 \n\t"
- "psrlw $3, %%mm4 \n\t"
- "psrlw $3, %%mm6 \n\t"
- "packuswb %%mm4, %%mm4 \n\t"
- "packuswb %%mm6, %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm1 \n\t"
- "punpckhbw %%mm4, %%mm2 \n\t"
-
- "add $16, %%"REG_S" \n\t"
- "add $8, %%"REG_a" \n\t"
- "add $8, %%"REG_b" \n\t"
-
- "movq %%mm1, 16(%%"REG_D") \n\t"
- "movq %%mm2, 24(%%"REG_D") \n\t"
- "add $32, %%"REG_D" \n\t"
-
- "decl %%ecx \n\t"
- "jnz .Lli1 \n\t"
- "emms \n\t"
- "pop %%"REG_BP" \n\t"
- :
- : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
-#if ARCH_X86_64
- "d" ((x86_reg)us), "r" ((x86_reg)vs)
-#else
- "d" (&us)
-#endif
- : "memory"
- );
- pack_li_1_C(dst, y, u, v, (w&15), us, vs);
-}
-#endif /* HAVE_EBX_AVAILABLE */
-#endif
-
-static pack_func_t *pack_nn;
-static pack_func_t *pack_li_0;
-static pack_func_t *pack_li_1;
-
static void ilpack(unsigned char *dst, unsigned char *src[3],
int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
{
@@ -410,19 +150,6 @@ static int vf_open(vf_instance_t *vf)
vf->query_format=query_format;
vf->filter=filter;
- pack_nn = pack_nn_C;
- pack_li_0 = pack_li_0_C;
- pack_li_1 = pack_li_1_C;
-#if HAVE_MMX
- if(gCpuCaps.hasMMX) {
- pack_nn = pack_nn_MMX;
-#if HAVE_EBX_AVAILABLE
- pack_li_0 = pack_li_0_MMX;
- pack_li_1 = pack_li_1_MMX;
-#endif
- }
-#endif
-
switch(vf->priv->mode) {
case 0:
vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;