summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- bswap.h 25
-rwxr-xr-x configure 37
-rw-r--r-- cpudetect.c 22
-rw-r--r-- cpudetect.h 26
-rw-r--r-- libmpcodecs/pullup.c 4
-rw-r--r-- libmpcodecs/vf_decimate.c 14
-rw-r--r-- libmpcodecs/vf_divtc.c 14
-rw-r--r-- libmpcodecs/vf_eq.c 4
-rw-r--r-- libmpcodecs/vf_eq2.c 4
-rw-r--r-- libmpcodecs/vf_filmdint.c 8
-rw-r--r-- libmpcodecs/vf_halfpack.c 10
-rw-r--r-- libmpcodecs/vf_ilpack.c 126
-rw-r--r-- libmpcodecs/vf_ivtc.c 82
-rw-r--r-- libmpcodecs/vf_noise.c 46
-rw-r--r-- libmpcodecs/vf_spp.c 6
-rw-r--r-- libmpcodecs/vf_tfields.c 76
-rw-r--r-- libvo/aclib.c 7
-rw-r--r-- libvo/aclib_template.c 70
-rw-r--r-- libvo/osd.c 24
-rw-r--r-- libvo/osd_template.c 6
-rw-r--r-- postproc/rgb2rgb.c 9
-rw-r--r-- postproc/rgb2rgb_template.c 369
-rw-r--r-- postproc/swscale-example.c 6
-rw-r--r-- postproc/swscale.c 96
-rw-r--r-- postproc/swscale_template.c 844
-rw-r--r-- postproc/yuv2rgb.c 7
-rw-r--r-- postproc/yuv2rgb_template.c 24
27 files changed, 1019 insertions, 947 deletions
diff --git a/bswap.h b/bswap.h
index 864bedfd5f..42cd6400dd 100644
--- a/bswap.h
+++ b/bswap.h
@@ -7,17 +7,23 @@
#include <inttypes.h>
-#ifdef ARCH_X86
-static inline unsigned short ByteSwap16(unsigned short x)
+#ifdef ARCH_X86_64
+# define LEGACY_REGS "=Q"
+#else
+# define LEGACY_REGS "=q"
+#endif
+
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+static inline uint16_t ByteSwap16(uint16_t x)
{
__asm("xchgb %b0,%h0" :
- "=q" (x) :
+ LEGACY_REGS (x) :
"0" (x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
-static inline unsigned int ByteSwap32(unsigned int x)
+static inline uint32_t ByteSwap32(uint32_t x)
{
#if __CPU__ > 386
__asm("bswap %0":
@@ -26,21 +32,28 @@ static inline unsigned int ByteSwap32(unsigned int x)
__asm("xchgb %b0,%h0\n"
" rorl $16,%0\n"
" xchgb %b0,%h0":
- "=q" (x) :
+ LEGACY_REGS (x) :
#endif
"0" (x));
return x;
}
#define bswap_32(x) ByteSwap32(x)
-static inline unsigned long long int ByteSwap64(unsigned long long int x)
+static inline uint64_t ByteSwap64(uint64_t x)
{
+#ifdef ARCH_X86_64
+ __asm("bswap %0":
+ "=r" (x) :
+ "0" (x));
+ return x;
+#else
register union { __extension__ uint64_t __ll;
uint32_t __l[2]; } __x;
asm("xchgl %0,%1":
"=r"(__x.__l[0]),"=r"(__x.__l[1]):
"0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
return __x.__ll;
+#endif
}
#define bswap_64(x) ByteSwap64(x)
diff --git a/configure b/configure
index af594015e4..6ab921d35c 100755
--- a/configure
+++ b/configure
@@ -456,7 +456,14 @@ if test -z "$_target" ; then
case "`( uname -m ) 2>&1`" in
i[3-9]86*|x86|x86pc|k5|k6|k6_2|k6_3|k6-2|k6-3|pentium*|athlon*|i586_i686|i586-i686|BePC) host_arch=i386 ;;
ia64) host_arch=ia64 ;;
- x86_64|amd64) host_arch=x86_64 ;;
+ x86_64|amd64)
+ if [ "`$_cc -dumpmachine | grep x86_64 | cut -d- -f1`" = "x86_64" -a \
+ -z "`echo $CFLAGS | grep -- -m32`" ]; then
+ host_arch=x86_64
+ else
+ host_arch=i386
+ fi
+ ;;
macppc|ppc) host_arch=ppc ;;
alpha) host_arch=alpha ;;
sparc) host_arch=sparc ;;
@@ -672,17 +679,8 @@ elif x86; then
_cpuinfo="TOOLS/cpuinfo"
fi
-case "$host_arch" in
- i[3-9]86|x86|x86pc|k5|k6|k6-2|k6-3|pentium*|athlon*|i586-i686)
- _def_arch="#define ARCH_X86 1"
- _target_arch="TARGET_ARCH_X86 = yes"
-
- pname=`$_cpuinfo | grep 'model name' | cut -d ':' -f 2 | head -1`
- pvendor=`$_cpuinfo | grep 'vendor_id' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
- pfamily=`$_cpuinfo | grep 'cpu family' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
- pmodel=`$_cpuinfo | grep -v 'model name' | grep 'model' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
- pstepping=`$_cpuinfo | grep 'stepping' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
-
+x86_exts_check()
+{
pparam=`$_cpuinfo | grep 'features' | cut -d ':' -f 2 | head -1`
if test -z "$pparam" ; then
pparam=`$_cpuinfo | grep 'flags' | cut -d ':' -f 2 | head -1`
@@ -707,6 +705,20 @@ case "$host_arch" in
sse2) _sse2=yes ;;
esac
done
+}
+
+case "$host_arch" in
+ i[3-9]86|x86|x86pc|k5|k6|k6-2|k6-3|pentium*|athlon*|i586-i686)
+ _def_arch="#define ARCH_X86 1"
+ _target_arch="TARGET_ARCH_X86 = yes"
+
+ pname=`$_cpuinfo | grep 'model name' | cut -d ':' -f 2 | head -1`
+ pvendor=`$_cpuinfo | grep 'vendor_id' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
+ pfamily=`$_cpuinfo | grep 'cpu family' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
+ pmodel=`$_cpuinfo | grep -v 'model name' | grep 'model' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
+ pstepping=`$_cpuinfo | grep 'stepping' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
+
+ x86_exts_check
echocheck "CPU vendor"
echores "$pvendor ($pfamily:$pmodel:$pstepping)"
@@ -904,6 +916,7 @@ EOF
_march=''
_mcpu=''
_optimizing=''
+ x86_exts_check
;;
sparc)
diff --git a/cpudetect.c b/cpudetect.c
index 9e05236319..65d7e5ed6d 100644
--- a/cpudetect.c
+++ b/cpudetect.c
@@ -9,7 +9,7 @@ CpuCaps gCpuCaps;
#endif
#include <stdlib.h>
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
#include <stdio.h>
#include <string.h>
@@ -47,25 +47,25 @@ static void check_os_katmai_support( void );
// return TRUE if cpuid supported
static int has_cpuid()
{
- int a, c;
+ long a, c;
// code from libavcodec:
__asm__ __volatile__ (
/* See if CPUID instruction is supported ... */
/* ... Get copies of EFLAGS into eax and ecx */
"pushf\n\t"
- "popl %0\n\t"
- "movl %0, %1\n\t"
+ "pop %0\n\t"
+ "mov %0, %1\n\t"
/* ... Toggle the ID bit in one copy and store */
/* to the EFLAGS reg */
- "xorl $0x200000, %0\n\t"
+ "xor $0x200000, %0\n\t"
"push %0\n\t"
"popf\n\t"
/* ... Get the (hopefully modified) EFLAGS */
"pushf\n\t"
- "popl %0\n\t"
+ "pop %0\n\t"
: "=a" (a), "=c" (c)
:
: "cc"
@@ -87,9 +87,9 @@ do_cpuid(unsigned int ax, unsigned int *p)
#else
// code from libavcodec:
__asm __volatile
- ("movl %%ebx, %%esi\n\t"
+ ("mov %%"REG_b", %%"REG_S"\n\t"
"cpuid\n\t"
- "xchgl %%ebx, %%esi"
+ "xchg %%"REG_b", %%"REG_S
: "=a" (p[0]), "=S" (p[1]),
"=c" (p[2]), "=d" (p[3])
: "0" (ax));
@@ -456,7 +456,7 @@ static void check_os_katmai_support( void )
gCpuCaps.hasSSE=0;
#endif /* __linux__ */
}
-#else /* ARCH_X86 */
+#else /* ARCH_X86 || ARCH_X86_64 */
#ifdef SYS_DARWIN
#include <sys/sysctl.h>
@@ -536,10 +536,6 @@ void GetCpuCaps( CpuCaps *caps)
mp_msg(MSGT_CPUDETECT,MSGL_INFO,"CPU: Intel Itanium\n");
#endif
-#ifdef ARCH_X86_64
- mp_msg(MSGT_CPUDETECT,MSGL_INFO,"CPU: Advanced Micro Devices 64-bit CPU\n");
-#endif
-
#ifdef ARCH_SPARC
mp_msg(MSGT_CPUDETECT,MSGL_INFO,"CPU: Sun Sparc\n");
#endif
diff --git a/cpudetect.h b/cpudetect.h
index c178eb7b39..997ec11195 100644
--- a/cpudetect.h
+++ b/cpudetect.h
@@ -6,6 +6,32 @@
#define CPUTYPE_I586 5
#define CPUTYPE_I686 6
+#ifdef ARCH_X86_64
+# define REGa rax
+# define REGb rbx
+# define REGSP rsp
+# define REG_a "rax"
+# define REG_b "rbx"
+# define REG_c "rcx"
+# define REG_d "rdx"
+# define REG_S "rsi"
+# define REG_D "rdi"
+# define REG_SP "rsp"
+# define REG_BP "rbp"
+#else
+# define REGa eax
+# define REGb ebx
+# define REGSP esp
+# define REG_a "eax"
+# define REG_b "ebx"
+# define REG_c "ecx"
+# define REG_d "edx"
+# define REG_S "esi"
+# define REG_D "edi"
+# define REG_SP "esp"
+# define REG_BP "ebp"
+#endif
+
typedef struct cpucaps_s {
int cpuType;
int cpuStepping;
diff --git a/libmpcodecs/pullup.c b/libmpcodecs/pullup.c
index 2abaa158a7..3627084cb5 100644
--- a/libmpcodecs/pullup.c
+++ b/libmpcodecs/pullup.c
@@ -8,6 +8,7 @@
+#ifdef ARCH_X86
#ifdef HAVE_MMX
static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
{
@@ -147,6 +148,7 @@ static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
return ret;
}
#endif
+#endif
#define ABS(a) (((a)^((a)>>31))-((a)>>31))
@@ -682,12 +684,14 @@ void pullup_init_context(struct pullup_context *c)
case PULLUP_FMT_Y:
c->diff = diff_y;
c->comb = licomb_y;
+#ifdef ARCH_X86
#ifdef HAVE_MMX
if (c->cpu & PULLUP_CPU_MMX) {
c->diff = diff_y_mmx;
c->comb = licomb_y_mmx;
}
#endif
+#endif
/* c->comb = qpcomb_y; */
break;
#if 0
diff --git a/libmpcodecs/vf_decimate.c b/libmpcodecs/vf_decimate.c
index 7cc8d2af76..1a80e9dd07 100644
--- a/libmpcodecs/vf_decimate.c
+++ b/libmpcodecs/vf_decimate.c
@@ -31,11 +31,11 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
".balign 16 \n\t"
"1: \n\t"
- "movq (%%esi), %%mm0 \n\t"
- "movq (%%esi), %%mm2 \n\t"
- "addl %%eax, %%esi \n\t"
- "movq (%%edi), %%mm1 \n\t"
- "addl %%ebx, %%edi \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "movq (%%"REG_D"), %%mm1 \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
"psubusb %%mm1, %%mm2 \n\t"
"psubusb %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm0 \n\t"
@@ -51,10 +51,10 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
"decl %%ecx \n\t"
"jnz 1b \n\t"
- "movq %%mm4, (%%edx) \n\t"
+ "movq %%mm4, (%%"REG_d") \n\t"
"emms \n\t"
:
- : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out)
+ : "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out)
: "memory"
);
return out[0]+out[1]+out[2]+out[3];
diff --git a/libmpcodecs/vf_divtc.c b/libmpcodecs/vf_divtc.c
index e17600edbd..d3f287a73d 100644
--- a/libmpcodecs/vf_divtc.c
+++ b/libmpcodecs/vf_divtc.c
@@ -44,11 +44,11 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
".balign 16 \n\t"
"1: \n\t"
- "movq (%%esi), %%mm0 \n\t"
- "movq (%%esi), %%mm2 \n\t"
- "addl %%eax, %%esi \n\t"
- "movq (%%edi), %%mm1 \n\t"
- "addl %%ebx, %%edi \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "movq (%%"REG_D"), %%mm1 \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
"psubusb %%mm1, %%mm2 \n\t"
"psubusb %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm0 \n\t"
@@ -64,10 +64,10 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
"decl %%ecx \n\t"
"jnz 1b \n\t"
- "movq %%mm4, (%%edx) \n\t"
+ "movq %%mm4, (%%"REG_d") \n\t"
"emms \n\t"
:
- : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out)
+ : "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out)
: "memory"
);
return out[0]+out[1]+out[2]+out[3];
diff --git a/libmpcodecs/vf_eq.c b/libmpcodecs/vf_eq.c
index 74395f61c6..d7adeea1ff 100644
--- a/libmpcodecs/vf_eq.c
+++ b/libmpcodecs/vf_eq.c
@@ -64,9 +64,9 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
"paddw %%mm3, %%mm1 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"packuswb %%mm2, %%mm1 \n\t"
- "addl $8, %0 \n\t"
+ "add $8, %0 \n\t"
"movq %%mm1, (%1) \n\t"
- "addl $8, %1 \n\t"
+ "add $8, %1 \n\t"
"decl %%eax \n\t"
"jnz 1b \n\t"
: "=r" (src), "=r" (dest)
diff --git a/libmpcodecs/vf_eq2.c b/libmpcodecs/vf_eq2.c
index 123bcea55e..f2641f9c0b 100644
--- a/libmpcodecs/vf_eq2.c
+++ b/libmpcodecs/vf_eq2.c
@@ -152,9 +152,9 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
"paddw %%mm3, %%mm1 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"packuswb %%mm2, %%mm1 \n\t"
- "addl $8, %0 \n\t"
+ "add $8, %0 \n\t"
"movq %%mm1, (%1) \n\t"
- "addl $8, %1 \n\t"
+ "add $8, %1 \n\t"
"decl %%eax \n\t"
"jnz 1b \n\t"
: "=r" (src), "=r" (dst)
diff --git a/libmpcodecs/vf_filmdint.c b/libmpcodecs/vf_filmdint.c
index 27a527cac3..90e25b8423 100644
--- a/libmpcodecs/vf_filmdint.c
+++ b/libmpcodecs/vf_filmdint.c
@@ -406,8 +406,8 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
"psllq $16, %%mm0\n\t" \
"paddusw %%mm0, %%mm7\n\t" \
"movq (%1), %%mm4\n\t" \
- "leal (%0,%2,2), %0\n\t" \
- "leal (%1,%3,2), %1\n\t" \
+ "lea (%0,%2,2), %0\n\t" \
+ "lea (%1,%3,2), %1\n\t" \
"psubusb %4, %%mm4\n\t" \
PAVGB(%%mm2, %%mm4) \
PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \
@@ -440,7 +440,7 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
"paddusw %%mm2, %%mm7\n\t" \
"paddusw %%mm1, %%mm7\n\t" \
: "=r" (a), "=r" (b) \
- : "r"(as), "r"(bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
+ : "r"((long)as), "r"((long)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
); \
} while (--lines);
@@ -650,7 +650,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
"por %%mm3, %%mm1 \n\t" /* avg if >= threshold */
"movq %%mm1, (%2,%4) \n\t"
: /* no output */
- : "r" (a), "r" (bos), "r" (dst), "r" (ss), "r" (ds), "r" (cos)
+ : "r" (a), "r" (bos), "r" (dst), "r" ((long)ss), "r" ((long)ds), "r" (cos)
);
a += 8;
dst += 8;
diff --git a/libmpcodecs/vf_halfpack.c b/libmpcodecs/vf_halfpack.c
index b4fc0e648f..900aed6a9c 100644
--- a/libmpcodecs/vf_halfpack.c
+++ b/libmpcodecs/vf_halfpack.c
@@ -75,13 +75,13 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
"por %%mm5, %%mm1 \n\t"
"por %%mm6, %%mm2 \n\t"
- "addl $8, %0 \n\t"
- "addl $8, %1 \n\t"
- "addl $4, %2 \n\t"
- "addl $4, %3 \n\t"
+ "add $8, %0 \n\t"
+ "add $8, %1 \n\t"
+ "add $4, %2 \n\t"
+ "add $4, %3 \n\t"
"movq %%mm1, (%8) \n\t"
"movq %%mm2, 8(%8) \n\t"
- "addl $16, %8 \n\t"
+ "add $16, %8 \n\t"
"decl %9 \n\t"
"jnz 1b \n\t"
: "=r" (y1), "=r" (y2), "=r" (u), "=r" (v)
diff --git a/libmpcodecs/vf_ilpack.c b/libmpcodecs/vf_ilpack.c
index 66bad26de4..43c6bad218 100644
--- a/libmpcodecs/vf_ilpack.c
+++ b/libmpcodecs/vf_ilpack.c
@@ -76,12 +76,12 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
"punpcklbw %%mm4, %%mm1 \n\t"
"punpckhbw %%mm4, %%mm2 \n\t"
- "addl $8, %0 \n\t"
- "addl $4, %1 \n\t"
- "addl $4, %2 \n\t"
+ "add $8, %0 \n\t"
+ "add $4, %1 \n\t"
+ "add $4, %2 \n\t"
"movq %%mm1, (%3) \n\t"
"movq %%mm2, 8(%3) \n\t"
- "addl $16, %3 \n\t"
+ "add $16, %3 \n\t"
"decl %4 \n\t"
"jnz 1b \n\t"
"emms \n\t"
@@ -96,22 +96,26 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs)
{
asm volatile (""
- "pushl %%ebp \n\t"
- "movl 4(%%edx), %%ebp \n\t"
- "movl (%%edx), %%edx \n\t"
+ "push %%"REG_BP" \n\t"
+#ifdef ARCH_X86_64
+ "mov %6, %%"REG_BP" \n\t"
+#else
+ "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
+ "movl (%%"REG_d"), %%"REG_d" \n\t"
+#endif
"pxor %%mm0, %%mm0 \n\t"
".balign 16 \n\t"
".Lli0: \n\t"
- "movq (%%esi), %%mm1 \n\t"
- "movq (%%esi), %%mm2 \n\t"
+ "movq (%%"REG_S"), %%mm1 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
- "movq (%%eax,%%edx,2), %%mm4 \n\t"
- "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
"punpcklbw %%mm0, %%mm4 \n\t"
"punpcklbw %%mm0, %%mm6 \n\t"
- "movq (%%eax), %%mm3 \n\t"
- "movq (%%ebx), %%mm5 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
"punpcklbw %%mm0, %%mm3 \n\t"
"punpcklbw %%mm0, %%mm5 \n\t"
"paddw %%mm3, %%mm4 \n\t"
@@ -136,18 +140,18 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
"punpcklbw %%mm4, %%mm1 \n\t"
"punpckhbw %%mm4, %%mm2 \n\t"
- "movq %%mm1, (%%edi) \n\t"
- "movq %%mm2, 8(%%edi) \n\t"
+ "movq %%mm1, (%%"REG_D") \n\t"
+ "movq %%mm2, 8(%%"REG_D") \n\t"
- "movq 8(%%esi), %%mm1 \n\t"
- "movq 8(%%esi), %%mm2 \n\t"
+ "movq 8(%%"REG_S"), %%mm1 \n\t"
+ "movq 8(%%"REG_S"), %%mm2 \n\t"
- "movq (%%eax,%%edx,2), %%mm4 \n\t"
- "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
"punpckhbw %%mm0, %%mm4 \n\t"
"punpckhbw %%mm0, %%mm6 \n\t"
- "movq (%%eax), %%mm3 \n\t"
- "movq (%%ebx), %%mm5 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
"punpckhbw %%mm0, %%mm3 \n\t"
"punpckhbw %%mm0, %%mm5 \n\t"
"paddw %%mm3, %%mm4 \n\t"
@@ -172,20 +176,25 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
"punpcklbw %%mm4, %%mm1 \n\t"
"punpckhbw %%mm4, %%mm2 \n\t"
- "addl $16, %%esi \n\t"
- "addl $8, %%eax \n\t"
- "addl $8, %%ebx \n\t"
+ "add $16, %%"REG_S" \n\t"
+ "add $8, %%"REG_a" \n\t"
+ "add $8, %%"REG_b" \n\t"
- "movq %%mm1, 16(%%edi) \n\t"
- "movq %%mm2, 24(%%edi) \n\t"
- "addl $32, %%edi \n\t"
+ "movq %%mm1, 16(%%"REG_D") \n\t"
+ "movq %%mm2, 24(%%"REG_D") \n\t"
+ "add $32, %%"REG_D" \n\t"
"decl %%ecx \n\t"
"jnz .Lli0 \n\t"
"emms \n\t"
- "popl %%ebp \n\t"
+ "pop %%"REG_BP" \n\t"
:
- : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
+ : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
+#ifdef ARCH_X86_64
+ "d" ((long)us), "r" ((long)vs)
+#else
+ "d" (&us)
+#endif
: "memory"
);
pack_li_0_C(dst, y, u, v, (w&15), us, vs);
@@ -195,22 +204,26 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs)
{
asm volatile (""
- "pushl %%ebp \n\t"
- "movl 4(%%edx), %%ebp \n\t"
- "movl (%%edx), %%edx \n\t"
+ "push %%"REG_BP" \n\t"
+#ifdef ARCH_X86_64
+ "mov %6, %%"REG_BP" \n\t"
+#else
+ "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
+ "movl (%%"REG_d"), %%"REG_d" \n\t"
+#endif
"pxor %%mm0, %%mm0 \n\t"
".balign 16 \n\t"
".Lli1: \n\t"
- "movq (%%esi), %%mm1 \n\t"
- "movq (%%esi), %%mm2 \n\t"
+ "movq (%%"REG_S"), %%mm1 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
- "movq (%%eax,%%edx,2), %%mm4 \n\t"
- "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
"punpcklbw %%mm0, %%mm4 \n\t"
"punpcklbw %%mm0, %%mm6 \n\t"
- "movq (%%eax), %%mm3 \n\t"
- "movq (%%ebx), %%mm5 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
"punpcklbw %%mm0, %%mm3 \n\t"
"punpcklbw %%mm0, %%mm5 \n\t"
"movq %%mm4, %%mm7 \n\t"
@@ -237,18 +250,18 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
"punpcklbw %%mm4, %%mm1 \n\t"
"punpckhbw %%mm4, %%mm2 \n\t"
- "movq %%mm1, (%%edi) \n\t"
- "movq %%mm2, 8(%%edi) \n\t"
+ "movq %%mm1, (%%"REG_D") \n\t"
+ "movq %%mm2, 8(%%"REG_D") \n\t"
- "movq 8(%%esi), %%mm1 \n\t"
- "movq 8(%%esi), %%mm2 \n\t"
+ "movq 8(%%"REG_S"), %%mm1 \n\t"
+ "movq 8(%%"REG_S"), %%mm2 \n\t"
- "movq (%%eax,%%edx,2), %%mm4 \n\t"
- "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
"punpckhbw %%mm0, %%mm4 \n\t"
"punpckhbw %%mm0, %%mm6 \n\t"
- "movq (%%eax), %%mm3 \n\t"
- "movq (%%ebx), %%mm5 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
"punpckhbw %%mm0, %%mm3 \n\t"
"punpckhbw %%mm0, %%mm5 \n\t"
"movq %%mm4, %%mm7 \n\t"
@@ -275,20 +288,25 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
"punpcklbw %%mm4, %%mm1 \n\t"
"punpckhbw %%mm4, %%mm2 \n\t"
- "addl $16, %%esi \n\t"
- "addl $8, %%eax \n\t"
- "addl $8, %%ebx \n\t"
+ "add $16, %%"REG_S" \n\t"
+ "add $8, %%"REG_a" \n\t"
+ "add $8, %%"REG_b" \n\t"
- "movq %%mm1, 16(%%edi) \n\t"
- "movq %%mm2, 24(%%edi) \n\t"
- "addl $32, %%edi \n\t"
+ "movq %%mm1, 16(%%"REG_D") \n\t"
+ "movq %%mm2, 24(%%"REG_D") \n\t"
+ "add $32, %%"REG_D" \n\t"
"decl %%ecx \n\t"
"jnz .Lli1 \n\t"
"emms \n\t"
- "popl %%ebp \n\t"
+ "pop %%"REG_BP" \n\t"
:
- : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
+ : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
+#ifdef ARCH_X86_64
+ "d" ((long)us), "r" ((long)vs)
+#else
+ "d" (&us)
+#endif
: "memory"
);
pack_li_1_C(dst, y, u, v, (w&15), us, vs);
diff --git a/libmpcodecs/vf_ivtc.c b/libmpcodecs/vf_ivtc.c
index 804f68a084..3fb00e5f7b 100644
--- a/libmpcodecs/vf_ivtc.c
+++ b/libmpcodecs/vf_ivtc.c
@@ -71,11 +71,11 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"1: \n\t"
// Even difference
- "movq (%%esi), %%mm0 \n\t"
- "movq (%%esi), %%mm2 \n\t"
- "addl %%eax, %%esi \n\t"
- "movq (%%edi), %%mm1 \n\t"
- "addl %%ebx, %%edi \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "movq (%%"REG_D"), %%mm1 \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
"psubusb %%mm1, %%mm2 \n\t"
"psubusb %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm0 \n\t"
@@ -90,11 +90,11 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"paddw %%mm3, %%mm4 \n\t"
// Odd difference
- "movq (%%esi), %%mm0 \n\t"
- "movq (%%esi), %%mm2 \n\t"
- "addl %%eax, %%esi \n\t"
- "movq (%%edi), %%mm1 \n\t"
- "addl %%ebx, %%edi \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "movq (%%"REG_D"), %%mm1 \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
"psubusb %%mm1, %%mm2 \n\t"
"psubusb %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm0 \n\t"
@@ -110,8 +110,8 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"decl %%ecx \n\t"
"jnz 1b \n\t"
- "movq %%mm4, (%%edx) \n\t"
- "movq %%mm5, 8(%%edx) \n\t"
+ "movq %%mm4, (%%"REG_d") \n\t"
+ "movq %%mm5, 8(%%"REG_d") \n\t"
:
: "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out)
: "memory"
@@ -130,14 +130,14 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
".balign 16 \n\t"
"2: \n\t"
- "movq (%%esi), %%mm0 \n\t"
- "movq (%%esi,%%eax), %%mm1 \n\t"
- "addl %%eax, %%esi \n\t"
- "addl %%eax, %%esi \n\t"
- "movq (%%edi), %%mm2 \n\t"
- "movq (%%edi,%%ebx), %%mm3 \n\t"
- "addl %%ebx, %%edi \n\t"
- "addl %%ebx, %%edi \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "movq (%%"REG_D"), %%mm2 \n\t"
+ "movq (%%"REG_D",%%"REG_b"), %%mm3 \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
@@ -164,16 +164,16 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"psubw %%mm1, %%mm4 \n\t"
"psubw %%mm2, %%mm5 \n\t"
"psubw %%mm3, %%mm6 \n\t"
- "movq %%mm4, (%%edx) \n\t"
- "movq %%mm5, 16(%%edx) \n\t"
- "movq %%mm6, 32(%%edx) \n\t"
+ "movq %%mm4, (%%"REG_d") \n\t"
+ "movq %%mm5, 16(%%"REG_d") \n\t"
+ "movq %%mm6, 32(%%"REG_d") \n\t"
- "movl %%eax, %%ecx \n\t"
- "shll $3, %%ecx \n\t"
- "subl %%ecx, %%esi \n\t"
- "movl %%ebx, %%ecx \n\t"
- "shll $3, %%ecx \n\t"
- "subl %%ecx, %%edi \n\t"
+ "mov %%"REG_a", %%"REG_c" \n\t"
+ "shl $3, %%"REG_c" \n\t"
+ "sub %%"REG_c", %%"REG_S" \n\t"
+ "mov %%"REG_b", %%"REG_c" \n\t"
+ "shl $3, %%"REG_c" \n\t"
+ "sub %%"REG_c", %%"REG_D" \n\t"
// Second loop for the last four columns
"movl $4, %%ecx \n\t"
@@ -184,14 +184,14 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
".balign 16 \n\t"
"3: \n\t"
- "movq (%%esi), %%mm0 \n\t"
- "movq (%%esi,%%eax), %%mm1 \n\t"
- "addl %%eax, %%esi \n\t"
- "addl %%eax, %%esi \n\t"
- "movq (%%edi), %%mm2 \n\t"
- "movq (%%edi,%%ebx), %%mm3 \n\t"
- "addl %%ebx, %%edi \n\t"
- "addl %%ebx, %%edi \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "add %%"REG_a", %%"REG_S" \n\t"
+ "movq (%%"REG_D"), %%mm2 \n\t"
+ "movq (%%"REG_D",%%"REG_b"), %%mm3 \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
+ "add %%"REG_b", %%"REG_D" \n\t"
"punpckhbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
@@ -218,13 +218,13 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"psubw %%mm1, %%mm4 \n\t"
"psubw %%mm2, %%mm5 \n\t"
"psubw %%mm3, %%mm6 \n\t"
- "movq %%mm4, 8(%%edx) \n\t"
- "movq %%mm5, 24(%%edx) \n\t"
- "movq %%mm6, 40(%%edx) \n\t"
+ "movq %%mm4, 8(%%"REG_d") \n\t"
+ "movq %%mm5, 24(%%"REG_d") \n\t"
+ "movq %%mm6, 40(%%"REG_d") \n\t"
"emms \n\t"
:
- : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out)
+ : "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out)
: "memory"
);
m->p = m->t = m->s = 0;
diff --git a/libmpcodecs/vf_noise.c b/libmpcodecs/vf_noise.c
index 14ad8f9604..c8f669bffa 100644
--- a/libmpcodecs/vf_noise.c
+++ b/libmpcodecs/vf_noise.c
@@ -143,26 +143,26 @@ static int8_t *initNoise(FilterParam *fp){
#ifdef HAVE_MMX
static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift){
- int mmx_len= len&(~7);
+ long mmx_len= len&(~7);
noise+=shift;
asm volatile(
- "movl %3, %%eax \n\t"
+ "mov %3, %%"REG_a" \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t"
"psllw $15, %%mm7 \n\t"
"packsswb %%mm7, %%mm7 \n\t"
".balign 16 \n\t"
"1: \n\t"
- "movq (%0, %%eax), %%mm0 \n\t"
- "movq (%1, %%eax), %%mm1 \n\t"
+ "movq (%0, %%"REG_a"), %%mm0 \n\t"
+ "movq (%1, %%"REG_a"), %%mm1 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"paddsb %%mm1, %%mm0 \n\t"
"pxor %%mm7, %%mm0 \n\t"
- "movq %%mm0, (%2, %%eax) \n\t"
- "addl $8, %%eax \n\t"
+ "movq %%mm0, (%2, %%"REG_a") \n\t"
+ "add $8, %%"REG_a" \n\t"
" js 1b \n\t"
:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
- : "%eax"
+ : "%"REG_a
);
if(mmx_len!=len)
lineNoise_C(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
@@ -172,26 +172,26 @@ static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int
//duplicate of previous except movntq
#ifdef HAVE_MMX2
static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift){
- int mmx_len= len&(~7);
+ long mmx_len= len&(~7);
noise+=shift;
asm volatile(
- "movl %3, %%eax \n\t"
+ "mov %3, %%"REG_a" \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t"
"psllw $15, %%mm7 \n\t"
"packsswb %%mm7, %%mm7 \n\t"
".balign 16 \n\t"
"1: \n\t"
- "movq (%0, %%eax), %%mm0 \n\t"
- "movq (%1, %%eax), %%mm1 \n\t"
+ "movq (%0, %%"REG_a"), %%mm0 \n\t"
+ "movq (%1, %%"REG_a"), %%mm1 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"paddsb %%mm1, %%mm0 \n\t"
"pxor %%mm7, %%mm0 \n\t"
- "movntq %%mm0, (%2, %%eax) \n\t"
- "addl $8, %%eax \n\t"
+ "movntq %%mm0, (%2, %%"REG_a") \n\t"
+ "add $8, %%"REG_a" \n\t"
" js 1b \n\t"
:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
- : "%eax"
+ : "%"REG_a
);
if(mmx_len!=len)
lineNoise_C(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
@@ -214,16 +214,16 @@ static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int le
#ifdef HAVE_MMX
static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t **shift){
- int mmx_len= len&(~7);
+ long mmx_len= len&(~7);
asm volatile(
- "movl %5, %%eax