diff options
-rw-r--r-- | bswap.h | 25 | ||||
-rwxr-xr-x | configure | 37 | ||||
-rw-r--r-- | cpudetect.c | 22 | ||||
-rw-r--r-- | cpudetect.h | 26 | ||||
-rw-r--r-- | libmpcodecs/pullup.c | 4 | ||||
-rw-r--r-- | libmpcodecs/vf_decimate.c | 14 | ||||
-rw-r--r-- | libmpcodecs/vf_divtc.c | 14 | ||||
-rw-r--r-- | libmpcodecs/vf_eq.c | 4 | ||||
-rw-r--r-- | libmpcodecs/vf_eq2.c | 4 | ||||
-rw-r--r-- | libmpcodecs/vf_filmdint.c | 8 | ||||
-rw-r--r-- | libmpcodecs/vf_halfpack.c | 10 | ||||
-rw-r--r-- | libmpcodecs/vf_ilpack.c | 126 | ||||
-rw-r--r-- | libmpcodecs/vf_ivtc.c | 82 | ||||
-rw-r--r-- | libmpcodecs/vf_noise.c | 46 | ||||
-rw-r--r-- | libmpcodecs/vf_spp.c | 6 | ||||
-rw-r--r-- | libmpcodecs/vf_tfields.c | 76 | ||||
-rw-r--r-- | libvo/aclib.c | 7 | ||||
-rw-r--r-- | libvo/aclib_template.c | 70 | ||||
-rw-r--r-- | libvo/osd.c | 24 | ||||
-rw-r--r-- | libvo/osd_template.c | 6 | ||||
-rw-r--r-- | postproc/rgb2rgb.c | 9 | ||||
-rw-r--r-- | postproc/rgb2rgb_template.c | 369 | ||||
-rw-r--r-- | postproc/swscale-example.c | 6 | ||||
-rw-r--r-- | postproc/swscale.c | 96 | ||||
-rw-r--r-- | postproc/swscale_template.c | 844 | ||||
-rw-r--r-- | postproc/yuv2rgb.c | 7 | ||||
-rw-r--r-- | postproc/yuv2rgb_template.c | 24 |
27 files changed, 1019 insertions, 947 deletions
@@ -7,17 +7,23 @@ #include <inttypes.h> -#ifdef ARCH_X86 -static inline unsigned short ByteSwap16(unsigned short x) +#ifdef ARCH_X86_64 +# define LEGACY_REGS "=Q" +#else +# define LEGACY_REGS "=q" +#endif + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +static inline uint16_t ByteSwap16(uint16_t x) { __asm("xchgb %b0,%h0" : - "=q" (x) : + LEGACY_REGS (x) : "0" (x)); return x; } #define bswap_16(x) ByteSwap16(x) -static inline unsigned int ByteSwap32(unsigned int x) +static inline uint32_t ByteSwap32(uint32_t x) { #if __CPU__ > 386 __asm("bswap %0": @@ -26,21 +32,28 @@ static inline unsigned int ByteSwap32(unsigned int x) __asm("xchgb %b0,%h0\n" " rorl $16,%0\n" " xchgb %b0,%h0": - "=q" (x) : + LEGACY_REGS (x) : #endif "0" (x)); return x; } #define bswap_32(x) ByteSwap32(x) -static inline unsigned long long int ByteSwap64(unsigned long long int x) +static inline uint64_t ByteSwap64(uint64_t x) { +#ifdef ARCH_X86_64 + __asm("bswap %0": + "=r" (x) : + "0" (x)); + return x; +#else register union { __extension__ uint64_t __ll; uint32_t __l[2]; } __x; asm("xchgl %0,%1": "=r"(__x.__l[0]),"=r"(__x.__l[1]): "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32)))); return __x.__ll; +#endif } #define bswap_64(x) ByteSwap64(x) @@ -456,7 +456,14 @@ if test -z "$_target" ; then case "`( uname -m ) 2>&1`" in i[3-9]86*|x86|x86pc|k5|k6|k6_2|k6_3|k6-2|k6-3|pentium*|athlon*|i586_i686|i586-i686|BePC) host_arch=i386 ;; ia64) host_arch=ia64 ;; - x86_64|amd64) host_arch=x86_64 ;; + x86_64|amd64) + if [ "`$_cc -dumpmachine | grep x86_64 | cut -d- -f1`" = "x86_64" -a \ + -z "`echo $CFLAGS | grep -- -m32`" ]; then + host_arch=x86_64 + else + host_arch=i386 + fi + ;; macppc|ppc) host_arch=ppc ;; alpha) host_arch=alpha ;; sparc) host_arch=sparc ;; @@ -672,17 +679,8 @@ elif x86; then _cpuinfo="TOOLS/cpuinfo" fi -case "$host_arch" in - i[3-9]86|x86|x86pc|k5|k6|k6-2|k6-3|pentium*|athlon*|i586-i686) - _def_arch="#define ARCH_X86 1" - _target_arch="TARGET_ARCH_X86 = yes" - - pname=`$_cpuinfo | grep 'model name' | cut -d ':' -f 2 | head -1` - pvendor=`$_cpuinfo | grep 'vendor_id' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` - pfamily=`$_cpuinfo | grep 'cpu family' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` - pmodel=`$_cpuinfo | grep -v 'model name' | grep 'model' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` - pstepping=`$_cpuinfo | grep 'stepping' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` - +x86_exts_check() +{ pparam=`$_cpuinfo | grep 'features' | cut -d ':' -f 2 | head -1` if test -z "$pparam" ; then pparam=`$_cpuinfo | grep 'flags' | cut -d ':' -f 2 | head -1` @@ -707,6 +705,20 @@ case "$host_arch" in sse2) _sse2=yes ;; esac done +} + +case "$host_arch" in + i[3-9]86|x86|x86pc|k5|k6|k6-2|k6-3|pentium*|athlon*|i586-i686) + _def_arch="#define ARCH_X86 1" + _target_arch="TARGET_ARCH_X86 = yes" + + pname=`$_cpuinfo | grep 'model name' | cut -d ':' -f 2 | head -1` + pvendor=`$_cpuinfo | grep 'vendor_id' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` + pfamily=`$_cpuinfo | grep 'cpu family' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` + pmodel=`$_cpuinfo | grep -v 'model name' | grep 'model' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` + pstepping=`$_cpuinfo | grep 'stepping' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` + + x86_exts_check echocheck "CPU vendor" echores "$pvendor ($pfamily:$pmodel:$pstepping)" @@ -904,6 +916,7 @@ EOF _march='' _mcpu='' _optimizing='' + x86_exts_check ;; sparc) diff --git a/cpudetect.c b/cpudetect.c index 9e05236319..65d7e5ed6d 100644 --- a/cpudetect.c +++ b/cpudetect.c @@ -9,7 +9,7 @@ CpuCaps gCpuCaps; #endif #include <stdlib.h> -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #include <stdio.h> #include <string.h> @@ -47,25 +47,25 @@ static void check_os_katmai_support( void ); // return TRUE if cpuid supported static int has_cpuid() { - int a, c; + long a, c; // code from libavcodec: __asm__ __volatile__ ( /* See if CPUID instruction is supported ... */ /* ... Get copies of EFLAGS into eax and ecx */ "pushf\n\t" - "popl %0\n\t" - "movl %0, %1\n\t" + "pop %0\n\t" + "mov %0, %1\n\t" /* ... Toggle the ID bit in one copy and store */ /* to the EFLAGS reg */ - "xorl $0x200000, %0\n\t" + "xor $0x200000, %0\n\t" "push %0\n\t" "popf\n\t" /* ... Get the (hopefully modified) EFLAGS */ "pushf\n\t" - "popl %0\n\t" + "pop %0\n\t" : "=a" (a), "=c" (c) : : "cc" @@ -87,9 +87,9 @@ do_cpuid(unsigned int ax, unsigned int *p) #else // code from libavcodec: __asm __volatile - ("movl %%ebx, %%esi\n\t" + ("mov %%"REG_b", %%"REG_S"\n\t" "cpuid\n\t" - "xchgl %%ebx, %%esi" + "xchg %%"REG_b", %%"REG_S : "=a" (p[0]), "=S" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); @@ -456,7 +456,7 @@ static void check_os_katmai_support( void ) gCpuCaps.hasSSE=0; #endif /* __linux__ */ } -#else /* ARCH_X86 */ +#else /* ARCH_X86 || ARCH_X86_64 */ #ifdef SYS_DARWIN #include <sys/sysctl.h> @@ -536,10 +536,6 @@ void GetCpuCaps( CpuCaps *caps) mp_msg(MSGT_CPUDETECT,MSGL_INFO,"CPU: Intel Itanium\n"); #endif -#ifdef ARCH_X86_64 - mp_msg(MSGT_CPUDETECT,MSGL_INFO,"CPU: Advanced Micro Devices 64-bit CPU\n"); -#endif - #ifdef ARCH_SPARC mp_msg(MSGT_CPUDETECT,MSGL_INFO,"CPU: Sun Sparc\n"); #endif diff --git a/cpudetect.h b/cpudetect.h index c178eb7b39..997ec11195 100644 --- a/cpudetect.h +++ b/cpudetect.h @@ -6,6 +6,32 @@ #define CPUTYPE_I586 5 #define CPUTYPE_I686 6 +#ifdef ARCH_X86_64 +# define REGa rax +# define REGb rbx +# define REGSP rsp +# define REG_a "rax" +# define REG_b "rbx" +# define REG_c "rcx" +# define REG_d "rdx" +# define REG_S "rsi" +# define REG_D "rdi" +# define REG_SP "rsp" +# define REG_BP "rbp" +#else +# define REGa eax +# define REGb ebx +# define REGSP esp +# define REG_a "eax" +# define REG_b "ebx" +# define REG_c "ecx" +# define REG_d "edx" +# define REG_S "esi" +# define REG_D "edi" +# define REG_SP "esp" +# define REG_BP "ebp" +#endif + typedef struct cpucaps_s { int cpuType; int cpuStepping; diff --git a/libmpcodecs/pullup.c b/libmpcodecs/pullup.c index 2abaa158a7..3627084cb5 100644 --- a/libmpcodecs/pullup.c +++ b/libmpcodecs/pullup.c @@ -8,6 +8,7 @@ +#ifdef ARCH_X86 #ifdef HAVE_MMX static int diff_y_mmx(unsigned char *a, unsigned char *b, int s) { @@ -147,6 +148,7 @@ static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s) return ret; } #endif +#endif #define ABS(a) (((a)^((a)>>31))-((a)>>31)) @@ -682,12 +684,14 @@ void pullup_init_context(struct pullup_context *c) case PULLUP_FMT_Y: c->diff = diff_y; c->comb = licomb_y; +#ifdef ARCH_X86 #ifdef HAVE_MMX if (c->cpu & PULLUP_CPU_MMX) { c->diff = diff_y_mmx; c->comb = licomb_y_mmx; } #endif +#endif /* c->comb = qpcomb_y; */ break; #if 0 diff --git a/libmpcodecs/vf_decimate.c b/libmpcodecs/vf_decimate.c index 7cc8d2af76..1a80e9dd07 100644 --- a/libmpcodecs/vf_decimate.c +++ b/libmpcodecs/vf_decimate.c @@ -31,11 +31,11 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) ".balign 16 \n\t" "1: \n\t" - "movq (%%esi), %%mm0 \n\t" - "movq (%%esi), %%mm2 \n\t" - "addl %%eax, %%esi \n\t" - "movq (%%edi), %%mm1 \n\t" - "addl %%ebx, %%edi \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "movq (%%"REG_D"), %%mm1 \n\t" + "add %%"REG_b", %%"REG_D" \n\t" "psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm0 \n\t" @@ -51,10 +51,10 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) "decl %%ecx \n\t" "jnz 1b \n\t" - "movq %%mm4, (%%edx) \n\t" + "movq %%mm4, (%%"REG_d") \n\t" "emms \n\t" : - : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out) + : "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out) : "memory" ); return out[0]+out[1]+out[2]+out[3]; diff --git a/libmpcodecs/vf_divtc.c b/libmpcodecs/vf_divtc.c index e17600edbd..d3f287a73d 100644 --- a/libmpcodecs/vf_divtc.c +++ b/libmpcodecs/vf_divtc.c @@ -44,11 +44,11 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) ".balign 16 \n\t" "1: \n\t" - "movq (%%esi), %%mm0 \n\t" - "movq (%%esi), %%mm2 \n\t" - "addl %%eax, %%esi \n\t" - "movq (%%edi), %%mm1 \n\t" - "addl %%ebx, %%edi \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "movq (%%"REG_D"), %%mm1 \n\t" + "add %%"REG_b", %%"REG_D" \n\t" "psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm0 \n\t" @@ -64,10 +64,10 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) "decl %%ecx \n\t" "jnz 1b \n\t" - "movq %%mm4, (%%edx) \n\t" + "movq %%mm4, (%%"REG_d") \n\t" "emms \n\t" : - : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out) + : "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out) : "memory" ); return out[0]+out[1]+out[2]+out[3]; diff --git a/libmpcodecs/vf_eq.c b/libmpcodecs/vf_eq.c index 74395f61c6..d7adeea1ff 100644 --- a/libmpcodecs/vf_eq.c +++ b/libmpcodecs/vf_eq.c @@ -64,9 +64,9 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in "paddw %%mm3, %%mm1 \n\t" "paddw %%mm3, %%mm2 \n\t" "packuswb %%mm2, %%mm1 \n\t" - "addl $8, %0 \n\t" + "add $8, %0 \n\t" "movq %%mm1, (%1) \n\t" - "addl $8, %1 \n\t" + "add $8, %1 \n\t" "decl %%eax \n\t" "jnz 1b \n\t" : "=r" (src), "=r" (dest) diff --git a/libmpcodecs/vf_eq2.c b/libmpcodecs/vf_eq2.c index 123bcea55e..f2641f9c0b 100644 --- a/libmpcodecs/vf_eq2.c +++ b/libmpcodecs/vf_eq2.c @@ -152,9 +152,9 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src, "paddw %%mm3, %%mm1 \n\t" "paddw %%mm3, %%mm2 \n\t" "packuswb %%mm2, %%mm1 \n\t" - "addl $8, %0 \n\t" + "add $8, %0 \n\t" "movq %%mm1, (%1) \n\t" - "addl $8, %1 \n\t" + "add $8, %1 \n\t" "decl %%eax \n\t" "jnz 1b \n\t" : "=r" (src), "=r" (dst) diff --git a/libmpcodecs/vf_filmdint.c b/libmpcodecs/vf_filmdint.c index 27a527cac3..90e25b8423 100644 --- a/libmpcodecs/vf_filmdint.c +++ b/libmpcodecs/vf_filmdint.c @@ -406,8 +406,8 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, "psllq $16, %%mm0\n\t" \ "paddusw %%mm0, %%mm7\n\t" \ "movq (%1), %%mm4\n\t" \ - "leal (%0,%2,2), %0\n\t" \ - "leal (%1,%3,2), %1\n\t" \ + "lea (%0,%2,2), %0\n\t" \ + "lea (%1,%3,2), %1\n\t" \ "psubusb %4, %%mm4\n\t" \ PAVGB(%%mm2, %%mm4) \ PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \ @@ -440,7 +440,7 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, "paddusw %%mm2, %%mm7\n\t" \ "paddusw %%mm1, %%mm7\n\t" \ : "=r" (a), "=r" (b) \ - : "r"(as), "r"(bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \ + : "r"((long)as), "r"((long)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \ ); \ } while (--lines); @@ -650,7 +650,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos, "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */ "movq %%mm1, (%2,%4) \n\t" : /* no output */ - : "r" (a), "r" (bos), "r" (dst), "r" (ss), "r" (ds), "r" (cos) + : "r" (a), "r" (bos), "r" (dst), "r" ((long)ss), "r" ((long)ds), "r" (cos) ); a += 8; dst += 8; diff --git a/libmpcodecs/vf_halfpack.c b/libmpcodecs/vf_halfpack.c index b4fc0e648f..900aed6a9c 100644 --- a/libmpcodecs/vf_halfpack.c +++ b/libmpcodecs/vf_halfpack.c @@ -75,13 +75,13 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3], "por %%mm5, %%mm1 \n\t" "por %%mm6, %%mm2 \n\t" - "addl $8, %0 \n\t" - "addl $8, %1 \n\t" - "addl $4, %2 \n\t" - "addl $4, %3 \n\t" + "add $8, %0 \n\t" + "add $8, %1 \n\t" + "add $4, %2 \n\t" + "add $4, %3 \n\t" "movq %%mm1, (%8) \n\t" "movq %%mm2, 8(%8) \n\t" - "addl $16, %8 \n\t" + "add $16, %8 \n\t" "decl %9 \n\t" "jnz 1b \n\t" : "=r" (y1), "=r" (y2), "=r" (u), "=r" (v) diff --git a/libmpcodecs/vf_ilpack.c b/libmpcodecs/vf_ilpack.c index 66bad26de4..43c6bad218 100644 --- a/libmpcodecs/vf_ilpack.c +++ b/libmpcodecs/vf_ilpack.c @@ -76,12 +76,12 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y, "punpcklbw %%mm4, %%mm1 \n\t" "punpckhbw %%mm4, %%mm2 \n\t" - "addl $8, %0 \n\t" - "addl $4, %1 \n\t" - "addl $4, %2 \n\t" + "add $8, %0 \n\t" + "add $4, %1 \n\t" + "add $4, %2 \n\t" "movq %%mm1, (%3) \n\t" "movq %%mm2, 8(%3) \n\t" - "addl $16, %3 \n\t" + "add $16, %3 \n\t" "decl %4 \n\t" "jnz 1b \n\t" "emms \n\t" @@ -96,22 +96,26 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int us, int vs) { asm volatile ("" - "pushl %%ebp \n\t" - "movl 4(%%edx), %%ebp \n\t" - "movl (%%edx), %%edx \n\t" + "push %%"REG_BP" \n\t" +#ifdef ARCH_X86_64 + "mov %6, %%"REG_BP" \n\t" +#else + "movl 4(%%"REG_d"), %%"REG_BP" \n\t" + "movl (%%"REG_d"), %%"REG_d" \n\t" +#endif "pxor %%mm0, %%mm0 \n\t" ".balign 16 \n\t" ".Lli0: \n\t" - "movq (%%esi), %%mm1 \n\t" - "movq (%%esi), %%mm2 \n\t" + "movq (%%"REG_S"), %%mm1 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" - "movq (%%eax,%%edx,2), %%mm4 \n\t" - "movq (%%ebx,%%ebp,2), %%mm6 \n\t" + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" "punpcklbw %%mm0, %%mm4 \n\t" "punpcklbw %%mm0, %%mm6 \n\t" - "movq (%%eax), %%mm3 \n\t" - "movq (%%ebx), %%mm5 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" "punpcklbw %%mm0, %%mm3 \n\t" "punpcklbw %%mm0, %%mm5 \n\t" "paddw %%mm3, %%mm4 \n\t" @@ -136,18 +140,18 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, "punpcklbw %%mm4, %%mm1 \n\t" "punpckhbw %%mm4, %%mm2 \n\t" - "movq %%mm1, (%%edi) \n\t" - "movq %%mm2, 8(%%edi) \n\t" + "movq %%mm1, (%%"REG_D") \n\t" + "movq %%mm2, 8(%%"REG_D") \n\t" - "movq 8(%%esi), %%mm1 \n\t" - "movq 8(%%esi), %%mm2 \n\t" + "movq 8(%%"REG_S"), %%mm1 \n\t" + "movq 8(%%"REG_S"), %%mm2 \n\t" - "movq (%%eax,%%edx,2), %%mm4 \n\t" - "movq (%%ebx,%%ebp,2), %%mm6 \n\t" + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" "punpckhbw %%mm0, %%mm4 \n\t" "punpckhbw %%mm0, %%mm6 \n\t" - "movq (%%eax), %%mm3 \n\t" - "movq (%%ebx), %%mm5 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" "punpckhbw %%mm0, %%mm3 \n\t" "punpckhbw %%mm0, %%mm5 \n\t" "paddw %%mm3, %%mm4 \n\t" @@ -172,20 +176,25 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, "punpcklbw %%mm4, %%mm1 \n\t" "punpckhbw %%mm4, %%mm2 \n\t" - "addl $16, %%esi \n\t" - "addl $8, %%eax \n\t" - "addl $8, %%ebx \n\t" + "add $16, %%"REG_S" \n\t" + "add $8, %%"REG_a" \n\t" + "add $8, %%"REG_b" \n\t" - "movq %%mm1, 16(%%edi) \n\t" - "movq %%mm2, 24(%%edi) \n\t" - "addl $32, %%edi \n\t" + "movq %%mm1, 16(%%"REG_D") \n\t" + "movq %%mm2, 24(%%"REG_D") \n\t" + "add $32, %%"REG_D" \n\t" "decl %%ecx \n\t" "jnz .Lli0 \n\t" "emms \n\t" - "popl %%ebp \n\t" + "pop %%"REG_BP" \n\t" : - : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16) + : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), +#ifdef ARCH_X86_64 + "d" ((long)us), "r" ((long)vs) +#else + "d" (&us) +#endif : "memory" ); pack_li_0_C(dst, y, u, v, (w&15), us, vs); @@ -195,22 +204,26 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int us, int vs) { asm volatile ("" - "pushl %%ebp \n\t" - "movl 4(%%edx), %%ebp \n\t" - "movl (%%edx), %%edx \n\t" + "push %%"REG_BP" \n\t" +#ifdef ARCH_X86_64 + "mov %6, %%"REG_BP" \n\t" +#else + "movl 4(%%"REG_d"), %%"REG_BP" \n\t" + "movl (%%"REG_d"), %%"REG_d" \n\t" +#endif "pxor %%mm0, %%mm0 \n\t" ".balign 16 \n\t" ".Lli1: \n\t" - "movq (%%esi), %%mm1 \n\t" - "movq (%%esi), %%mm2 \n\t" + "movq (%%"REG_S"), %%mm1 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" - "movq (%%eax,%%edx,2), %%mm4 \n\t" - "movq (%%ebx,%%ebp,2), %%mm6 \n\t" + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" "punpcklbw %%mm0, %%mm4 \n\t" "punpcklbw %%mm0, %%mm6 \n\t" - "movq (%%eax), %%mm3 \n\t" - "movq (%%ebx), %%mm5 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" "punpcklbw %%mm0, %%mm3 \n\t" "punpcklbw %%mm0, %%mm5 \n\t" "movq %%mm4, %%mm7 \n\t" @@ -237,18 +250,18 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, "punpcklbw %%mm4, %%mm1 \n\t" "punpckhbw %%mm4, %%mm2 \n\t" - "movq %%mm1, (%%edi) \n\t" - "movq %%mm2, 8(%%edi) \n\t" + "movq %%mm1, (%%"REG_D") \n\t" + "movq %%mm2, 8(%%"REG_D") \n\t" - "movq 8(%%esi), %%mm1 \n\t" - "movq 8(%%esi), %%mm2 \n\t" + "movq 8(%%"REG_S"), %%mm1 \n\t" + "movq 8(%%"REG_S"), %%mm2 \n\t" - "movq (%%eax,%%edx,2), %%mm4 \n\t" - "movq (%%ebx,%%ebp,2), %%mm6 \n\t" + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" "punpckhbw %%mm0, %%mm4 \n\t" "punpckhbw %%mm0, %%mm6 \n\t" - "movq (%%eax), %%mm3 \n\t" - "movq (%%ebx), %%mm5 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" "punpckhbw %%mm0, %%mm3 \n\t" "punpckhbw %%mm0, %%mm5 \n\t" "movq %%mm4, %%mm7 \n\t" @@ -275,20 +288,25 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, "punpcklbw %%mm4, %%mm1 \n\t" "punpckhbw %%mm4, %%mm2 \n\t" - "addl $16, %%esi \n\t" - "addl $8, %%eax \n\t" - "addl $8, %%ebx \n\t" + "add $16, %%"REG_S" \n\t" + "add $8, %%"REG_a" \n\t" + "add $8, %%"REG_b" \n\t" - "movq %%mm1, 16(%%edi) \n\t" - "movq %%mm2, 24(%%edi) \n\t" - "addl $32, %%edi \n\t" + "movq %%mm1, 16(%%"REG_D") \n\t" + "movq %%mm2, 24(%%"REG_D") \n\t" + "add $32, %%"REG_D" \n\t" "decl %%ecx \n\t" "jnz .Lli1 \n\t" "emms \n\t" - "popl %%ebp \n\t" + "pop %%"REG_BP" \n\t" : - : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16) + : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), +#ifdef ARCH_X86_64 + "d" ((long)us), "r" ((long)vs) +#else + "d" (&us) +#endif : "memory" ); pack_li_1_C(dst, y, u, v, (w&15), us, vs); diff --git a/libmpcodecs/vf_ivtc.c b/libmpcodecs/vf_ivtc.c index 804f68a084..3fb00e5f7b 100644 --- a/libmpcodecs/vf_ivtc.c +++ b/libmpcodecs/vf_ivtc.c @@ -71,11 +71,11 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "1: \n\t" // Even difference - "movq (%%esi), %%mm0 \n\t" - "movq (%%esi), %%mm2 \n\t" - "addl %%eax, %%esi \n\t" - "movq (%%edi), %%mm1 \n\t" - "addl %%ebx, %%edi \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "movq (%%"REG_D"), %%mm1 \n\t" + "add %%"REG_b", %%"REG_D" \n\t" "psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm0 \n\t" @@ -90,11 +90,11 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "paddw %%mm3, %%mm4 \n\t" // Odd difference - "movq (%%esi), %%mm0 \n\t" - "movq (%%esi), %%mm2 \n\t" - "addl %%eax, %%esi \n\t" - "movq (%%edi), %%mm1 \n\t" - "addl %%ebx, %%edi \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "movq (%%"REG_D"), %%mm1 \n\t" + "add %%"REG_b", %%"REG_D" \n\t" "psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm0 \n\t" @@ -110,8 +110,8 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "decl %%ecx \n\t" "jnz 1b \n\t" - "movq %%mm4, (%%edx) \n\t" - "movq %%mm5, 8(%%edx) \n\t" + "movq %%mm4, (%%"REG_d") \n\t" + "movq %%mm5, 8(%%"REG_d") \n\t" : : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out) : "memory" @@ -130,14 +130,14 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char ".balign 16 \n\t" "2: \n\t" - "movq (%%esi), %%mm0 \n\t" - "movq (%%esi,%%eax), %%mm1 \n\t" - "addl %%eax, %%esi \n\t" - "addl %%eax, %%esi \n\t" - "movq (%%edi), %%mm2 \n\t" - "movq (%%edi,%%ebx), %%mm3 \n\t" - "addl %%ebx, %%edi \n\t" - "addl %%ebx, %%edi \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "movq (%%"REG_D"), %%mm2 \n\t" + "movq (%%"REG_D",%%"REG_b"), %%mm3 \n\t" + "add %%"REG_b", %%"REG_D" \n\t" + "add %%"REG_b", %%"REG_D" \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" @@ -164,16 +164,16 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "psubw %%mm1, %%mm4 \n\t" "psubw %%mm2, %%mm5 \n\t" "psubw %%mm3, %%mm6 \n\t" - "movq %%mm4, (%%edx) \n\t" - "movq %%mm5, 16(%%edx) \n\t" - "movq %%mm6, 32(%%edx) \n\t" + "movq %%mm4, (%%"REG_d") \n\t" + "movq %%mm5, 16(%%"REG_d") \n\t" + "movq %%mm6, 32(%%"REG_d") \n\t" - "movl %%eax, %%ecx \n\t" - "shll $3, %%ecx \n\t" - "subl %%ecx, %%esi \n\t" - "movl %%ebx, %%ecx \n\t" - "shll $3, %%ecx \n\t" - "subl %%ecx, %%edi \n\t" + "mov %%"REG_a", %%"REG_c" \n\t" + "shl $3, %%"REG_c" \n\t" + "sub %%"REG_c", %%"REG_S" \n\t" + "mov %%"REG_b", %%"REG_c" \n\t" + "shl $3, %%"REG_c" \n\t" + "sub %%"REG_c", %%"REG_D" \n\t" // Second loop for the last four columns "movl $4, %%ecx \n\t" @@ -184,14 +184,14 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char ".balign 16 \n\t" "3: \n\t" - "movq (%%esi), %%mm0 \n\t" - "movq (%%esi,%%eax), %%mm1 \n\t" - "addl %%eax, %%esi \n\t" - "addl %%eax, %%esi \n\t" - "movq (%%edi), %%mm2 \n\t" - "movq (%%edi,%%ebx), %%mm3 \n\t" - "addl %%ebx, %%edi \n\t" - "addl %%ebx, %%edi \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "add %%"REG_a", %%"REG_S" \n\t" + "movq (%%"REG_D"), %%mm2 \n\t" + "movq (%%"REG_D",%%"REG_b"), %%mm3 \n\t" + "add %%"REG_b", %%"REG_D" \n\t" + "add %%"REG_b", %%"REG_D" \n\t" "punpckhbw %%mm7, %%mm0 \n\t" "punpckhbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" @@ -218,13 +218,13 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "psubw %%mm1, %%mm4 \n\t" "psubw %%mm2, %%mm5 \n\t" "psubw %%mm3, %%mm6 \n\t" - "movq %%mm4, 8(%%edx) \n\t" - "movq %%mm5, 24(%%edx) \n\t" - "movq %%mm6, 40(%%edx) \n\t" + "movq %%mm4, 8(%%"REG_d") \n\t" + "movq %%mm5, 24(%%"REG_d") \n\t" + "movq %%mm6, 40(%%"REG_d") \n\t" "emms \n\t" : - : "S" (old), "D" (new), "a" (os), "b" (ns), "d" (out) + : "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out) : "memory" ); m->p = m->t = m->s = 0; diff --git a/libmpcodecs/vf_noise.c b/libmpcodecs/vf_noise.c index 14ad8f9604..c8f669bffa 100644 --- a/libmpcodecs/vf_noise.c +++ b/libmpcodecs/vf_noise.c @@ -143,26 +143,26 @@ static int8_t *initNoise(FilterParam *fp){ #ifdef HAVE_MMX static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift){ - int mmx_len= len&(~7); + long mmx_len= len&(~7); noise+=shift; asm volatile( - "movl %3, %%eax \n\t" + "mov %3, %%"REG_a" \n\t" "pcmpeqb %%mm7, %%mm7 \n\t" "psllw $15, %%mm7 \n\t" "packsswb %%mm7, %%mm7 \n\t" ".balign 16 \n\t" "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq (%1, %%eax), %%mm1 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" "pxor %%mm7, %%mm0 \n\t" "paddsb %%mm1, %%mm0 \n\t" "pxor %%mm7, %%mm0 \n\t" - "movq %%mm0, (%2, %%eax) \n\t" - "addl $8, %%eax \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%eax" + : "%"REG_a ); if(mmx_len!=len) lineNoise_C(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); @@ -172,26 +172,26 @@ static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int //duplicate of previous except movntq #ifdef HAVE_MMX2 static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift){ - int mmx_len= len&(~7); + long mmx_len= len&(~7); noise+=shift; asm volatile( - "movl %3, %%eax \n\t" + "mov %3, %%"REG_a" \n\t" "pcmpeqb %%mm7, %%mm7 \n\t" "psllw $15, %%mm7 \n\t" "packsswb %%mm7, %%mm7 \n\t" ".balign 16 \n\t" "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq (%1, %%eax), %%mm1 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" "pxor %%mm7, %%mm0 \n\t" "paddsb %%mm1, %%mm0 \n\t" "pxor %%mm7, %%mm0 \n\t" - "movntq %%mm0, (%2, %%eax) \n\t" - "addl $8, %%eax \n\t" + "movntq %%mm0, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%eax" + : "%"REG_a ); if(mmx_len!=len) lineNoise_C(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); @@ -214,16 +214,16 @@ static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int le #ifdef HAVE_MMX static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t **shift){ - int mmx_len= len&(~7); + long mmx_len= len&(~7); asm volatile( - "movl %5, %%eax \n\t" + "mov % |