diff options
-rw-r--r-- | asmalign.h | 7 | ||||
-rwxr-xr-x | configure | 5 | ||||
-rw-r--r-- | liba52/downmix.c | 55 | ||||
-rw-r--r-- | liba52/imdct.c | 21 | ||||
-rw-r--r-- | libmpcodecs/pullup.c | 7 | ||||
-rw-r--r-- | libmpcodecs/vf_decimate.c | 3 | ||||
-rw-r--r-- | libmpcodecs/vf_divtc.c | 3 | ||||
-rw-r--r-- | libmpcodecs/vf_eq.c | 3 | ||||
-rw-r--r-- | libmpcodecs/vf_eq2.c | 3 | ||||
-rw-r--r-- | libmpcodecs/vf_fspp.c | 3 | ||||
-rw-r--r-- | libmpcodecs/vf_halfpack.c | 3 | ||||
-rw-r--r-- | libmpcodecs/vf_ilpack.c | 7 | ||||
-rw-r--r-- | libmpcodecs/vf_ivtc.c | 7 | ||||
-rw-r--r-- | libmpcodecs/vf_noise.c | 7 | ||||
-rw-r--r-- | mangle.h | 2 | ||||
-rw-r--r-- | postproc/rgb2rgb_template.c | 35 | ||||
-rw-r--r-- | postproc/swscale_template.c | 40 |
17 files changed, 123 insertions, 88 deletions
diff --git a/asmalign.h b/asmalign.h new file mode 100644 index 0000000000..45a59cdaa7 --- /dev/null +++ b/asmalign.h @@ -0,0 +1,7 @@ +#ifdef SYS_DARWIN +#define ASMALIGN8 ".align 3\n\t" +#define ASMALIGN16 ".align 4\n\t" +#else +#define ASMALIGN8 ".balign 8\n\t" +#define ASMALIGN16 ".balign 16\n\t" +#endif @@ -6972,7 +6972,10 @@ fi echores "$_crash_debug" if darwin ; then - CFLAGS="$CFLAGS -mdynamic-no-pic -falign-loops=16 -DSYS_DARWIN" + CFLAGS="$CFLAGS -mdynamic-no-pic -falign-loops=16 -DSYS_DARWIN -DCONFIG_DARWIN" + if x86 ; then + CFLAGS="$CFLAGS -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk" + fi if [ "$_cc_major" = 3 ] && [ "$_cc_minor" -lt 1 ]; then CFLAGS="$CFLAGS -no-cpp-precomp" fi diff --git a/liba52/downmix.c b/liba52/downmix.c index 52955c6335..67eee7a89e 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -28,6 +28,7 @@ */ #include "config.h" +#include "asmalign.h" #include <string.h> #include <inttypes.h> @@ -691,7 +692,7 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps (%0, %%"REG_S"), %%xmm0 \n\t" "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" @@ -714,7 +715,7 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps (%0, %%"REG_S"), %%xmm0 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" @@ -735,7 +736,7 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps (%0, %%"REG_S"), %%xmm0 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" @@ -757,7 +758,7 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" 
- ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps (%0, %%"REG_S"), %%xmm0 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" @@ -780,7 +781,7 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common @@ -803,7 +804,7 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common @@ -826,7 +827,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround "movaps (%0, %%"REG_S"), %%xmm1 \n\t" @@ -850,7 +851,7 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" @@ -874,7 +875,7 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround @@ -900,7 +901,7 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround @@ -925,7 +926,7 @@ static void mix32to2_SSE (sample_t * samples, 
sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common @@ -949,7 +950,7 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias) "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" @@ -976,7 +977,7 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movaps (%0, %%"REG_S"), %%xmm0 \n\t" "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" @@ -998,7 +999,7 @@ static void zero_MMX(sample_t * samples) asm volatile( "mov $-1024, %%"REG_S" \n\t" "pxor %%mm0, %%mm0 \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq %%mm0, (%0, %%"REG_S") \n\t" "movq %%mm0, 8(%0, %%"REG_S") \n\t" @@ -1258,7 +1259,7 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) "movd %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" @@ -1289,7 +1290,7 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" @@ -1316,7 +1317,7 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" @@ -1345,7 +1346,7 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias) "movd %1, 
%%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" @@ -1376,7 +1377,7 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" @@ -1407,7 +1408,7 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) "movd %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 1024(%1, %%"REG_S"), %%mm0\n\t" "movq 1032(%1, %%"REG_S"), %%mm1\n\t" @@ -1438,7 +1439,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround @@ -1471,7 +1472,7 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" @@ -1504,7 +1505,7 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" @@ -1541,7 +1542,7 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias) "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 2048(%0, %%"REG_S"), %%mm0\n\t" "movq 2056(%0, %%"REG_S"), %%mm1\n\t" @@ -1576,7 +1577,7 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias) "movd %1, 
%%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" @@ -1608,7 +1609,7 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias) { asm volatile( "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" @@ -1649,7 +1650,7 @@ static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) "movd %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t" "mov $-1024, %%"REG_S" \n\t" - ".balign 16\n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" diff --git a/liba52/imdct.c b/liba52/imdct.c index 24505e17ab..a535823584 100644 --- a/liba52/imdct.c +++ b/liba52/imdct.c @@ -31,6 +31,7 @@ */ #include "config.h" +#include "asmalign.h" #include <math.h> #include <stdio.h> @@ -792,7 +793,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" "mov $1008, %%"REG_D" \n\t" "push %%"REG_BP" \n\t" //use ebp without telling gcc - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI @@ -851,7 +852,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "xorps %%xmm1, %%xmm1 \n\t" "xorps %%xmm2, %%xmm2 \n\t" "mov %0, %%"REG_S" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] @@ -872,7 +873,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) asm volatile( "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 "mov %0, %%"REG_S" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 @@ -903,7 +904,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "xorps %%xmm5, %%xmm5 \n\t" 
"xorps %%xmm2, %%xmm2 \n\t" "mov %0, %%"REG_S" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 @@ -944,7 +945,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) buf_offset = buf+128; asm volatile( "mov %0, %%"REG_S" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "xor %%"REG_D", %%"REG_D" \n\t" // k "lea (%%"REG_S", %3), %%"REG_d" \n\t" @@ -976,7 +977,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) /* Post IFFT complex multiply plus IFFT complex conjugate*/ asm volatile( "mov $-1024, %%"REG_S" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movaps (%0, %%"REG_S"), %%xmm0 \n\t" "movaps (%0, %%"REG_S"), %%xmm1 \n\t" @@ -1002,7 +1003,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "xor %%"REG_S", %%"REG_S" \n\t" // 0 "movss %3, %%xmm2 \n\t" // bias "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? @@ -1029,7 +1030,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "xor %%"REG_S", %%"REG_S" \n\t" // 0 "movss %3, %%xmm2 \n\t" // bias "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C @@ -1056,7 +1057,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) asm volatile( "xor %%"REG_D", %%"REG_D" \n\t" // 0 "xor %%"REG_S", %%"REG_S" \n\t" // 0 - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? 
C @@ -1078,7 +1079,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) asm volatile( "mov $1024, %%"REG_D" \n\t" // 1024 "xor %%"REG_S", %%"REG_S" \n\t" // 0 - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? diff --git a/libmpcodecs/pullup.c b/libmpcodecs/pullup.c index df4069cc67..12bc5e739c 100644 --- a/libmpcodecs/pullup.c +++ b/libmpcodecs/pullup.c @@ -5,6 +5,7 @@ #include <string.h> #include "pullup.h" #include "config.h" +#include "asmalign.h" @@ -18,7 +19,7 @@ static int diff_y_mmx(unsigned char *a, unsigned char *b, int s) "pxor %%mm4, %%mm4 \n\t" "pxor %%mm7, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%%esi), %%mm0 \n\t" @@ -67,7 +68,7 @@ static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s) "pxor %%mm7, %%mm7 \n\t" "subl %%eax, %%edi \n\t" - ".balign 16 \n\t" + ASMALIGN16 "2: \n\t" "movq (%%esi), %%mm0 \n\t" @@ -156,7 +157,7 @@ static int var_y_mmx(unsigned char *a, unsigned char *b, int s) "pxor %%mm4, %%mm4 \n\t" "pxor %%mm7, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%%esi), %%mm0 \n\t" diff --git a/libmpcodecs/vf_decimate.c b/libmpcodecs/vf_decimate.c index 0fc7bb9a5a..03c05a4af5 100644 --- a/libmpcodecs/vf_decimate.c +++ b/libmpcodecs/vf_decimate.c @@ -5,6 +5,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #include "img_format.h" #include "mp_image.h" @@ -28,7 +29,7 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) "pxor %%mm4, %%mm4 \n\t" "pxor %%mm7, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%%"REG_S"), %%mm0 \n\t" diff --git a/libmpcodecs/vf_divtc.c b/libmpcodecs/vf_divtc.c index 849f694c09..2efd7a33be 100644 --- a/libmpcodecs/vf_divtc.c +++ b/libmpcodecs/vf_divtc.c @@ -8,6 +8,7 @@ #include "mp_msg.h" #include "cpudetect.h" #include "bswap.h" +#include "asmalign.h" #include "img_format.h" #include 
"mp_image.h" @@ -41,7 +42,7 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) "pxor %%mm4, %%mm4 \n\t" "pxor %%mm7, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%%"REG_S"), %%mm0 \n\t" diff --git a/libmpcodecs/vf_eq.c b/libmpcodecs/vf_eq.c index 6f1bf675c1..504e91a605 100644 --- a/libmpcodecs/vf_eq.c +++ b/libmpcodecs/vf_eq.c @@ -6,6 +6,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #include "img_format.h" #include "mp_image.h" @@ -51,7 +52,7 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in "movq (%6), %%mm4 \n\t" "pxor %%mm0, %%mm0 \n\t" "movl %4, %%eax\n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%0), %%mm1 \n\t" "movq (%0), %%mm2 \n\t" diff --git a/libmpcodecs/vf_eq2.c b/libmpcodecs/vf_eq2.c index 9bb5ed7467..faac982f75 100644 --- a/libmpcodecs/vf_eq2.c +++ b/libmpcodecs/vf_eq2.c @@ -18,6 +18,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #include "img_format.h" #include "mp_image.h" @@ -135,7 +136,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src, "movq (%6), %%mm4 \n\t" "pxor %%mm0, %%mm0 \n\t" "movl %4, %%eax\n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%0), %%mm1 \n\t" "movq (%0), %%mm2 \n\t" diff --git a/libmpcodecs/vf_fspp.c b/libmpcodecs/vf_fspp.c index 304db8faed..006fcb0d47 100644 --- a/libmpcodecs/vf_fspp.c +++ b/libmpcodecs/vf_fspp.c @@ -37,6 +37,7 @@ #include <math.h> #include "config.h" +#include "asmalign.h" #include "mp_msg.h" #include "cpudetect.h" @@ -888,7 +889,7 @@ static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt) { asm volatile( - ".align 16 \n\t" + ASMALIGN16 "1: \n\t" "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t" // diff --git a/libmpcodecs/vf_halfpack.c b/libmpcodecs/vf_halfpack.c index 
99b569a8b1..7f8b87a21b 100644 --- a/libmpcodecs/vf_halfpack.c +++ b/libmpcodecs/vf_halfpack.c @@ -6,6 +6,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #include "img_format.h" #include "mp_image.h" @@ -40,7 +41,7 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3], for (h/=2; h; h--) { asm ( "pxor %%mm0, %%mm0 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%0), %%mm1 \n\t" "movq (%0), %%mm2 \n\t" diff --git a/libmpcodecs/vf_ilpack.c b/libmpcodecs/vf_ilpack.c index d369e5e295..7eb65c5818 100644 --- a/libmpcodecs/vf_ilpack.c +++ b/libmpcodecs/vf_ilpack.c @@ -6,6 +6,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #include "img_format.h" #include "mp_image.h" @@ -66,7 +67,7 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y, { int j; asm volatile ("" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%0), %%mm1 \n\t" "movq (%0), %%mm2 \n\t" @@ -105,7 +106,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, #endif "pxor %%mm0, %%mm0 \n\t" - ".balign 16 \n\t" + ASMALIGN16 ".Lli0: \n\t" "movq (%%"REG_S"), %%mm1 \n\t" "movq (%%"REG_S"), %%mm2 \n\t" @@ -213,7 +214,7 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, #endif "pxor %%mm0, %%mm0 \n\t" - ".balign 16 \n\t" + ASMALIGN16 ".Lli1: \n\t" "movq (%%"REG_S"), %%mm1 \n\t" "movq (%%"REG_S"), %%mm2 \n\t" diff --git a/libmpcodecs/vf_ivtc.c b/libmpcodecs/vf_ivtc.c index 62bc749447..50cabe0ee1 100644 --- a/libmpcodecs/vf_ivtc.c +++ b/libmpcodecs/vf_ivtc.c @@ -5,6 +5,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #include "img_format.h" #include "mp_image.h" @@ -67,7 +68,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "pxor %%mm5, %%mm5 \n\t" // 4 odd difference sums "pxor %%mm7, %%mm7 \n\t" // all zeros - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" // Even difference @@ -127,7 +128,7 @@ static 
void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "pxor %%mm5, %%mm5 \n\t" // Temporal noise "pxor %%mm6, %%mm6 \n\t" // Current spacial noise - ".balign 16 \n\t" + ASMALIGN16 "2: \n\t" "movq (%%"REG_S"), %%mm0 \n\t" @@ -181,7 +182,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char "pxor %%mm5, %%mm5 \n\t" "pxor %%mm6, %%mm6 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "3: \n\t" "movq (%%"REG_S"), %%mm0 \n\t" diff --git a/libmpcodecs/vf_noise.c b/libmpcodecs/vf_noise.c index ea762c0b0b..c3427b3fd9 100644 --- a/libmpcodecs/vf_noise.c +++ b/libmpcodecs/vf_noise.c @@ -25,6 +25,7 @@ #include "config.h" #include "mp_msg.h" #include "cpudetect.h" +#include "asmalign.h" #ifdef HAVE_MALLOC_H #include <malloc.h> @@ -153,7 +154,7 @@ static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int "pcmpeqb %%mm7, %%mm7 \n\t" "psllw $15, %%mm7 \n\t" "packsswb %%mm7, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm1 \n\t" @@ -182,7 +183,7 @@ static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int "pcmpeqb %%mm7, %%mm7 \n\t" "psllw $15, %%mm7 \n\t" "packsswb %%mm7, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm1 \n\t" @@ -220,7 +221,7 @@ static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t asm volatile( "mov %5, %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movq (%1, %%"REG_a"), %%mm1 \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t" @@ -9,7 +9,7 @@ /* Feel free to add more to the list, eg. 
a.out IMO */ #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \ - (defined(__OpenBSD__) && !defined(__ELF__)) + (defined(__OpenBSD__) && !defined(__ELF__)) || defined(__APPLE__) #define MANGLE(a) "_" #a #else #define MANGLE(a) #a diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c index d611debca2..d03493ca31 100644 --- a/postproc/rgb2rgb_template.c +++ b/postproc/rgb2rgb_template.c @@ -12,6 +12,8 @@ #include <stddef.h> #include <inttypes.h> /* for __WORDSIZE */ +#include "asmalign.h" + #ifndef __WORDSIZE // #warning You have misconfigured system and probably will lose performance! #define __WORDSIZE MP_WORDSIZE @@ -40,9 +42,14 @@ #define PREFETCHW "prefetcht0" #define PAVGB "pavgb" #else +#ifdef __APPLE__ +#define PREFETCH "#" +#define PREFETCHW "#" +#else /* not __APPLE__: keep the "/nop" placeholders; a bare #elif has no expression and is ill-formed */ #define PREFETCH "/nop" #define PREFETCHW "/nop" #endif +#endif #ifdef HAVE_3DNOW /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ @@ -56,8 +63,12 @@ #define SFENCE "sfence" #else #define MOVNTQ "movq" +#ifdef __APPLE__ +#define SFENCE "#" +#else /* not __APPLE__: keep the "/nop" placeholder; a bare #elif has no expression and is ill-formed */ #define SFENCE "/nop" #endif +#endif static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size) { @@ -332,7 +343,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" "movq %5, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1) \n\t" "movd (%1), %%mm0 \n\t" @@ -489,7 +500,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" "movq %5, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1) \n\t" "movd (%1), %%mm0 \n\t" @@ -1344,7 +1355,7 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s /* TODO: unroll this loop */ asm volatile ( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%0, %%"REG_a") \n\t" "movq (%0, %%"REG_a"), 
%%mm0 \n\t" @@ -1394,7 +1405,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s "movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t" "movq "MANGLE(mask24b)", %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1, %%"REG_a") \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG @@ -1464,7 +1475,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%2, %%"REG_a") \n\t" @@ -1617,7 +1628,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%2, %%"REG_a") \n\t" @@ -1741,7 +1752,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "xor %%"REG_a", %%"REG_a" \n\t" "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_a", 4) \n\t" "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) @@ -1794,7 +1805,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_a", 4) \n\t" "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) @@ -1979,7 +1990,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "xorl %%eax, %%eax \n\t" "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 
- ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%eax, 4) \n\t" "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) @@ -2032,7 +2043,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t asm volatile( "xorl %%eax, %%eax \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%eax, 4) \n\t" "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) @@ -2110,7 +2121,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "movq "MANGLE(w1111)", %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_b") \n\t" "movd (%0, %%"REG_b"), %%mm0 \n\t" @@ -2184,7 +2195,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" "add %%"REG_b", %%"REG_b" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_b") \n\t" PREFETCH" 64(%1, %%"REG_b") \n\t" diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c index 98828a9136..84af160903 100644 --- a/postproc/swscale_template.c +++ b/postproc/swscale_template.c @@ -16,6 +16,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "asmalign.h" + #undef REAL_MOVNTQ #undef MOVNTQ #undef PAVGB @@ -71,7 +73,7 @@ "movq %%mm3, %%mm4 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".balign 16 \n\t" /* FIXME Unroll? */\ + ASMALIGN16 /* FIXME Unroll? */\ "1: \n\t"\ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ @@ -98,7 +100,7 @@ #define YSCALEYUV2YV121 \ "mov %2, %%"REG_a" \n\t"\ - ".balign 16 \n\t" /* FIXME Unroll? */\ + ASMALIGN16 /* FIXME Unroll? 
*/\ "1: \n\t"\ "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\ @@ -118,14 +120,14 @@ */ #define YSCALEYUV2PACKEDX \ "xor %%"REG_a", %%"REG_a" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "nop \n\t"\ "1: \n\t"\ "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ "movq %%mm3, %%mm4 \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "2: \n\t"\ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ @@ -143,7 +145,7 @@ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ "movq %%mm1, %%mm7 \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "2: \n\t"\ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\ @@ -205,7 +207,7 @@ "punpcklwd %%mm5, %%mm5 \n\t"\ "punpcklwd %%mm5, %%mm5 \n\t"\ "xor %%"REG_a", %%"REG_a" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\ "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\ @@ -258,7 +260,7 @@ "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ @@ -290,7 +292,7 @@ #define REAL_YSCALEYUV2RGB(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ @@ -356,7 +358,7 @@ #define REAL_YSCALEYUV2PACKED1(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]* |