 -rw-r--r--  asmalign.h                    7
 -rwxr-xr-x  configure                     5
 -rw-r--r--  liba52/downmix.c             55
 -rw-r--r--  liba52/imdct.c               21
 -rw-r--r--  libmpcodecs/pullup.c          7
 -rw-r--r--  libmpcodecs/vf_decimate.c     3
 -rw-r--r--  libmpcodecs/vf_divtc.c        3
 -rw-r--r--  libmpcodecs/vf_eq.c           3
 -rw-r--r--  libmpcodecs/vf_eq2.c          3
 -rw-r--r--  libmpcodecs/vf_fspp.c         3
 -rw-r--r--  libmpcodecs/vf_halfpack.c     3
 -rw-r--r--  libmpcodecs/vf_ilpack.c       7
 -rw-r--r--  libmpcodecs/vf_ivtc.c         7
 -rw-r--r--  libmpcodecs/vf_noise.c        7
 -rw-r--r--  mangle.h                      2
 -rw-r--r--  postproc/rgb2rgb_template.c  35
 -rw-r--r--  postproc/swscale_template.c  40
 17 files changed, 123 insertions(+), 88 deletions(-)
diff --git a/asmalign.h b/asmalign.h
new file mode 100644
index 0000000000..45a59cdaa7
--- /dev/null
+++ b/asmalign.h
@@ -0,0 +1,7 @@
+#ifdef SYS_DARWIN
+#define ASMALIGN8 ".align 3\n\t"
+#define ASMALIGN16 ".align 4\n\t"
+#else
+#define ASMALIGN8 ".balign 8\n\t"
+#define ASMALIGN16 ".balign 16\n\t"
+#endif
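
The two branches produce the same alignment: Apple's assembler takes the argument of .align as a power of two (.align 3 is 8 bytes, .align 4 is 16) and does not support .balign, while GNU as on ELF targets takes .balign with a plain byte count. A minimal sketch of how the macro slots into an inline-asm loop, using a hypothetical helper that is not part of this patch:

#include "asmalign.h"

/* Hypothetical example: 16-byte-align the hot loop head.  ASMALIGN16
 * expands to ".align 4\n\t" on Darwin and ".balign 16\n\t" elsewhere,
 * and the string literal concatenates with the rest of the template. */
static void zero_bytes(unsigned char *buf, long n)
{
    asm volatile(
        ASMALIGN16                /* align label "1:" to 16 bytes */
        "1:                \n\t"
        "movb $0, (%0)     \n\t"  /* *buf = 0 */
        "inc %0            \n\t"  /* buf++    */
        "dec %1            \n\t"  /* n--      */
        "jnz 1b            \n\t"  /* loop until n == 0 */
        : "+r"(buf), "+r"(n)
        :
        : "memory", "cc");
}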
diff --git a/configure b/configure
index 5addbd3670..5c449aa1f6 100755
--- a/configure
+++ b/configure
@@ -6972,7 +6972,10 @@ fi
echores "$_crash_debug"
if darwin ; then
- CFLAGS="$CFLAGS -mdynamic-no-pic -falign-loops=16 -DSYS_DARWIN"
+ CFLAGS="$CFLAGS -mdynamic-no-pic -falign-loops=16 -DSYS_DARWIN -DCONFIG_DARWIN"
+ if x86 ; then
+ CFLAGS="$CFLAGS -arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk"
+ fi
if [ "$_cc_major" = 3 ] && [ "$_cc_minor" -lt 1 ]; then
CFLAGS="$CFLAGS -no-cpp-precomp"
fi
diff --git a/liba52/downmix.c b/liba52/downmix.c
index 52955c6335..67eee7a89e 100644
--- a/liba52/downmix.c
+++ b/liba52/downmix.c
@@ -28,6 +28,7 @@
*/
#include "config.h"
+#include "asmalign.h"
#include <string.h>
#include <inttypes.h>
@@ -691,7 +692,7 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
"movlps %2, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps (%0, %%"REG_S"), %%xmm0 \n\t"
"movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
@@ -714,7 +715,7 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps (%0, %%"REG_S"), %%xmm0 \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
@@ -735,7 +736,7 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps (%0, %%"REG_S"), %%xmm0 \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
@@ -757,7 +758,7 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps (%0, %%"REG_S"), %%xmm0 \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
@@ -780,7 +781,7 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" //common
@@ -803,7 +804,7 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
"movlps %2, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" //common
@@ -826,7 +827,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
"movaps (%0, %%"REG_S"), %%xmm1 \n\t"
@@ -850,7 +851,7 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
@@ -874,7 +875,7 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
@@ -900,7 +901,7 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
"addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
@@ -925,7 +926,7 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"addps %%xmm7, %%xmm0 \n\t" // common
@@ -949,7 +950,7 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias)
"movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
"movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
@@ -976,7 +977,7 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
"movlps %2, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movaps (%0, %%"REG_S"), %%xmm0 \n\t"
"movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
@@ -998,7 +999,7 @@ static void zero_MMX(sample_t * samples)
asm volatile(
"mov $-1024, %%"REG_S" \n\t"
"pxor %%mm0, %%mm0 \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq %%mm0, (%0, %%"REG_S") \n\t"
"movq %%mm0, 8(%0, %%"REG_S") \n\t"
@@ -1258,7 +1259,7 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
"movd %2, %%mm7 \n\t"
"punpckldq %2, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
@@ -1289,7 +1290,7 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
@@ -1316,7 +1317,7 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
@@ -1345,7 +1346,7 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
@@ -1376,7 +1377,7 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
@@ -1407,7 +1408,7 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
"movd %2, %%mm7 \n\t"
"punpckldq %2, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 1024(%1, %%"REG_S"), %%mm0\n\t"
"movq 1032(%1, %%"REG_S"), %%mm1\n\t"
@@ -1438,7 +1439,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
"movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
@@ -1471,7 +1472,7 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
@@ -1504,7 +1505,7 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
@@ -1541,7 +1542,7 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 2048(%0, %%"REG_S"), %%mm0\n\t"
"movq 2056(%0, %%"REG_S"), %%mm1\n\t"
@@ -1576,7 +1577,7 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias)
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq 1024(%0, %%"REG_S"), %%mm0\n\t"
"movq 1032(%0, %%"REG_S"), %%mm1\n\t"
@@ -1608,7 +1609,7 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias)
{
asm volatile(
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
@@ -1649,7 +1650,7 @@ static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
"movd %2, %%mm7 \n\t"
"punpckldq %2, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16\n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_S"), %%mm0 \n\t"
"movq 8(%0, %%"REG_S"), %%mm1 \n\t"
diff --git a/liba52/imdct.c b/liba52/imdct.c
index 24505e17ab..a535823584 100644
--- a/liba52/imdct.c
+++ b/liba52/imdct.c
@@ -31,6 +31,7 @@
*/
#include "config.h"
+#include "asmalign.h"
#include <math.h>
#include <stdio.h>
@@ -792,7 +793,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
"mov $1008, %%"REG_D" \n\t"
"push %%"REG_BP" \n\t" //use ebp without telling gcc
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI
"movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI
@@ -851,7 +852,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"xorps %%xmm1, %%xmm1 \n\t"
"xorps %%xmm2, %%xmm2 \n\t"
"mov %0, %%"REG_S" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movlps (%%"REG_S"), %%xmm0\n\t" //buf[p]
"movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q]
@@ -872,7 +873,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
asm volatile(
"movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
"mov %0, %%"REG_S" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3
"shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3
@@ -903,7 +904,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"xorps %%xmm5, %%xmm5 \n\t"
"xorps %%xmm2, %%xmm2 \n\t"
"mov %0, %%"REG_S" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5
"movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7
@@ -944,7 +945,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
buf_offset = buf+128;
asm volatile(
"mov %0, %%"REG_S" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"xor %%"REG_D", %%"REG_D" \n\t" // k
"lea (%%"REG_S", %3), %%"REG_d" \n\t"
@@ -976,7 +977,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
/* Post IFFT complex multiply plus IFFT complex conjugate*/
asm volatile(
"mov $-1024, %%"REG_S" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movaps (%0, %%"REG_S"), %%xmm0 \n\t"
"movaps (%0, %%"REG_S"), %%xmm1 \n\t"
@@ -1002,7 +1003,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"xor %%"REG_S", %%"REG_S" \n\t" // 0
"movss %3, %%xmm2 \n\t" // bias
"shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
"movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
@@ -1029,7 +1030,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
"xor %%"REG_S", %%"REG_S" \n\t" // 0
"movss %3, %%xmm2 \n\t" // bias
"shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
"movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
@@ -1056,7 +1057,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
asm volatile(
"xor %%"REG_D", %%"REG_D" \n\t" // 0
"xor %%"REG_S", %%"REG_S" \n\t" // 0
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
"movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
@@ -1078,7 +1079,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
asm volatile(
"mov $1024, %%"REG_D" \n\t" // 1024
"xor %%"REG_S", %%"REG_S" \n\t" // 0
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
"movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
diff --git a/libmpcodecs/pullup.c b/libmpcodecs/pullup.c
index df4069cc67..12bc5e739c 100644
--- a/libmpcodecs/pullup.c
+++ b/libmpcodecs/pullup.c
@@ -5,6 +5,7 @@
#include <string.h>
#include "pullup.h"
#include "config.h"
+#include "asmalign.h"
@@ -18,7 +19,7 @@ static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%%esi), %%mm0 \n\t"
@@ -67,7 +68,7 @@ static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
"pxor %%mm7, %%mm7 \n\t"
"subl %%eax, %%edi \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"2: \n\t"
"movq (%%esi), %%mm0 \n\t"
@@ -156,7 +157,7 @@ static int var_y_mmx(unsigned char *a, unsigned char *b, int s)
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%%esi), %%mm0 \n\t"
diff --git a/libmpcodecs/vf_decimate.c b/libmpcodecs/vf_decimate.c
index 0fc7bb9a5a..03c05a4af5 100644
--- a/libmpcodecs/vf_decimate.c
+++ b/libmpcodecs/vf_decimate.c
@@ -5,6 +5,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -28,7 +29,7 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t"
diff --git a/libmpcodecs/vf_divtc.c b/libmpcodecs/vf_divtc.c
index 849f694c09..2efd7a33be 100644
--- a/libmpcodecs/vf_divtc.c
+++ b/libmpcodecs/vf_divtc.c
@@ -8,6 +8,7 @@
#include "mp_msg.h"
#include "cpudetect.h"
#include "bswap.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -41,7 +42,7 @@ static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t"
diff --git a/libmpcodecs/vf_eq.c b/libmpcodecs/vf_eq.c
index 6f1bf675c1..504e91a605 100644
--- a/libmpcodecs/vf_eq.c
+++ b/libmpcodecs/vf_eq.c
@@ -6,6 +6,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -51,7 +52,7 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
"movq (%6), %%mm4 \n\t"
"pxor %%mm0, %%mm0 \n\t"
"movl %4, %%eax\n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0), %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
diff --git a/libmpcodecs/vf_eq2.c b/libmpcodecs/vf_eq2.c
index 9bb5ed7467..faac982f75 100644
--- a/libmpcodecs/vf_eq2.c
+++ b/libmpcodecs/vf_eq2.c
@@ -18,6 +18,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -135,7 +136,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
"movq (%6), %%mm4 \n\t"
"pxor %%mm0, %%mm0 \n\t"
"movl %4, %%eax\n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0), %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
diff --git a/libmpcodecs/vf_fspp.c b/libmpcodecs/vf_fspp.c
index 304db8faed..006fcb0d47 100644
--- a/libmpcodecs/vf_fspp.c
+++ b/libmpcodecs/vf_fspp.c
@@ -37,6 +37,7 @@
#include <math.h>
#include "config.h"
+#include "asmalign.h"
#include "mp_msg.h"
#include "cpudetect.h"
@@ -888,7 +889,7 @@ static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int
static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt)
{
asm volatile(
- ".align 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
//
diff --git a/libmpcodecs/vf_halfpack.c b/libmpcodecs/vf_halfpack.c
index 99b569a8b1..7f8b87a21b 100644
--- a/libmpcodecs/vf_halfpack.c
+++ b/libmpcodecs/vf_halfpack.c
@@ -6,6 +6,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -40,7 +41,7 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
for (h/=2; h; h--) {
asm (
"pxor %%mm0, %%mm0 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0), %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
diff --git a/libmpcodecs/vf_ilpack.c b/libmpcodecs/vf_ilpack.c
index d369e5e295..7eb65c5818 100644
--- a/libmpcodecs/vf_ilpack.c
+++ b/libmpcodecs/vf_ilpack.c
@@ -6,6 +6,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -66,7 +67,7 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
{
int j;
asm volatile (""
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0), %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
@@ -105,7 +106,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
#endif
"pxor %%mm0, %%mm0 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
".Lli0: \n\t"
"movq (%%"REG_S"), %%mm1 \n\t"
"movq (%%"REG_S"), %%mm2 \n\t"
@@ -213,7 +214,7 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
#endif
"pxor %%mm0, %%mm0 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
".Lli1: \n\t"
"movq (%%"REG_S"), %%mm1 \n\t"
"movq (%%"REG_S"), %%mm2 \n\t"
diff --git a/libmpcodecs/vf_ivtc.c b/libmpcodecs/vf_ivtc.c
index 62bc749447..50cabe0ee1 100644
--- a/libmpcodecs/vf_ivtc.c
+++ b/libmpcodecs/vf_ivtc.c
@@ -5,6 +5,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#include "img_format.h"
#include "mp_image.h"
@@ -67,7 +68,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"pxor %%mm5, %%mm5 \n\t" // 4 odd difference sums
"pxor %%mm7, %%mm7 \n\t" // all zeros
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
// Even difference
@@ -127,7 +128,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"pxor %%mm5, %%mm5 \n\t" // Temporal noise
"pxor %%mm6, %%mm6 \n\t" // Current spacial noise
- ".balign 16 \n\t"
+ ASMALIGN16
"2: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t"
@@ -181,7 +182,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
"pxor %%mm5, %%mm5 \n\t"
"pxor %%mm6, %%mm6 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"3: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t"
diff --git a/libmpcodecs/vf_noise.c b/libmpcodecs/vf_noise.c
index ea762c0b0b..c3427b3fd9 100644
--- a/libmpcodecs/vf_noise.c
+++ b/libmpcodecs/vf_noise.c
@@ -25,6 +25,7 @@
#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"
+#include "asmalign.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
@@ -153,7 +154,7 @@ static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int
"pcmpeqb %%mm7, %%mm7 \n\t"
"psllw $15, %%mm7 \n\t"
"packsswb %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
@@ -182,7 +183,7 @@ static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int
"pcmpeqb %%mm7, %%mm7 \n\t"
"psllw $15, %%mm7 \n\t"
"packsswb %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
@@ -220,7 +221,7 @@ static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t
asm volatile(
"mov %5, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
diff --git a/mangle.h b/mangle.h
index f3894cc332..7941636fba 100644
--- a/mangle.h
+++ b/mangle.h
@@ -9,7 +9,7 @@
/* Feel free to add more to the list, eg. a.out IMO */
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \
- (defined(__OpenBSD__) && !defined(__ELF__))
+ (defined(__OpenBSD__) && !defined(__ELF__)) || defined(__APPLE__)
#define MANGLE(a) "_" #a
#else
#define MANGLE(a) #a
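
For context, MANGLE() supplies the leading underscore that Mach-O (like Cygwin, MinGW, OS/2, and non-ELF OpenBSD) prepends to C symbol names at the assembler level; listing __APPLE__ here is what lets the inline asm above reference C globals such as MANGLE(bit_reverse_512) or MANGLE(mask24r) and still link on OS X. A hedged, non-PIC, x86-64 sketch with an illustrative symbol of its own:

#include <stdint.h>
#include "mangle.h"

/* illustrative global: assembles as "_mask_lo" on Mach-O and as
 * "mask_lo" on ELF; MANGLE() picks the right spelling for us */
uint64_t mask_lo = 0x00FF00FF00FF00FFULL;

uint64_t load_mask(void)
{
    uint64_t v;
    /* non-PIC sketch: load the symbol through its mangled name */
    asm("movq "MANGLE(mask_lo)", %0" : "=r"(v));
    return v;
}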
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c
index d611debca2..d03493ca31 100644
--- a/postproc/rgb2rgb_template.c
+++ b/postproc/rgb2rgb_template.c
@@ -12,6 +12,8 @@
#include <stddef.h>
#include <inttypes.h> /* for __WORDSIZE */
+#include "asmalign.h"
+
#ifndef __WORDSIZE
// #warning You have misconfigured system and probably will lose performance!
#define __WORDSIZE MP_WORDSIZE
@@ -40,9 +42,14 @@
#define PREFETCHW "prefetcht0"
#define PAVGB "pavgb"
#else
+#ifdef __APPLE__
+#define PREFETCH "#"
+#define PREFETCHW "#"
+#else
#define PREFETCH "/nop"
#define PREFETCHW "/nop"
#endif
+#endif
#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
@@ -56,8 +63,12 @@
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
+#ifdef __APPLE__
+#define SFENCE "#"
+#else
#define SFENCE "/nop"
#endif
+#endif
static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size)
{
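
Both hunks rely on the same trick: when the real instruction is unavailable, PREFETCH/PREFETCHW/SFENCE expand to the assembler's line-comment prefix, so the entire emitted line is assembled as a comment and costs nothing. Apple's as comments with '#'; the older "/nop" spelling relied on '/' being a comment character in SVR4-flavored GNU as. A self-contained sketch of the expansion, with hypothetical names:

#include <stdint.h>

#ifdef __APPLE__
#define PREFETCH "#"    /* '#' opens a line comment in Apple's as    */
#else
#define PREFETCH "/nop" /* '/' plays the same role in SVR4-style as  */
#endif

/* hypothetical reduction: the prefetch line vanishes into a comment
 * when PREFETCH is a comment prefix, leaving only the MMX copy */
static void copy8(uint64_t *dst, const uint64_t *src)
{
    asm volatile(
        PREFETCH" 32(%1)        \n\t" /* expands to e.g. "# 32(%1)" */
        "movq (%1), %%mm0       \n\t"
        "movq %%mm0, (%0)       \n\t"
        "emms                   \n\t"
        : : "r"(dst), "r"(src)
        : "memory", "mm0");
}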
@@ -332,7 +343,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
"movq %3, %%mm5 \n\t"
"movq %4, %%mm6 \n\t"
"movq %5, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
@@ -489,7 +500,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
"movq %3, %%mm5 \n\t"
"movq %4, %%mm6 \n\t"
"movq %5, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
@@ -1344,7 +1355,7 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s
/* TODO: unroll this loop */
asm volatile (
"xor %%"REG_a", %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 32(%0, %%"REG_a") \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
@@ -1394,7 +1405,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
"movq "MANGLE(mask24r)", %%mm5 \n\t"
"movq "MANGLE(mask24g)", %%mm6 \n\t"
"movq "MANGLE(mask24b)", %%mm7 \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a") \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
@@ -1464,7 +1475,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely limited by mem anyway)
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"REG_a") \n\t"
@@ -1617,7 +1628,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely limited by mem anyway)
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"REG_a") \n\t"
@@ -1741,7 +1752,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"xor %%"REG_a", %%"REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
@@ -1794,7 +1805,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
@@ -1979,7 +1990,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"xorl %%eax, %%eax \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 64(%0, %%eax, 4) \n\t"
"movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
@@ -2032,7 +2043,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
asm volatile(
"xorl %%eax, %%eax \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 64(%0, %%eax, 4) \n\t"
"movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
@@ -2110,7 +2121,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"movq "MANGLE(w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 64(%0, %%"REG_b") \n\t"
"movd (%0, %%"REG_b"), %%mm0 \n\t"
@@ -2184,7 +2195,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t"
"add %%"REG_b", %%"REG_b" \n\t"
- ".balign 16 \n\t"
+ ASMALIGN16
"1: \n\t"
PREFETCH" 64(%0, %%"REG_b") \n\t"
PREFETCH" 64(%1, %%"REG_b") \n\t"
diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c
index 98828a9136..84af160903 100644
--- a/postproc/swscale_template.c
+++ b/postproc/swscale_template.c
@@ -16,6 +16,8 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "asmalign.h"
+
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
@@ -71,7 +73,7 @@
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
- ".balign 16 \n\t" /* FIXME Unroll? */\
+ ASMALIGN16 /* FIXME Unroll? */\
"1: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
@@ -98,7 +100,7 @@
#define YSCALEYUV2YV121 \
"mov %2, %%"REG_a" \n\t"\
- ".balign 16 \n\t" /* FIXME Unroll? */\
+ ASMALIGN16 /* FIXME Unroll? */\
"1: \n\t"\
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
"movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
@@ -118,14 +120,14 @@
*/
#define YSCALEYUV2PACKEDX \
"xor %%"REG_a", %%"REG_a" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
@@ -143,7 +145,7 @@
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
"movq %%mm1, %%mm7 \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
@@ -205,7 +207,7 @@
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"1: \n\t"\
"movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\
"movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\
@@ -258,7 +260,7 @@
"movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
"movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
"xor "#index", "#index" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
@@ -290,7 +292,7 @@
#define REAL_YSCALEYUV2RGB(index, c) \
"xor "#index", "#index" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
@@ -356,7 +358,7 @@
#define REAL_YSCALEYUV2PACKED1(index, c) \
"xor "#index", "#index" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
@@ -371,7 +373,7 @@
#define REAL_YSCALEYUV2RGB1(index, c) \
"xor "#index", "#index" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
@@ -420,7 +422,7 @@
#define REAL_YSCALEYUV2PACKED1b(index, c) \
"xor "#index", "#index" \n\t"\
- ".balign 16 \n\t"\
+ ASMALIGN16\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\