summary | refs | log | tree | commit | diff | stats
path: root/libswscale
diff options
context:
space:
mode:
author: diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2007-04-30 08:05:29 +0000
committer: diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2007-04-30 08:05:29 +0000
commit: 8caae487380071b53cefddd0c2ae799a7f04d33e (patch)
tree: d3148b286ab819f38ceebe35a6476d1d006cb8bd /libswscale
parent: 66be250f28b53c7c23ee344e42051449500584b0 (diff)
download: mpv-8caae487380071b53cefddd0c2ae799a7f04d33e.tar.bz2
download: mpv-8caae487380071b53cefddd0c2ae799a7f04d33e.tar.xz
cosmetics attack, part IV: Remove all tabs and prettyprint/reindent the code.
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@23179 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libswscale')
-rw-r--r-- libswscale/swscale_template.c 5363
1 file changed, 2681 insertions, 2682 deletions
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 9b96754479..96d30afa0b 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -71,835 +71,835 @@
#endif
#define YSCALEYUV2YV12X(x, offset, dest, width) \
- asm volatile(\
- "xor %%"REG_a", %%"REG_a" \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
- "movq %%mm3, %%mm4 \n\t"\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- ASMALIGN(4) /* FIXME Unroll? */\
- "1: \n\t"\
- "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
- "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
- "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
- "add $16, %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- "pmulhw %%mm0, %%mm2 \n\t"\
- "pmulhw %%mm0, %%mm5 \n\t"\
- "paddw %%mm2, %%mm3 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- " jnz 1b \n\t"\
- "psraw $3, %%mm3 \n\t"\
- "psraw $3, %%mm4 \n\t"\
- "packuswb %%mm4, %%mm3 \n\t"\
- MOVNTQ(%%mm3, (%1, %%REGa))\
- "add $8, %%"REG_a" \n\t"\
- "cmp %2, %%"REG_a" \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
- "movq %%mm3, %%mm4 \n\t"\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "jb 1b \n\t"\
- :: "r" (&c->redDither),\
- "r" (dest), "g" (width)\
- : "%"REG_a, "%"REG_d, "%"REG_S\
- );
+ asm volatile(\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
+ "lea " offset "(%0), %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ASMALIGN(4) /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
+ "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
+ "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t"\
+ "pmulhw %%mm0, %%mm5 \n\t"\
+ "paddw %%mm2, %%mm3 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ " jnz 1b \n\t"\
+ "psraw $3, %%mm3 \n\t"\
+ "psraw $3, %%mm4 \n\t"\
+ "packuswb %%mm4, %%mm3 \n\t"\
+ MOVNTQ(%%mm3, (%1, %%REGa))\
+ "add $8, %%"REG_a" \n\t"\
+ "cmp %2, %%"REG_a" \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
+ "lea " offset "(%0), %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "r" (&c->redDither),\
+ "r" (dest), "g" (width)\
+ : "%"REG_a, "%"REG_d, "%"REG_S\
+ );
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
- asm volatile(\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "xor %%"REG_a", %%"REG_a" \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
- "pxor %%mm5, %%mm5 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- ASMALIGN(4) \
- "1: \n\t"\
- "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
- "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
- "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
- "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
- "movq %%mm0, %%mm3 \n\t"\
- "punpcklwd %%mm1, %%mm0 \n\t"\
- "punpckhwd %%mm1, %%mm3 \n\t"\
- "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
- "pmaddwd %%mm1, %%mm0 \n\t"\
- "pmaddwd %%mm1, %%mm3 \n\t"\
- "paddd %%mm0, %%mm4 \n\t"\
- "paddd %%mm3, %%mm5 \n\t"\
- "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
- "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
- "add $16, %%"REG_d" \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "punpcklwd %%mm3, %%mm2 \n\t"\
- "punpckhwd %%mm3, %%mm0 \n\t"\
- "pmaddwd %%mm1, %%mm2 \n\t"\
- "pmaddwd %%mm1, %%mm0 \n\t"\
- "paddd %%mm2, %%mm6 \n\t"\
- "paddd %%mm0, %%mm7 \n\t"\
- " jnz 1b \n\t"\
- "psrad $16, %%mm4 \n\t"\
- "psrad $16, %%mm5 \n\t"\
- "psrad $16, %%mm6 \n\t"\
- "psrad $16, %%mm7 \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
- "packssdw %%mm5, %%mm4 \n\t"\
- "packssdw %%mm7, %%mm6 \n\t"\
- "paddw %%mm0, %%mm4 \n\t"\
- "paddw %%mm0, %%mm6 \n\t"\
- "psraw $3, %%mm4 \n\t"\
- "psraw $3, %%mm6 \n\t"\
- "packuswb %%mm6, %%mm4 \n\t"\
- MOVNTQ(%%mm4, (%1, %%REGa))\
- "add $8, %%"REG_a" \n\t"\
- "cmp %2, %%"REG_a" \n\t"\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
- "pxor %%mm5, %%mm5 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "jb 1b \n\t"\
- :: "r" (&c->redDither),\
- "r" (dest), "g" (width)\
- : "%"REG_a, "%"REG_d, "%"REG_S\
- );
+ asm volatile(\
+ "lea " offset "(%0), %%"REG_d" \n\t"\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+ "pxor %%mm4, %%mm4 \n\t"\
+ "pxor %%mm5, %%mm5 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ASMALIGN(4) \
+ "1: \n\t"\
+ "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
+ "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
+ "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
+ "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
+ "movq %%mm0, %%mm3 \n\t"\
+ "punpcklwd %%mm1, %%mm0 \n\t"\
+ "punpckhwd %%mm1, %%mm3 \n\t"\
+ "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
+ "pmaddwd %%mm1, %%mm0 \n\t"\
+ "pmaddwd %%mm1, %%mm3 \n\t"\
+ "paddd %%mm0, %%mm4 \n\t"\
+ "paddd %%mm3, %%mm5 \n\t"\
+ "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
+ "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
+ "add $16, %%"REG_d" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "movq %%mm2, %%mm0 \n\t"\
+ "punpcklwd %%mm3, %%mm2 \n\t"\
+ "punpckhwd %%mm3, %%mm0 \n\t"\
+ "pmaddwd %%mm1, %%mm2 \n\t"\
+ "pmaddwd %%mm1, %%mm0 \n\t"\
+ "paddd %%mm2, %%mm6 \n\t"\
+ "paddd %%mm0, %%mm7 \n\t"\
+ " jnz 1b \n\t"\
+ "psrad $16, %%mm4 \n\t"\
+ "psrad $16, %%mm5 \n\t"\
+ "psrad $16, %%mm6 \n\t"\
+ "psrad $16, %%mm7 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
+ "packssdw %%mm5, %%mm4 \n\t"\
+ "packssdw %%mm7, %%mm6 \n\t"\
+ "paddw %%mm0, %%mm4 \n\t"\
+ "paddw %%mm0, %%mm6 \n\t"\
+ "psraw $3, %%mm4 \n\t"\
+ "psraw $3, %%mm6 \n\t"\
+ "packuswb %%mm6, %%mm4 \n\t"\
+ MOVNTQ(%%mm4, (%1, %%REGa))\
+ "add $8, %%"REG_a" \n\t"\
+ "cmp %2, %%"REG_a" \n\t"\
+ "lea " offset "(%0), %%"REG_d" \n\t"\
+ "pxor %%mm4, %%mm4 \n\t"\
+ "pxor %%mm5, %%mm5 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "r" (&c->redDither),\
+ "r" (dest), "g" (width)\
+ : "%"REG_a, "%"REG_d, "%"REG_S\
+ );
#define YSCALEYUV2YV121 \
- "mov %2, %%"REG_a" \n\t"\
- ASMALIGN(4) /* FIXME Unroll? */\
- "1: \n\t"\
- "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
- "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
- "psraw $7, %%mm0 \n\t"\
- "psraw $7, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- MOVNTQ(%%mm0, (%1, %%REGa))\
- "add $8, %%"REG_a" \n\t"\
- "jnc 1b \n\t"
+ "mov %2, %%"REG_a" \n\t"\
+ ASMALIGN(4) /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
+ "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
+ "psraw $7, %%mm0 \n\t"\
+ "psraw $7, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ MOVNTQ(%%mm0, (%1, %%REGa))\
+ "add $8, %%"REG_a" \n\t"\
+ "jnc 1b \n\t"
/*
- :: "m" (-lumFilterSize), "m" (-chrFilterSize),
- "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
- "r" (dest), "m" (dstW),
- "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
- : "%eax", "%ebx", "%ecx", "%edx", "%esi"
+ :: "m" (-lumFilterSize), "m" (-chrFilterSize),
+ "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
+ "r" (dest), "m" (dstW),
+ "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
+ : "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
#define YSCALEYUV2PACKEDX \
- asm volatile(\
- "xor %%"REG_a", %%"REG_a" \n\t"\
- ASMALIGN(4)\
- "nop \n\t"\
- "1: \n\t"\
- "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
- "movq %%mm3, %%mm4 \n\t"\
- ASMALIGN(4)\
- "2: \n\t"\
- "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
- "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
- "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
- "add $16, %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "pmulhw %%mm0, %%mm2 \n\t"\
- "pmulhw %%mm0, %%mm5 \n\t"\
- "paddw %%mm2, %%mm3 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- " jnz 2b \n\t"\
+ asm volatile(\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+ ASMALIGN(4)\
+ "nop \n\t"\
+ "1: \n\t"\
+ "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
+ ASMALIGN(4)\
+ "2: \n\t"\
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
+ "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
+ "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t"\
+ "pmulhw %%mm0, %%mm5 \n\t"\
+ "paddw %%mm2, %%mm3 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ " jnz 2b \n\t"\
\
- "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
- "movq %%mm1, %%mm7 \n\t"\
- ASMALIGN(4)\
- "2: \n\t"\
- "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
- "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
- "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
- "add $16, %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "pmulhw %%mm0, %%mm2 \n\t"\
- "pmulhw %%mm0, %%mm5 \n\t"\
- "paddw %%mm2, %%mm1 \n\t"\
- "paddw %%mm5, %%mm7 \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- " jnz 2b \n\t"\
-
-#define YSCALEYUV2PACKEDX_END\
- :: "r" (&c->redDither), \
- "m" (dummy), "m" (dummy), "m" (dummy),\
- "r" (dest), "m" (dstW)\
- : "%"REG_a, "%"REG_d, "%"REG_S\
- );
+ "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\
+ "movq %%mm1, %%mm7 \n\t"\
+ ASMALIGN(4)\
+ "2: \n\t"\
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
+ "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
+ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t"\
+ "pmulhw %%mm0, %%mm5 \n\t"\
+ "paddw %%mm2, %%mm1 \n\t"\
+ "paddw %%mm5, %%mm7 \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ " jnz 2b \n\t"\
+
+#define YSCALEYUV2PACKEDX_END \
+ :: "r" (&c->redDither), \
+ "m" (dummy), "m" (dummy), "m" (dummy),\
+ "r" (dest), "m" (dstW) \
+ : "%"REG_a, "%"REG_d, "%"REG_S \
+ );
#define YSCALEYUV2PACKEDX_ACCURATE \
- asm volatile(\
- "xor %%"REG_a", %%"REG_a" \n\t"\
- ASMALIGN(4)\
- "nop \n\t"\
- "1: \n\t"\
- "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
- "pxor %%mm5, %%mm5 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- ASMALIGN(4)\
- "2: \n\t"\
- "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
- "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
- "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
- "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
- "movq %%mm0, %%mm3 \n\t"\
- "punpcklwd %%mm1, %%mm0 \n\t"\
- "punpckhwd %%mm1, %%mm3 \n\t"\
- "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
- "pmaddwd %%mm1, %%mm0 \n\t"\
- "pmaddwd %%mm1, %%mm3 \n\t"\
- "paddd %%mm0, %%mm4 \n\t"\
- "paddd %%mm3, %%mm5 \n\t"\
- "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
- "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
- "add $16, %%"REG_d" \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "punpcklwd %%mm3, %%mm2 \n\t"\
- "punpckhwd %%mm3, %%mm0 \n\t"\
- "pmaddwd %%mm1, %%mm2 \n\t"\
- "pmaddwd %%mm1, %%mm0 \n\t"\
- "paddd %%mm2, %%mm6 \n\t"\
- "paddd %%mm0, %%mm7 \n\t"\
- " jnz 2b \n\t"\
- "psrad $16, %%mm4 \n\t"\
- "psrad $16, %%mm5 \n\t"\
- "psrad $16, %%mm6 \n\t"\
- "psrad $16, %%mm7 \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
- "packssdw %%mm5, %%mm4 \n\t"\
- "packssdw %%mm7, %%mm6 \n\t"\
- "paddw %%mm0, %%mm4 \n\t"\
- "paddw %%mm0, %%mm6 \n\t"\
- "movq %%mm4, "U_TEMP"(%0) \n\t"\
- "movq %%mm6, "V_TEMP"(%0) \n\t"\
+ asm volatile(\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+ ASMALIGN(4)\
+ "nop \n\t"\
+ "1: \n\t"\
+ "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "pxor %%mm4, %%mm4 \n\t"\
+ "pxor %%mm5, %%mm5 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ ASMALIGN(4)\
+ "2: \n\t"\
+ "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
+ "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
+ "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
+ "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
+ "movq %%mm0, %%mm3 \n\t"\
+ "punpcklwd %%mm1, %%mm0 \n\t"\
+ "punpckhwd %%mm1, %%mm3 \n\t"\
+ "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
+ "pmaddwd %%mm1, %%mm0 \n\t"\
+ "pmaddwd %%mm1, %%mm3 \n\t"\
+ "paddd %%mm0, %%mm4 \n\t"\
+ "paddd %%mm3, %%mm5 \n\t"\
+ "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
+ "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
+ "add $16, %%"REG_d" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "movq %%mm2, %%mm0 \n\t"\
+ "punpcklwd %%mm3, %%mm2 \n\t"\
+ "punpckhwd %%mm3, %%mm0 \n\t"\
+ "pmaddwd %%mm1, %%mm2 \n\t"\
+ "pmaddwd %%mm1, %%mm0 \n\t"\
+ "paddd %%mm2, %%mm6 \n\t"\
+ "paddd %%mm0, %%mm7 \n\t"\
+ " jnz 2b \n\t"\
+ "psrad $16, %%mm4 \n\t"\
+ "psrad $16, %%mm5 \n\t"\
+ "psrad $16, %%mm6 \n\t"\
+ "psrad $16, %%mm7 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
+ "packssdw %%mm5, %%mm4 \n\t"\
+ "packssdw %%mm7, %%mm6 \n\t"\
+ "paddw %%mm0, %%mm4 \n\t"\
+ "paddw %%mm0, %%mm6 \n\t"\
+ "movq %%mm4, "U_TEMP"(%0) \n\t"\
+ "movq %%mm6, "V_TEMP"(%0) \n\t"\
\
- "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "pxor %%mm1, %%mm1 \n\t"\
- "pxor %%mm5, %%mm5 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- ASMALIGN(4)\
- "2: \n\t"\
- "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
- "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
- "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
- "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
- "movq %%mm0, %%mm3 \n\t"\
- "punpcklwd %%mm4, %%mm0 \n\t"\
- "punpckhwd %%mm4, %%mm3 \n\t"\
- "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
- "pmaddwd %%mm4, %%mm0 \n\t"\
- "pmaddwd %%mm4, %%mm3 \n\t"\
- "paddd %%mm0, %%mm1 \n\t"\
- "paddd %%mm3, %%mm5 \n\t"\
- "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
- "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
- "add $16, %%"REG_d" \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "punpcklwd %%mm3, %%mm2 \n\t"\
- "punpckhwd %%mm3, %%mm0 \n\t"\
- "pmaddwd %%mm4, %%mm2 \n\t"\
- "pmaddwd %%mm4, %%mm0 \n\t"\
- "paddd %%mm2, %%mm7 \n\t"\
- "paddd %%mm0, %%mm6 \n\t"\
- " jnz 2b \n\t"\
- "psrad $16, %%mm1 \n\t"\
- "psrad $16, %%mm5 \n\t"\
- "psrad $16, %%mm7 \n\t"\
- "psrad $16, %%mm6 \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
- "packssdw %%mm5, %%mm1 \n\t"\
- "packssdw %%mm6, %%mm7 \n\t"\
- "paddw %%mm0, %%mm1 \n\t"\
- "paddw %%mm0, %%mm7 \n\t"\
- "movq "U_TEMP"(%0), %%mm3 \n\t"\
- "movq "V_TEMP"(%0), %%mm4 \n\t"\
+ "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "pxor %%mm1, %%mm1 \n\t"\
+ "pxor %%mm5, %%mm5 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ ASMALIGN(4)\
+ "2: \n\t"\
+ "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
+ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
+ "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
+ "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
+ "movq %%mm0, %%mm3 \n\t"\
+ "punpcklwd %%mm4, %%mm0 \n\t"\
+ "punpckhwd %%mm4, %%mm3 \n\t"\
+ "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
+ "pmaddwd %%mm4, %%mm0 \n\t"\
+ "pmaddwd %%mm4, %%mm3 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t"\
+ "paddd %%mm3, %%mm5 \n\t"\
+ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
+ "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
+ "add $16, %%"REG_d" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "movq %%mm2, %%mm0 \n\t"\
+ "punpcklwd %%mm3, %%mm2 \n\t"\
+ "punpckhwd %%mm3, %%mm0 \n\t"\
+ "pmaddwd %%mm4, %%mm2 \n\t"\
+ "pmaddwd %%mm4, %%mm0 \n\t"\
+ "paddd %%mm2, %%mm7 \n\t"\
+ "paddd %%mm0, %%mm6 \n\t"\
+ " jnz 2b \n\t"\
+ "psrad $16, %%mm1 \n\t"\
+ "psrad $16, %%mm5 \n\t"\
+ "psrad $16, %%mm7 \n\t"\
+ "psrad $16, %%mm6 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
+ "packssdw %%mm5, %%mm1 \n\t"\
+ "packssdw %%mm6, %%mm7 \n\t"\
+ "paddw %%mm0, %%mm1 \n\t"\
+ "paddw %%mm0, %%mm7 \n\t"\
+ "movq "U_TEMP"(%0), %%mm3 \n\t"\
+ "movq "V_TEMP"(%0), %%mm4 \n\t"\
#define YSCALEYUV2RGBX \
- "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
- "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
- "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
- "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
- "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
- "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
- /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
- "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
- "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
- "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
- "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
- "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
- "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
- /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
- "paddw %%mm3, %%mm4 \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "movq %%mm5, %%mm6 \n\t"\
- "movq %%mm4, %%mm3 \n\t"\
- "punpcklwd %%mm2, %%mm2 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm4, %%mm4 \n\t"\
- "paddw %%mm1, %%mm2 \n\t"\
- "paddw %%mm1, %%mm5 \n\t"\
- "paddw %%mm1, %%mm4 \n\t"\
- "punpckhwd %%mm0, %%mm0 \n\t"\
- "punpckhwd %%mm6, %%mm6 \n\t"\
- "punpckhwd %%mm3, %%mm3 \n\t"\
- "paddw %%mm7, %%mm0 \n\t"\
- "paddw %%mm7, %%mm6 \n\t"\
- "paddw %%mm7, %%mm3 \n\t"\
- /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
- "packuswb %%mm0, %%mm2 \n\t"\
- "packuswb %%mm6, %%mm5 \n\t"\
- "packuswb %%mm3, %%mm4 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"
+ "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
+ "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
+ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
+ "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
+ "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
+ "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
+/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+ "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
+ "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
+ "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
+ "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
+ "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
+ "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
+/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+ "paddw %%mm3, %%mm4 \n\t"\
+ "movq %%mm2, %%mm0 \n\t"\
+ "movq %%mm5, %%mm6 \n\t"\
+ "movq %%mm4, %%mm3 \n\t"\
+ "punpcklwd %%mm2, %%mm2 \n\t"\
+ "punpcklwd %%mm5, %%mm5 \n\t"\
+ "punpcklwd %%mm4, %%mm4 \n\t"\
+ "paddw %%mm1, %%mm2 \n\t"\
+ "paddw %%mm1, %%mm5 \n\t"\
+ "paddw %%mm1, %%mm4 \n\t"\
+ "punpckhwd %%mm0, %%mm0 \n\t"\
+ "punpckhwd %%mm6, %%mm6 \n\t"\
+ "punpckhwd %%mm3, %%mm3 \n\t"\
+ "paddw %%mm7, %%mm0 \n\t"\
+ "paddw %%mm7, %%mm6 \n\t"\
+ "paddw %%mm7, %%mm3 \n\t"\
+ /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+ "packuswb %%mm0, %%mm2 \n\t"\
+ "packuswb %%mm6, %%mm5 \n\t"\
+ "packuswb %%mm3, %%mm4 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"
#if 0
#define FULL_YSCALEYUV2RGB \
- "pxor %%mm7, %%mm7 \n\t"\
- "movd %6, %%mm6 \n\t" /*yalpha1*/\
- "punpcklwd %%mm6, %%mm6 \n\t"\
- "punpcklwd %%mm6, %%mm6 \n\t"\
- "movd %7, %%mm5 \n\t" /*uvalpha1*/\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "punpcklwd %%mm5, %%mm5 \n\t"\
- "xor %%"REG_a", %%"REG_a" \n\t"\
- ASMALIGN(4)\
- "1: \n\t"\
- "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\
- "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\
- "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
- "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
- "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
- "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
- "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
- "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
- "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
- "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
- "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
- "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
- "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
- "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
- "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd %6, %%mm6 \n\t" /*yalpha1*/\
+ "punpcklwd %%mm6, %%mm6 \n\t"\
+ "punpcklwd %%mm6, %%mm6 \n\t"\
+ "movd %7, %%mm5 \n\t" /*uvalpha1*/\
+ "punpcklwd %%mm5, %%mm5 \n\t"\
+ "punpcklwd %%mm5, %%mm5 \n\t"\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+ ASMALIGN(4)\
+ "1: \n\t"\
+ "movq (%0, %%"REG_a",2), %%mm0 \n\t" /*buf0[eax]*/\
+ "movq (%1, %%"REG_a",2), %%mm1 \n\t" /*buf1[eax]*/\
+ "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
+ "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
+ "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
+ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+ "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+ "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+ "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+ "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
+ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
+ "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+ "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
+ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
+ "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+ "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
+ "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
+ "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
\
\
- "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
- "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
- "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
- "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
- "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
- "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
- "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
+ "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
+ "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\
+ "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+ "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\
+ "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
+ "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
\
\
- "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
- "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
- "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
- "paddw %%mm1, %%mm3 \n\t" /* B*/\
- "paddw %%mm1, %%mm0 \n\t" /* R*/\
- "packuswb %%mm3, %%mm3 \n\t"\
+ "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
+ "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\
+ "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\
+ "paddw %%mm1, %%mm3 \n\t" /* B*/\
+ "paddw %%mm1, %%mm0 \n\t" /* R*/\
+ "packuswb %%mm3, %%mm3 \n\t"\
\
- "packuswb %%mm0, %%mm0 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm2, %%mm1 \n\t" /* G*/\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm2, %%mm1 \n\t" /* G*/\
\
- "packuswb %%mm1, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
#endif
#define REAL_YSCALEYUV2PACKED(index, c) \
- "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
- "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
- "psraw $3, %%mm0 \n\t"\
- "psraw $3, %%mm1 \n\t"\
- "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
- "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
- "xor "#index", "#index" \n\t"\
- ASMALIGN(4)\
- "1: \n\t"\
- "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
- "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
- "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
- "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
- "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
- "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
- "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
- "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
- "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
- "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
- "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
- "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
- "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
- "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
- "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
- "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
- "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
- "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
- "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
- "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
- "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
+ "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
+ "psraw $3, %%mm0 \n\t"\
+ "psraw $3, %%mm1 \n\t"\
+ "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+ "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+ "xor "#index", "#index" \n\t"\
+ ASMALIGN(4)\
+ "1: \n\t"\
+ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
+ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
+ "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
+ "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
+ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+ "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+ "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
+ "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
+ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
+ "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
+ "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
+ "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
+ "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
+ "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
+ "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
+ "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+ "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+ "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+ "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+ "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+ "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
#define REAL_YSCALEYUV2RGB(index, c) \
- "xor "#index", "#index" \n\t"\
- ASMALIGN(4)\
- "1: \n\t"\
- "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
- "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\
- "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\
- "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
- "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
- "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
- "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
- "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
- "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\