diff options
author | henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-08-02 11:26:43 +0000 |
---|---|---|
committer | henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-08-02 11:26:43 +0000 |
commit | 943139cc78038c3aea0837229298cb2c08e3f8a2 (patch) | |
tree | 56b2a2dac2c09fe1016e3e146ec19cb2aae0777a /libmpeg2/motion_comp_alpha.c | |
parent | 4779094c4be9af5ec0c5145d8a460b75e4a510c8 (diff) | |
download | mpv-943139cc78038c3aea0837229298cb2c08e3f8a2.tar.bz2 mpv-943139cc78038c3aea0837229298cb2c08e3f8a2.tar.xz |
Importing libmpeg2 from mpeg2dec-0.4.0b
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@12933 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libmpeg2/motion_comp_alpha.c')
-rw-r--r-- | libmpeg2/motion_comp_alpha.c | 198 |
1 files changed, 99 insertions, 99 deletions
diff --git a/libmpeg2/motion_comp_alpha.c b/libmpeg2/motion_comp_alpha.c index 86deb33fcd..05cd550841 100644 --- a/libmpeg2/motion_comp_alpha.c +++ b/libmpeg2/motion_comp_alpha.c @@ -1,6 +1,6 @@ /* * motion_comp_alpha.c - * Copyright (C) 2002 Falk Hueffner <falk@debian.org> + * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. * See http://libmpeg2.sourceforge.net/ for updates. @@ -27,135 +27,136 @@ #include <inttypes.h> #include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" #include "alpha_asm.h" -static inline uint64_t avg2(uint64_t a, uint64_t b) +static inline uint64_t avg2 (uint64_t a, uint64_t b) { - return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); + return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1); } // Load two unaligned quadwords from addr. This macro only works if // addr is actually unaligned. -#define ULOAD16(ret_l, ret_r, addr) \ +#define ULOAD16(ret_l,ret_r,addr) \ do { \ - uint64_t _l = ldq_u(addr + 0); \ - uint64_t _m = ldq_u(addr + 8); \ - uint64_t _r = ldq_u(addr + 16); \ - ret_l = extql(_l, addr) | extqh(_m, addr); \ - ret_r = extql(_m, addr) | extqh(_r, addr); \ + uint64_t _l = ldq_u (addr + 0); \ + uint64_t _m = ldq_u (addr + 8); \ + uint64_t _r = ldq_u (addr + 16); \ + ret_l = extql (_l, addr) | extqh (_m, addr); \ + ret_r = extql (_m, addr) | extqh (_r, addr); \ } while (0) // Load two aligned quadwords from addr. -#define ALOAD16(ret_l, ret_r, addr) \ +#define ALOAD16(ret_l,ret_r,addr) \ do { \ - ret_l = ldq(addr); \ - ret_r = ldq(addr + 8); \ + ret_l = ldq (addr); \ + ret_r = ldq (addr + 8); \ } while (0) -#define OP8(LOAD, LOAD16, STORE) \ +#define OP8(LOAD,LOAD16,STORE) \ do { \ - STORE(LOAD(pixels), block); \ + STORE (LOAD (pixels), block); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP16(LOAD, LOAD16, STORE) \ +#define OP16(LOAD,LOAD16,STORE) \ do { \ uint64_t l, r; \ - LOAD16(l, r, pixels); \ - STORE(l, block); \ - STORE(r, block + 8); \ + LOAD16 (l, r, pixels); \ + STORE (l, block); \ + STORE (r, block + 8); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP8_X2(LOAD, LOAD16, STORE) \ +#define OP8_X2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1; \ \ - p0 = LOAD(pixels); \ + p0 = LOAD (pixels); \ p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56); \ - STORE(avg2(p0, p1), block); \ + STORE (avg2 (p0, p1), block); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP16_X2(LOAD, LOAD16, STORE) \ +#define OP16_X2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1; \ \ - LOAD16(p0, p1, pixels); \ - STORE(avg2(p0, p0 >> 8 | p1 << 56), block); \ - STORE(avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \ - block + 8); \ + LOAD16 (p0, p1, pixels); \ + STORE (avg2(p0, p0 >> 8 | p1 << 56), block); \ + STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \ + block + 8); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP8_Y2(LOAD, LOAD16, STORE) \ +#define OP8_Y2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1; \ - p0 = LOAD(pixels); \ + p0 = LOAD (pixels); \ pixels += line_size; \ - p1 = LOAD(pixels); \ + p1 = LOAD (pixels); \ do { \ - uint64_t av = avg2(p0, p1); \ + uint64_t av = avg2 (p0, p1); \ if (--h == 0) line_size = 0; \ pixels += line_size; \ p0 = p1; \ - p1 = LOAD(pixels); \ - STORE(av, block); \ + p1 = LOAD (pixels); \ + STORE (av, block); \ block += line_size; \ } while (h); \ } while (0) -#define OP16_Y2(LOAD, LOAD16, STORE) \ +#define OP16_Y2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0l, p0r, p1l, p1r; \ - LOAD16(p0l, p0r, pixels); \ + LOAD16 (p0l, p0r, pixels); \ pixels += line_size; \ - LOAD16(p1l, p1r, pixels); \ + LOAD16 (p1l, p1r, pixels); \ do { \ uint64_t avl, avr; \ if (--h == 0) line_size = 0; \ - avl = avg2(p0l, p1l); \ - avr = avg2(p0r, p1r); \ + avl = avg2 (p0l, p1l); \ + avr = avg2 (p0r, p1r); \ p0l = p1l; \ p0r = p1r; \ pixels += line_size; \ - LOAD16(p1l, p1r, pixels); \ - STORE(avl, block); \ - STORE(avr, block + 8); \ + LOAD16 (p1l, p1r, pixels); \ + STORE (avl, block); \ + STORE (avr, block + 8); \ block += line_size; \ } while (h); \ } while (0) -#define OP8_XY2(LOAD, LOAD16, STORE) \ +#define OP8_XY2(LOAD,LOAD16,STORE) \ do { \ uint64_t pl, ph; \ - uint64_t p1 = LOAD(pixels); \ + uint64_t p1 = LOAD (pixels); \ uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \ \ - ph = ((p1 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p2 & ~BYTE_VEC(0x03)) >> 2); \ - pl = (p1 & BYTE_VEC(0x03)) \ - + (p2 & BYTE_VEC(0x03)); \ + ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + pl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ \ do { \ uint64_t npl, nph; \ \ pixels += line_size; \ - p1 = LOAD(pixels); \ + p1 = LOAD (pixels); \ p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \ - nph = ((p1 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p2 & ~BYTE_VEC(0x03)) >> 2); \ - npl = (p1 & BYTE_VEC(0x03)) \ - + (p2 & BYTE_VEC(0x03)); \ + nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + npl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ \ - STORE(ph + nph \ - + (((pl + npl + BYTE_VEC(0x02)) >> 2) \ - & BYTE_VEC(0x03)), block); \ + STORE (ph + nph + \ + (((pl + npl + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC (0x03)), block); \ \ block += line_size; \ pl = npl; \ @@ -163,44 +164,44 @@ static inline uint64_t avg2(uint64_t a, uint64_t b) } while (--h); \ } while (0) -#define OP16_XY2(LOAD, LOAD16, STORE) \ +#define OP16_XY2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \ - LOAD16(p0, p2, pixels); \ + LOAD16 (p0, p2, pixels); \ p1 = p0 >> 8 | (p2 << 56); \ - p3 = p2 >> 8 | ((uint64_t) pixels[16] << 56); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ \ - ph_l = ((p0 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p1 & ~BYTE_VEC(0x03)) >> 2); \ - pl_l = (p0 & BYTE_VEC(0x03)) \ - + (p1 & BYTE_VEC(0x03)); \ - ph_r = ((p2 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p3 & ~BYTE_VEC(0x03)) >> 2); \ - pl_r = (p2 & BYTE_VEC(0x03)) \ - + (p3 & BYTE_VEC(0x03)); \ + ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC(0x03))); \ + ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ \ do { \ uint64_t npl_l, nph_l, npl_r, nph_r; \ \ pixels += line_size; \ - LOAD16(p0, p2, pixels); \ + LOAD16 (p0, p2, pixels); \ p1 = p0 >> 8 | (p2 << 56); \ - p3 = p2 >> 8 | ((uint64_t) pixels[16] << 56); \ - nph_l = ((p0 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p1 & ~BYTE_VEC(0x03)) >> 2); \ - npl_l = (p0 & BYTE_VEC(0x03)) \ - + (p1 & BYTE_VEC(0x03)); \ - nph_r = ((p2 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p3 & ~BYTE_VEC(0x03)) >> 2); \ - npl_r = (p2 & BYTE_VEC(0x03)) \ - + (p3 & BYTE_VEC(0x03)); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ + nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC (0x03))); \ + nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ \ - STORE(ph_l + nph_l \ - + (((pl_l + npl_l + BYTE_VEC(0x02)) >> 2) \ - & BYTE_VEC(0x03)), block); \ - STORE(ph_r + nph_r \ - + (((pl_r + npl_r + BYTE_VEC(0x02)) >> 2) \ - & BYTE_VEC(0x03)), block + 8); \ + STORE (ph_l + nph_l + \ + (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block); \ + STORE (ph_r + nph_r + \ + (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block + 8); \ \ block += line_size; \ pl_l = npl_l; \ @@ -210,34 +211,33 @@ static inline uint64_t avg2(uint64_t a, uint64_t b) } while (--h); \ } while (0) -#define MAKE_OP(OPNAME, SIZE, SUFF, OPKIND, STORE) \ +#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \ static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \ (uint8_t *restrict block, const uint8_t *restrict pixels, \ int line_size, int h) \ { \ if ((uint64_t) pixels & 0x7) { \ - OPKIND(uldq, ULOAD16, STORE); \ + OPKIND (uldq, ULOAD16, STORE); \ } else { \ - OPKIND(ldq, ALOAD16, STORE); \ + OPKIND (ldq, ALOAD16, STORE); \ } \ } -#define PIXOP(OPNAME, STORE) \ - MAKE_OP(OPNAME, 8, o, OP8, STORE); \ - MAKE_OP(OPNAME, 8, x, OP8_X2, STORE); \ - MAKE_OP(OPNAME, 8, y, OP8_Y2, STORE); \ - MAKE_OP(OPNAME, 8, xy, OP8_XY2, STORE); \ - MAKE_OP(OPNAME, 16, o, OP16, STORE); \ - MAKE_OP(OPNAME, 16, x, OP16_X2, STORE); \ - MAKE_OP(OPNAME, 16, y, OP16_Y2, STORE); \ - MAKE_OP(OPNAME, 16, xy, OP16_XY2, STORE); - -#define STORE(l, b) stq(l, b) -PIXOP(put, STORE); - +#define PIXOP(OPNAME,STORE) \ + MAKE_OP (OPNAME, 8, o, OP8, STORE); \ + MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \ + MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \ + MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \ + MAKE_OP (OPNAME, 16, o, OP16, STORE); \ + MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \ + MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \ + MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE); + +#define STORE(l,b) stq (l, b) +PIXOP (put, STORE); #undef STORE -#define STORE(l, b) stq(avg2(l, ldq(b)), b); -PIXOP(avg, STORE); +#define STORE(l,b) stq (avg2 (l, ldq (b)), b); +PIXOP (avg, STORE); mpeg2_mc_t mpeg2_mc_alpha = { { MC_put_o_16_alpha, MC_put_x_16_alpha, |