diff options
author | arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-03-04 21:01:54 +0000 |
---|---|---|
committer | arpi_esp <arpi_esp@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-03-04 21:01:54 +0000 |
commit | 156ec7764eec11e78de6b7a17cef7679a9e30a63 (patch) | |
tree | a37ef9ade32aa15f51049aed533c7e7258749c6f /libmpeg2/idct_mmx.c | |
parent | c25474941c626032141c0a142274ce6769927d86 (diff) | |
download | mpv-156ec7764eec11e78de6b7a17cef7679a9e30a63.tar.bz2 mpv-156ec7764eec11e78de6b7a17cef7679a9e30a63.tar.xz |
libmpeg2-0.2.0 merge
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@37 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libmpeg2/idct_mmx.c')
-rw-r--r-- | libmpeg2/idct_mmx.c | 57 |
1 files changed, 28 insertions, 29 deletions
diff --git a/libmpeg2/idct_mmx.c b/libmpeg2/idct_mmx.c index 03ea5d7580..6c5d2ed96e 100644 --- a/libmpeg2/idct_mmx.c +++ b/libmpeg2/idct_mmx.c @@ -1,6 +1,6 @@ /* * idct_mmx.c - * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. * @@ -37,7 +37,7 @@ #if 0 -// C row IDCT - its just here to document the MMXEXT and MMX versions +/* C row IDCT - its just here to document the MMXEXT and MMX versions */ static inline void idct_row (int16_t * row, int offset, int16_t * table, int32_t * rounder) { @@ -76,7 +76,7 @@ static inline void idct_row (int16_t * row, int offset, #endif -// MMXEXT row IDCT +/* MMXEXT row IDCT */ #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ c4, c6, c4, c6, \ @@ -155,7 +155,7 @@ static inline void mmxext_row_tail (int16_t * row, int store) movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 - // slot + /* slot */ movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 } @@ -188,7 +188,7 @@ static inline void mmxext_row_mid (int16_t * row, int store, } -// MMX row IDCT +/* MMX row IDCT */ #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ c4, c6, -c4, -c2, \ @@ -276,7 +276,7 @@ static inline void mmx_row_tail (int16_t * row, int store) por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 - // slot + /* slot */ movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 } @@ -320,10 +320,10 @@ static inline void mmx_row_mid (int16_t * row, int store, // C column IDCT - its just here to document the MMXEXT and MMX versions static inline void idct_col (int16_t * col, int offset) { -// multiplication - as implemented on mmx +/* multiplication - as implemented on mmx */ #define F(c,x) (((c) * (x)) >> 16) -// saturation - it helps us handle torture test cases +/* saturation - it helps us handle torture test cases */ #define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x)) int16_t x0, x1, x2, x3, x4, x5, x6, x7; @@ -344,25 +344,25 @@ static inline void idct_col (int16_t * col, int offset) u04 = S (x0 + x4); v04 = S (x0 - x4); - u26 = S (F (T2, x6) + x2); // -0.5 - v26 = S (F (T2, x2) - x6); // -0.5 + u26 = S (F (T2, x6) + x2); + v26 = S (F (T2, x2) - x6); a0 = S (u04 + u26); a1 = S (v04 + v26); a2 = S (v04 - v26); a3 = S (u04 - u26); - u17 = S (F (T1, x7) + x1); // -0.5 - v17 = S (F (T1, x1) - x7); // -0.5 - u35 = S (F (T3, x5) + x3); // -0.5 - v35 = S (F (T3, x3) - x5); // -0.5 + u17 = S (F (T1, x7) + x1); + v17 = S (F (T1, x1) - x7); + u35 = S (F (T3, x5) + x3); + v35 = S (F (T3, x3) - x5); b0 = S (u17 + u35); b3 = S (v17 - v35); u12 = S (u17 - u35); v12 = S (v17 + v35); - u12 = S (2 * F (C4, u12)); // -0.5 - v12 = S (2 * F (C4, v12)); // -0.5 + u12 = S (2 * F (C4, u12)); + v12 = S (2 * F (C4, v12)); b1 = S (u12 + v12); b2 = S (u12 - v12); @@ -400,7 +400,6 @@ static inline void idct_col (int16_t * col, int offset) static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; - static mmx_t scratch0, scratch1; /* column code adapted from peter gubanov */ /* http://www.elecard.com/peter/idct.shtml */ @@ -428,7 +427,7 @@ static inline void idct_col (int16_t * col, int offset) paddsw_r2r (mm2, mm1); // mm1 = u17 pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 - // slot + /* slot */ movq_r2r (mm4, mm2); // mm2 = T2 paddsw_r2r (mm3, mm5); // mm5 = T3*x3 @@ -448,7 +447,7 @@ static inline void idct_col (int16_t * col, int offset) psubsw_r2r (mm3, mm4); // mm4 = v26 paddsw_r2r (mm6, mm5); // mm5 = v12 - movq_r2m (mm0, scratch0); // save b3 + movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 movq_r2r (mm1, mm6); // mm6 = u17 paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 @@ -463,7 +462,7 @@ static inline void idct_col (int16_t * col, int offset) movq_m2r (*_C4, mm0); // mm0 = C4/2 psubsw_r2r (mm5, mm7); // mm7 = u12-v12 - movq_r2m (mm6, scratch1); // save b0 + movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 pmulhw_r2r (mm0, mm1); // mm1 = b1/2 movq_r2r (mm4, mm6); // mm6 = v26 @@ -496,7 +495,7 @@ static inline void idct_col (int16_t * col, int offset) psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 psubsw_r2r (mm1, mm6); // mm6 = a1-b1 - movq_m2r (scratch1, mm1); // mm1 = b0 + movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 psubsw_r2r (mm7, mm2); // mm2 = a2-b2 psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 @@ -508,7 +507,7 @@ static inline void idct_col (int16_t * col, int offset) movq_r2m (mm3, *(col+offset+2*8)); // save y2 paddsw_r2r (mm1, mm5); // mm5 = a0+b0 - movq_m2r (scratch0, mm4); // mm4 = b3 + movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 psubsw_r2r (mm1, mm7); // mm7 = a0-b0 psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 @@ -538,17 +537,17 @@ static int32_t rounder0[] ATTR_ALIGN(8) = rounder ((1 << (COL_SHIFT - 1)) - 0.5); static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); static int32_t rounder1[] ATTR_ALIGN(8) = - rounder (1.25683487303); // C1*(C1/C4+C1+C7)/2 + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ static int32_t rounder7[] ATTR_ALIGN(8) = - rounder (-0.25); // C1*(C7/C4+C7-C1)/2 + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ static int32_t rounder2[] ATTR_ALIGN(8) = - rounder (0.60355339059); // C2 * (C6+C2)/2 + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ static int32_t rounder6[] ATTR_ALIGN(8) = - rounder (-0.25); // C2 * (C6-C2)/2 + rounder (-0.25); /* C2 * (C6-C2)/2 */ static int32_t rounder3[] ATTR_ALIGN(8) = - rounder (0.087788325588); // C3*(-C3/C4+C3+C5)/2 + rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ static int32_t rounder5[] ATTR_ALIGN(8) = - rounder (-0.441341716183); // C3*(-C5/C4+C5-C3)/2 + rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ @@ -693,7 +692,7 @@ void idct_mmx_init (void) extern uint8_t scan_alt[64]; int i, j; - // the mmx/mmxext idct uses a reordered input, so we patch scan tables + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ for (i = 0; i < 64; i++) { j = scan_norm[i]; |