diff options
author | anders <anders@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-12-20 15:30:22 +0000 |
---|---|---|
committer | anders <anders@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2001-12-20 15:30:22 +0000 |
commit | 30c2c12d50b1b6b3b2dda4ecc9ce40e951f57b33 (patch) | |
tree | facfd96b7b63c5d3ef24cea4c7929d890a234b38 /libao2/fir.h | |
parent | 12dd9ded9cb31217a9a43a5404e35c0b1559becf (diff) | |
download | mpv-30c2c12d50b1b6b3b2dda4ecc9ce40e951f57b33.tar.bz2 mpv-30c2c12d50b1b6b3b2dda4ecc9ce40e951f57b33.tar.xz |
Added plugin for fractional resampling (alpha code)
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3632 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libao2/fir.h')
-rw-r--r-- | libao2/fir.h | 144 |
1 files changed, 144 insertions, 0 deletions
/*=============================================================================
//
//  This file is part of mplayer.
//
//  mplayer is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  mplayer is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with mplayer; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
//  Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
//
//=============================================================================
*/

#ifndef __FIR_H__
#define __FIR_H__

/* 4 and 8 tap FIR filters on 16 bit samples, implemented with SSE
   instructions when HAVE_SSE is defined and in plain C otherwise.
   (Only the 4 and 8 tap variants are defined in this file.)

   The including file selects the filter length before including this
   header: L4 selects 4 taps; in the SSE build L8 selects 8 taps, in the
   plain C build anything other than L4 yields the 8 tap filter.

   Macro interface (every macro expands to complete statement(s), so it can
   be used with or without a trailing semicolon):

     LOAD_QUE(x)     Load the sample queue from x (no-op in plain C)
     SAVE_QUE(x)     Store the sample queue back to x (no-op in plain C)
     UPDATE_QUE(in)  Push the sample in[0] into the queue
     FIR(x,w,y)      Compute one output sample

   int16_t* x   Input data (sample queue)
   int16_t* w   Filter weights
   int16_t* y   Output value

   C equivalent of FIR():
     acc = 0;
     for(int i = 0 ; i < L ; i++)
       acc += w[i]*x[i];
     y[0] = acc >> 16;

   int16_t (and uint32_t for the plain C build) must be declared by the
   including file, e.g. through <stdint.h> or <inttypes.h>.
*/

#ifdef HAVE_SSE

/* In the SSE build the sample queue lives in MMX registers between macro
   invocations (mm2 for 4 taps, mm5/mm4 for 8 taps).  mm0 and mm1 are used as
   scratch registers.  None of these registers are announced to the compiler
   as clobbered, so the macros rely on no other MMX/x87 code running between
   LOAD_QUE and SAVE_QUE.  FIR() ignores its x argument in this build: the
   samples are taken from the register queue. */

// This block should be MMX only compatible, but it isn't...
// (pinsrw is not part of the original MMX instruction set)
#ifdef L4

/* Load 4 samples, x[0..3], into mm2 */
#define LOAD_QUE(x) \
        __asm __volatile("movq %0, %%mm2\n\t" \
                         : \
                         :"m"((x)[0]) \
                         :"memory");

/* Store the 4 queued samples from mm2 to x[0..3] */
#define SAVE_QUE(x) \
        __asm __volatile("movq %%mm2, %0\n\t" \
                         :"=m"((x)[0]) \
                         : \
                         :"memory");

/* Shift the queue by one 16 bit sample and insert in[0] as word 0 */
#define UPDATE_QUE(in) \
        __asm __volatile("psllq $16, %%mm2\n\t" \
                         "pinsrw $0, %0,%%mm2\n\t" \
                         : \
                         :"m" ((in)[0]) \
                         :"memory");

/* Multiply-accumulate the queue with w[0..3], add the two 32 bit partial
   sums and store bits 16..31 of the total in y[0] */
#define FIR(x,w,y) \
        __asm __volatile("movq %%mm2, %%mm0\n\t" \
                         "pmaddwd %1, %%mm0\n\t" \
                         "movq %%mm0, %%mm1\n\t" \
                         "psrlq $32, %%mm1\n\t" \
                         "paddd %%mm0, %%mm1\n\t" \
                         "movd %%mm1, %%esi\n\t" \
                         "shrl $16, %%esi\n\t" \
                         "movw %%si, %0\n\t" \
                         : "=m" ((y)[0]) \
                         : "m" ((w)[0]) \
                         : "memory", "%esi");
#endif /* L4 */

// It is possible to make the 8 tap filter a lot faster by using the
// 128 bit registers, feel free to optimize.
#ifdef L8

/* Load 8 samples: x[0..3] into mm5 and x[4..7] into mm4 */
#define LOAD_QUE(x) \
        __asm __volatile("movq %0, %%mm5\n\t" \
                         "movq %1, %%mm4\n\t" \
                         : \
                         :"m"((x)[0]), \
                          "m"((x)[4]) \
                         :"memory");

/* Store the 8 queued samples: mm5 to x[0..3] and mm4 to x[4..7] */
#define SAVE_QUE(x) \
        __asm __volatile("movq %%mm5, %0\n\t" \
                         "movq %%mm4, %1\n\t" \
                         :"=m"((x)[0]), \
                          "=m"((x)[4]) \
                         : \
                         :"memory");

// The instruction below could replace lines 2 to 4 of the macro below,
// but it can not be used because of a compiler bug ???
// "pextrw $3, %%mm5,%%eax\n\t"

/* Shift the 8 sample queue by one sample: the top word of mm5 is carried
   over into word 0 of mm4, then in[0] is inserted as word 0 of mm5 */
#define UPDATE_QUE(in) \
        __asm __volatile("psllq $16, %%mm4\n\t" \
                         "movq %%mm5, %%mm0\n\t" \
                         "psrlq $48, %%mm0\n\t" \
                         "movd %%mm0, %%eax\n\t" \
                         "pinsrw $0, %%eax,%%mm4\n\t" \
                         "psllq $16, %%mm5\n\t" \
                         "pinsrw $0, %0,%%mm5\n\t" \
                         : \
                         :"m" ((in)[0]) \
                         :"memory", "%eax");

/* Multiply-accumulate mm5 with w[0..3] and mm4 with w[4..7], add all 32 bit
   partial sums and store bits 16..31 of the total in y[0] */
#define FIR(x,w,y) \
        __asm __volatile("movq %%mm5, %%mm0\n\t" \
                         "pmaddwd %1, %%mm0\n\t" \
                         "movq %%mm4, %%mm1\n\t" \
                         "pmaddwd %2, %%mm1\n\t" \
                         "paddd %%mm1, %%mm0\n\t" \
                         "movq %%mm0, %%mm1\n\t" \
                         "psrlq $32, %%mm1\n\t" \
                         "paddd %%mm0, %%mm1\n\t" \
                         "movd %%mm1, %%esi\n\t" \
                         "shrl $16, %%esi\n\t" \
                         "movw %%si, %0\n\t" \
                         : "=m" ((y)[0]) \
                         : "m" ((w)[0]), \
                           "m" ((w)[4]) \
                         : "memory", "%esi");
#endif /* L8 */

#else /* HAVE_SSE */

/* Plain C build: the queue stays in memory, nothing to load or save */
#define LOAD_QUE(x)
#define SAVE_QUE(x)

/* Push the sample *inm into the queue.

   NOTE: this macro uses three names from the scope it is expanded in:
     L   number of taps (the index is masked with L-1)
     xi  current write index into the queue
     x   queue buffer; both x[xi] and x[xi+L] are written, so it must hold
         at least 2*L samples

   The index is computed as (xi - 1) & (L - 1) instead of the former
   xi=(--xi)&(L-1), which modified xi twice without an intervening sequence
   point (undefined behaviour in C). */
#define UPDATE_QUE(inm) \
  { \
    xi = (xi - 1) & (L - 1); \
    x[xi] = x[xi + L] = *(inm); \
  }

/* Sum of the four tap products w[o+i]*x[o+i], i = 0..3, accumulated with
   32 bit wrap-around like the paddd instruction of the SSE build (no signed
   overflow in C) */
#define FIR_SUM4(x,w,o) \
  ((uint32_t)((w)[(o) + 0] * (x)[(o) + 0]) + \
   (uint32_t)((w)[(o) + 1] * (x)[(o) + 1]) + \
   (uint32_t)((w)[(o) + 2] * (x)[(o) + 2]) + \
   (uint32_t)((w)[(o) + 3] * (x)[(o) + 3]))

/* y[0] = bits 16..31 of the sum of all tap products, i.e. the same value
   the SSE build stores.  For the 8 tap filter all eight products are summed
   BEFORE the shift; shifting each half separately would drop the carry
   between the halves and could be one LSB off from the SSE result. */
#ifdef L4
#define FIR(x,w,y) \
  (y)[0] = (int16_t)(FIR_SUM4(x, w, 0) >> 16);
#else
#define FIR(x,w,y) \
  (y)[0] = (int16_t)((FIR_SUM4(x, w, 0) + FIR_SUM4(x, w, 4)) >> 16);
#endif /* L4 */

#endif /* HAVE_SSE */

#endif /* __FIR_H__ */