summaryrefslogtreecommitdiffstats
path: root/libao2/fir.h
diff options
context:
space:
mode:
Diffstat (limited to 'libao2/fir.h')
-rw-r--r--libao2/fir.h144
1 files changed, 144 insertions, 0 deletions
diff --git a/libao2/fir.h b/libao2/fir.h
new file mode 100644
index 0000000000..0124d03d9e
--- /dev/null
+++ b/libao2/fir.h
@@ -0,0 +1,144 @@
+/*=============================================================================
+//
+// This file is part of mplayer.
+//
+// mplayer is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// mplayer is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with mplayer; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au
+//
+//=============================================================================
+*/
+
+#ifndef __FIR_H__
+#define __FIR_H__
+
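+/* 4 and 8 tap FIR filters implemented using SSE instructions, with a
+ plain C fallback when HAVE_SSE is not defined (no 16 tap variant is
+ provided in this file). L4 or L8 selects the filter length.
+ int16_t* x Input data
+ int16_t* y Output value
+ int16_t* w Filter weights
+
+ Reference C code for what the SSE versions compute
+ int32_t sum = 0;
+ for(int i = 0 ; i < L ; i++)
+ sum += w[i]*x[i];
+ *y = sum >> 16;
+*/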
+
+#ifdef HAVE_SSE
+
+// This block is almost MMX-only compatible; it is not quite, presumably
+// because pinsrw requires SSE.
+#ifdef L4
+/* LOAD_QUE(x): load 64 bits starting at x[0] (four 16-bit samples) into
+   MMX register mm2, which the other L4 macros use as the sample queue.
+   NOTE(review): mm2 is not declared to the compiler, so the queue only
+   survives if nothing else touches the MMX state between the macros. */
+#define LOAD_QUE(x) \
+ __asm __volatile("movq %0, %%mm2\n\t" \
+ : \
+ :"m"((x)[0]) \
+ :"memory");
+/* SAVE_QUE(x): store the 64-bit sample queue held in mm2 to memory
+   starting at x[0] (four 16-bit samples).
+   The argument is parenthesized so that pointer expressions such as
+   SAVE_QUE(buf + 4) expand correctly. */
+#define SAVE_QUE(x) \
+ __asm __volatile("movq %%mm2, %0\n\t" \
+ :"=m"((x)[0]) \
+ : \
+ :"memory");
+/* UPDATE_QUE(in): push one new sample into the queue held in mm2.
+   psllq shifts the 64-bit queue left by 16 bits (the sample in the top
+   word is dropped), then pinsrw writes in[0] into word 0. */
+#define UPDATE_QUE(in) \
+ __asm __volatile("psllq $16, %%mm2\n\t" \
+ "pinsrw $0, %0,%%mm2\n\t" \
+ : \
+ :"m" ((in)[0]) \
+ :"memory");
+/* FIR(x,w,y): 4-tap filter over the queue held in mm2.
+   pmaddwd multiplies the four 16-bit queue samples by w[0..3] and adds
+   adjacent products into two 32-bit sums; the psrlq/paddd pair adds the
+   upper sum to the lower one. The 32-bit total is moved to esi, shifted
+   right by 16, and its low 16 bits are stored in y[0].
+   The x argument is not used here: the samples come from mm2.
+   Overwrites mm0, mm1 and esi (only esi is listed as a clobber). */
+#define FIR(x,w,y) \
+ __asm __volatile("movq %%mm2, %%mm0\n\t" \
+ "pmaddwd %1, %%mm0\n\t" \
+ "movq %%mm0, %%mm1\n\t" \
+ "psrlq $32, %%mm1\n\t" \
+ "paddd %%mm0, %%mm1\n\t" \
+ "movd %%mm1, %%esi\n\t" \
+ "shrl $16, %%esi\n\t" \
+ "movw %%si, %0\n\t" \
+ : "=m" ((y)[0]) \
+ : "m" ((w)[0]) \
+ : "memory", "%esi");
+#endif /* L4 */
+
+// It is possible to make the 8 tap filter a lot faster by using the
+// 128 bit (XMM) registers, feel free to optimize.
+#ifdef L8
+/* LOAD_QUE(x): load the 8-sample queue into MMX registers:
+   64 bits starting at x[0] go to mm5, 64 bits starting at x[4] go to mm4.
+   NOTE(review): mm4/mm5 are not declared to the compiler, so the queue
+   only survives if nothing else touches the MMX state between the macros. */
+#define LOAD_QUE(x) \
+ __asm __volatile("movq %0, %%mm5\n\t" \
+ "movq %1, %%mm4\n\t" \
+ : \
+ :"m"((x)[0]), \
+ "m"((x)[4]) \
+ :"memory");
+/* SAVE_QUE(x): store the 8-sample queue back to memory:
+   mm5 is written starting at x[0], mm4 starting at x[4]. */
+#define SAVE_QUE(x) \
+ __asm __volatile("movq %%mm5, %0\n\t" \
+ "movq %%mm4, %1\n\t" \
+ :"=m"((x)[0]), \
+ "=m"((x)[4]) \
+ : \
+ :"memory");
+
+// The single instruction below could replace instructions 2 to 4 (movq,
+// psrlq, movd) in the macro below, but is not used because of a
+// suspected compiler bug.
+// "pextrw $3, %%mm5,%%eax\n\t"
+/* UPDATE_QUE(in): push one new sample into the 8-sample queue mm4:mm5.
+   mm4 is shifted left by 16 bits; the top word of mm5 is extracted
+   through mm0 and eax and inserted into word 0 of mm4; then mm5 is
+   shifted left by 16 bits and in[0] is inserted into its word 0.
+   Overwrites mm0 and eax (only eax is listed as a clobber). */
+#define UPDATE_QUE(in) \
+ __asm __volatile("psllq $16, %%mm4\n\t" \
+ "movq %%mm5, %%mm0\n\t" \
+ "psrlq $48, %%mm0\n\t" \
+ "movd %%mm0, %%eax\n\t" \
+ "pinsrw $0, %%eax,%%mm4\n\t" \
+ "psllq $16, %%mm5\n\t" \
+ "pinsrw $0, %0,%%mm5\n\t" \
+ : \
+ :"m" ((in)[0]) \
+ :"memory", "%eax");
+/* FIR(x,w,y): 8-tap filter over the queue held in mm5 (paired with
+   w[0..3]) and mm4 (paired with w[4..7]).
+   Each pmaddwd multiplies four 16-bit pairs and adds adjacent products
+   into two 32-bit sums; the first paddd adds the two halves together and
+   the psrlq/paddd pair folds the upper sum into the lower one. The 32-bit
+   total is moved to esi, shifted right by 16, and its low 16 bits are
+   stored in y[0].
+   The x argument is not used here: the samples come from mm4/mm5.
+   Overwrites mm0, mm1 and esi (only esi is listed as a clobber). */
+#define FIR(x,w,y) \
+ __asm __volatile("movq %%mm5, %%mm0\n\t" \
+ "pmaddwd %1, %%mm0\n\t" \
+ "movq %%mm4, %%mm1\n\t" \
+ "pmaddwd %2, %%mm1\n\t" \
+ "paddd %%mm1, %%mm0\n\t" \
+ "movq %%mm0, %%mm1\n\t" \
+ "psrlq $32, %%mm1\n\t" \
+ "paddd %%mm0, %%mm1\n\t" \
+ "movd %%mm1, %%esi\n\t" \
+ "shrl $16, %%esi\n\t" \
+ "movw %%si, %0\n\t" \
+ : "=m" ((y)[0]) \
+ : "m" ((w)[0]), \
+ "m" ((w)[4]) \
+ : "memory", "%esi");
+#endif /* L8 */
+
+#else /* HAVE_SSE */
+
+/* Plain C fallback: the queue is the array x of the code using these
+   macros, so there is nothing to load or save. */
+#define LOAD_QUE(x)
+#define SAVE_QUE(x)
+/* UPDATE_QUE(inm): step the index xi back by one, wrapped with the mask
+   (L-1) (so L is presumably a power of two), and store the new sample
+   *inm at both x[xi] and x[xi+L]; x therefore needs at least 2*L elements.
+   Uses the variables x and xi and the macro L from the expansion site.
+   xi is computed from (xi-1) rather than (--xi) so that it is modified
+   only once per statement, and the body is a single block so that both
+   statements stay together under an unbraced if/for. */
+#define UPDATE_QUE(inm) { \
+ xi=(xi-1)&(L-1); \
+ x[xi]=x[xi+L]=*(inm); \
+}
+
+/* Plain C FIR(x,w,y): y[0] = (sum of w[i]*x[i] over all taps) >> 16,
+   with 4 taps when L4 is defined and 8 taps otherwise.
+   The 8-tap variant adds all eight products before shifting, as the SSE
+   version does; shifting each half separately would drop the carry
+   between the halves and could be one LSB low.
+   NOTE(review): the sum is accumulated in 32 bits, so the weights are
+   assumed to be scaled such that it cannot overflow - confirm.
+   Arguments are parenthesized so pointer expressions can be passed. */
+#ifdef L4
+#define FIR(x,w,y) \
+ (y)[0]=((w)[0]*(x)[0]+(w)[1]*(x)[1]+(w)[2]*(x)[2]+(w)[3]*(x)[3]) >> 16;
+#else
+#define FIR(x,w,y){ \
+ int32_t fir_acc = (w)[0]*(x)[0]+(w)[1]*(x)[1]+(w)[2]*(x)[2]+(w)[3]*(x)[3] \
+ +(w)[4]*(x)[4]+(w)[5]*(x)[5]+(w)[6]*(x)[6]+(w)[7]*(x)[7]; \
+ (y)[0] = fir_acc >> 16; \
+}
+#endif /* L4 */
+
+#endif /* HAVE_SSE */
+
+#endif /* __FIR_H__ */
+
+