summary | refs | log | tree | commit | diff | stats
path: root/mp3lib/dct64_sse.c
diff options
context:
space:
mode:
author: Uoti Urpala <uau@mplayer2.org> 2011-04-02 07:02:43 +0300
committer: Uoti Urpala <uau@mplayer2.org> 2011-04-02 07:28:53 +0300
commit: f9b5f2870cd7ebb8fe70eeb65e22d11bb88d5202 (patch)
tree: 3d369fff359296307c05e120b4d2b53ea5974928 /mp3lib/dct64_sse.c
parent: b8e1456c25de18b8ecb8922d870bfe9161900d2a (diff)
download: mpv-f9b5f2870cd7ebb8fe70eeb65e22d11bb88d5202.tar.bz2
download: mpv-f9b5f2870cd7ebb8fe70eeb65e22d11bb88d5202.tar.xz
mp3lib: drop internal mp3lib tree
Delete mp3lib which has been the default mp3 decoder until now. In addition to being an unnecessary embedded library, it now fails to compile correctly with the new gcc-4.6, producing noise. After the deletion the default decoder priority for mp3 will be first libmpg123 (a newer version of the code that mp3lib was based on) if available, then ffmp3float which should be available in all normal compiles. I think that some tweaking may be required as these decoder alternatives get wider testing, but any problems should be solvable and there should be no need for mp3lib.
Diffstat (limited to 'mp3lib/dct64_sse.c')
-rw-r--r-- mp3lib/dct64_sse.c 423
1 files changed, 0 insertions, 423 deletions
diff --git a/mp3lib/dct64_sse.c b/mp3lib/dct64_sse.c
deleted file mode 100644
index bcf3b97f5b..0000000000
--- a/mp3lib/dct64_sse.c
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Discrete Cosine Tansform (DCT) for SSE
- * Copyright (c) 2006 Zuxy MENG <zuxy.meng@gmail.com>
- * based upon code from mp3lib/dct64.c, mp3lib/dct64_altivec.c
- * and mp3lib/dct64_mmx.c
- */
-
-#include "libavutil/mem.h"
-
-#include "mpg123.h"
-
-extern float __attribute__((aligned(16))) costab_mmx[];
-
-static const int ppnn[4] __attribute__((aligned(16))) =
-{ 0, 0, 1 << 31, 1 << 31 };
-
-static const int pnpn[4] __attribute__((aligned(16))) =
-{ 0, 1 << 31, 0, 1 << 31 };
-
-static const int nnnn[4] __attribute__((aligned(16))) =
-{ 1 << 31, 1 << 31, 1 << 31, 1 << 31 };
-
-void dct64_sse(short *out0,short *out1,real *c)
-{
- DECLARE_ALIGNED(16, real, b1[0x20]);
- DECLARE_ALIGNED(16, real, b2[0x20]);
- static real const one = 1.f;
-
- {
- real *costab = costab_mmx;
- int i;
-
- for (i = 0; i < 0x20 / 2; i += 4)
- {
- __asm__(
- "movaps %2, %%xmm3\n\t"
- "shufps $27, %%xmm3, %%xmm3\n\t"
- "movaps %3, %%xmm1\n\t"
- "movaps %%xmm1, %%xmm4\n\t"
- "movaps %4, %%xmm2\n\t"
- "shufps $27, %%xmm4, %%xmm4\n\t"
- "movaps %%xmm2, %%xmm0\n\t"
- "shufps $27, %%xmm0, %%xmm0\n\t"
- "addps %%xmm0, %%xmm1\n\t"
- "movaps %%xmm1, %0\n\t"
- "subps %%xmm2, %%xmm4\n\t"
- "mulps %%xmm3, %%xmm4\n\t"
- "movaps %%xmm4, %1\n\t"
- :"=m"(*(b1 + i)), "=m"(*(b1 + 0x1c - i))
- :"m"(*(costab + i)), "m"(*(c + i)), "m"(*(c + 0x1c - i))
- );
- }
- }
-
- {
- int i;
-
- for (i = 0; i < 0x20; i += 0x10)
- {
- __asm__(
- "movaps %4, %%xmm1\n\t"
- "movaps %5, %%xmm3\n\t"
- "movaps %6, %%xmm4\n\t"
- "movaps %7, %%xmm6\n\t"
- "movaps %%xmm1, %%xmm7\n\t"
- "shufps $27, %%xmm7, %%xmm7\n\t"
- "movaps %%xmm3, %%xmm5\n\t"
- "shufps $27, %%xmm5, %%xmm5\n\t"
- "movaps %%xmm4, %%xmm2\n\t"
- "shufps $27, %%xmm2, %%xmm2\n\t"
- "movaps %%xmm6, %%xmm0\n\t"
- "shufps $27, %%xmm0, %%xmm0\n\t"
- "addps %%xmm0, %%xmm1\n\t"
- "movaps %%xmm1, %0\n\t"
- "addps %%xmm2, %%xmm3\n\t"
- "movaps %%xmm3, %1\n\t"
- "subps %%xmm4, %%xmm5\n\t"
- "movaps %%xmm5, %2\n\t"
- "subps %%xmm6, %%xmm7\n\t"
- "movaps %%xmm7, %3\n\t"
- :"=m"(*(b2 + i)), "=m"(*(b2 + i + 4)), "=m"(*(b2 + i + 8)), "=m"(*(b2 + i + 12))
- :"m"(*(b1 + i)), "m"(*(b1 + i + 4)), "m"(*(b1 + i + 8)), "m"(*(b1 + i + 12))
- );
- }
- }
-
- {
- real *costab = costab_mmx + 16;
- __asm__(
- "movaps %4, %%xmm0\n\t"
- "movaps %5, %%xmm1\n\t"
- "movaps %8, %%xmm4\n\t"
- "xorps %%xmm6, %%xmm6\n\t"
- "shufps $27, %%xmm4, %%xmm4\n\t"
- "mulps %%xmm4, %%xmm1\n\t"
- "movaps %9, %%xmm2\n\t"
- "xorps %%xmm7, %%xmm7\n\t"
- "shufps $27, %%xmm2, %%xmm2\n\t"
- "mulps %%xmm2, %%xmm0\n\t"
- "movaps %%xmm0, %0\n\t"
- "movaps %%xmm1, %1\n\t"
- "movaps %6, %%xmm3\n\t"
- "mulps %%xmm2, %%xmm3\n\t"
- "subps %%xmm3, %%xmm6\n\t"
- "movaps %%xmm6, %2\n\t"
- "movaps %7, %%xmm5\n\t"
- "mulps %%xmm4, %%xmm5\n\t"
- "subps %%xmm5, %%xmm7\n\t"
- "movaps %%xmm7, %3\n\t"
- :"=m"(*(b2 + 8)), "=m"(*(b2 + 0xc)), "=m"(*(b2 + 0x18)), "=m"(*(b2 + 0x1c))
- :"m"(*(b2 + 8)), "m"(*(b2 + 0xc)), "m"(*(b2 + 0x18)), "m"(*(b2 + 0x1c)), "m"(*costab), "m"(*(costab + 4))
- );
- }
-
- {
- real *costab = costab_mmx + 24;
- int i;
-
- __asm__(
- "movaps %0, %%xmm0\n\t"
- "shufps $27, %%xmm0, %%xmm0\n\t"
- "movaps %1, %%xmm5\n\t"
- "movaps %%xmm5, %%xmm6\n\t"
- :
- :"m"(*costab), "m"(*nnnn)
- );
-
- for (i = 0; i < 0x20; i += 8)
- {
- __asm__(
- "movaps %2, %%xmm2\n\t"
- "movaps %3, %%xmm3\n\t"
- "movaps %%xmm2, %%xmm4\n\t"
- "xorps %%xmm5, %%xmm6\n\t"
- "shufps $27, %%xmm4, %%xmm4\n\t"
- "movaps %%xmm3, %%xmm1\n\t"
- "shufps $27, %%xmm1, %%xmm1\n\t"
- "addps %%xmm1, %%xmm2\n\t"
- "movaps %%xmm2, %0\n\t"
- "subps %%xmm3, %%xmm4\n\t"
- "xorps %%xmm6, %%xmm4\n\t"
- "mulps %%xmm0, %%xmm4\n\t"
- "movaps %%xmm4, %1\n\t"
- :"=m"(*(b1 + i)), "=m"(*(b1 + i + 4))
- :"m"(*(b2 + i)), "m"(*(b2 + i + 4))
- );
- }
- }
-
- {
- int i;
-
- __asm__(
- "movss %0, %%xmm1\n\t"
- "movss %1, %%xmm0\n\t"
- "movaps %%xmm1, %%xmm3\n\t"
- "unpcklps %%xmm0, %%xmm3\n\t"
- "movss %2, %%xmm2\n\t"
- "movaps %%xmm1, %%xmm0\n\t"
- "unpcklps %%xmm2, %%xmm0\n\t"
- "unpcklps %%xmm3, %%xmm0\n\t"
- "movaps %3, %%xmm2\n\t"
- :
- :"m"(one), "m"(costab_mmx[28]), "m"(costab_mmx[29]), "m"(*ppnn)
- );
-
- for (i = 0; i < 0x20; i += 8)
- {
- __asm__(
- "movaps %2, %%xmm3\n\t"
- "movaps %%xmm3, %%xmm4\n\t"
- "shufps $20, %%xmm4, %%xmm4\n\t"
- "shufps $235, %%xmm3, %%xmm3\n\t"
- "xorps %%xmm2, %%xmm3\n\t"
- "addps %%xmm3, %%xmm4\n\t"
- "mulps %%xmm0, %%xmm4\n\t"
- "movaps %%xmm4, %0\n\t"
- "movaps %3, %%xmm6\n\t"
- "movaps %%xmm6, %%xmm5\n\t"
- "shufps $27, %%xmm5, %%xmm5\n\t"
- "xorps %%xmm2, %%xmm5\n\t"
- "addps %%xmm5, %%xmm6\n\t"
- "mulps %%xmm0, %%xmm6\n\t"
- "movaps %%xmm6, %1\n\t"
- :"=m"(*(b2 + i)), "=m"(*(b2 + i + 4))
- :"m"(*(b1 + i)), "m"(*(b1 + i + 4))
- );
- }
- }
-
- {
- int i;
- __asm__(
- "movss %0, %%xmm0\n\t"
- "movaps %%xmm1, %%xmm2\n\t"
- "movaps %%xmm0, %%xmm7\n\t"
- "unpcklps %%xmm1, %%xmm2\n\t"
- "unpcklps %%xmm0, %%xmm7\n\t"
- "movaps %1, %%xmm0\n\t"
- "unpcklps %%xmm7, %%xmm2\n\t"
- :
- :"m"(costab_mmx[30]), "m"(*pnpn)
- );
-
- for (i = 0x8; i < 0x20; i += 8)
- {
- __asm__ volatile (
- "movaps %2, %%xmm1\n\t"
- "movaps %%xmm1, %%xmm3\n\t"
- "shufps $224, %%xmm3, %%xmm3\n\t"
- "shufps $181, %%xmm1, %%xmm1\n\t"
- "xorps %%xmm0, %%xmm1\n\t"
- "addps %%xmm1, %%xmm3\n\t"
- "mulps %%xmm2, %%xmm3\n\t"
- "movaps %%xmm3, %0\n\t"
- "movaps %3, %%xmm4\n\t"
- "movaps %%xmm4, %%xmm5\n\t"
- "shufps $224, %%xmm5, %%xmm5\n\t"
- "shufps $181, %%xmm4, %%xmm4\n\t"
- "xorps %%xmm0, %%xmm4\n\t"
- "addps %%xmm4, %%xmm5\n\t"
- "mulps %%xmm2, %%xmm5\n\t"
- "movaps %%xmm5, %1\n\t"
- :"=m"(*(b1 + i)), "=m"(*(b1 + i + 4))
- :"m"(*(b2 + i)), "m"(*(b2 + i + 4))
- :"memory"
- );
- }
- for (i = 0x8; i < 0x20; i += 8)
- {
- b1[i + 2] += b1[i + 3];
- b1[i + 6] += b1[i + 7];
- b1[i + 4] += b1[i + 6];
- b1[i + 6] += b1[i + 5];
- b1[i + 5] += b1[i + 7];
- }
- }
-
-#if 0
- /* Reference C code */
-
- /*
- Should run faster than x87 asm, given that the compiler is sane.
- However, the C code dosen't round with saturation (0x7fff for too
- large positive float, 0x8000 for too small negative float). You
- can hear the difference if you listen carefully.
- */
-
- out0[256] = (short)(b2[0] + b2[1]);
- out0[0] = (short)((b2[0] - b2[1]) * costab_mmx[30]);
- out1[128] = (short)((b2[3] - b2[2]) * costab_mmx[30]);
- out0[128] = (short)((b2[3] - b2[2]) * costab_mmx[30] + b2[3] + b2[2]);
- out1[192] = (short)((b2[7] - b2[6]) * costab_mmx[30]);
- out0[192] = (short)((b2[7] - b2[6]) * costab_mmx[30] + b2[6] + b2[7] + b2[4] + b2[5]);
- out0[64] = (short)((b2[7] - b2[6]) * costab_mmx[30] + b2[6] + b2[7] + (b2[4] - b2[5]) * costab_mmx[30]);
- out1[64] = (short)((b2[7] - b2[6]) * costab_mmx[30] + (b2[4] - b2[5]) * costab_mmx[30]);
-
- out0[224] = (short)(b1[8] + b1[12]);
- out0[160] = (short)(b1[12] + b1[10]);
- out0[96] = (short)(b1[10] + b1[14]);
- out0[32] = (short)(b1[14] + b1[9]);
- out1[32] = (short)(b1[9] + b1[13]);
- out1[96] = (short)(b1[13] + b1[11]);
- out1[224] = (short)b1[15];
- out1[160] = (short)(b1[15] + b1[11]);
- out0[240] = (short)(b1[24] + b1[28] + b1[16]);
- out0[208] = (short)(b1[24] + b1[28] + b1[20]);
- out0[176] = (short)(b1[28] + b1[26] + b1[20]);
- out0[144] = (short)(b1[28] + b1[26] + b1[18]);
- out0[112] = (short)(b1[26] + b1[30] + b1[18]);
- out0[80] = (short)(b1[26] + b1[30] + b1[22]);
- out0[48] = (short)(b1[30] + b1[25] + b1[22]);
- out0[16] = (short)(b1[30] + b1[25] + b1[17]);
- out1[16] = (short)(b1[25] + b1[29] + b1[17]);
- out1[48] = (short)(b1[25] + b1[29] + b1[21]);
- out1[80] = (short)(b1[29] + b1[27] + b1[21]);
- out1[112] = (short)(b1[29] + b1[27] + b1[19]);
- out1[144] = (short)(b1[27] + b1[31] + b1[19]);
- out1[176] = (short)(b1[27] + b1[31] + b1[23]);
- out1[240] = (short)(b1[31]);
- out1[208] = (short)(b1[31] + b1[23]);
-
-#else
- /*
- To do saturation efficiently in x86 we can use fist(p)s,
- pf2iw, or packssdw. We use fist(p)s here.
- */
- __asm__(
- "flds %0\n\t"
- "flds (%2)\n\t"
- "fadds 4(%2)\n\t"
- "fistps 512(%3)\n\t"
-
- "flds (%2)\n\t"
- "fsubs 4(%2)\n\t"
- "fmul %%st(1)\n\t"
- "fistps (%3)\n\t"
-
- "flds 12(%2)\n\t"
- "fsubs 8(%2)\n\t"
- "fmul %%st(1)\n\t"
- "fists 256(%4)\n\t"
- "fadds 12(%2)\n\t"
- "fadds 8(%2)\n\t"
- "fistps 256(%3)\n\t"
-
- "flds 16(%2)\n\t"
- "fsubs 20(%2)\n\t"
- "fmul %%st(1)\n\t"
-
- "flds 28(%2)\n\t"
- "fsubs 24(%2)\n\t"
- "fmul %%st(2)\n\t"
- "fists 384(%4)\n\t"
- "fld %%st(0)\n\t"
- "fadds 24(%2)\n\t"
- "fadds 28(%2)\n\t"
- "fld %%st(0)\n\t"
- "fadds 16(%2)\n\t"
- "fadds 20(%2)\n\t"
- "fistps 384(%3)\n\t"
- "fadd %%st(2)\n\t"
- "fistps 128(%3)\n\t"
- "faddp %%st(1)\n\t"
- "fistps 128(%4)\n\t"
-
- "flds 32(%1)\n\t"
- "fadds 48(%1)\n\t"
- "fistps 448(%3)\n\t"
-
- "flds 48(%1)\n\t"
- "fadds 40(%1)\n\t"
- "fistps 320(%3)\n\t"
-
- "flds 40(%1)\n\t"
- "fadds 56(%1)\n\t"
- "fistps 192(%3)\n\t"
-
- "flds 56(%1)\n\t"
- "fadds 36(%1)\n\t"
- "fistps 64(%3)\n\t"
-
- "flds 36(%1)\n\t"
- "fadds 52(%1)\n\t"
- "fistps 64(%4)\n\t"
-
- "flds 52(%1)\n\t"
- "fadds 44(%1)\n\t"
- "fistps 192(%4)\n\t"
-
- "flds 60(%1)\n\t"
- "fists 448(%4)\n\t"
- "fadds 44(%1)\n\t"
- "fistps 320(%4)\n\t"
-
- "flds 96(%1)\n\t"
- "fadds 112(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 64(%1)\n\t"
- "fistps 480(%3)\n\t"
- "fadds 80(%1)\n\t"
- "fistps 416(%3)\n\t"
-
- "flds 112(%1)\n\t"
- "fadds 104(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 80(%1)\n\t"
- "fistps 352(%3)\n\t"
- "fadds 72(%1)\n\t"
- "fistps 288(%3)\n\t"
-
- "flds 104(%1)\n\t"
- "fadds 120(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 72(%1)\n\t"
- "fistps 224(%3)\n\t"
- "fadds 88(%1)\n\t"
- "fistps 160(%3)\n\t"
-
- "flds 120(%1)\n\t"
- "fadds 100(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 88(%1)\n\t"
- "fistps 96(%3)\n\t"
- "fadds 68(%1)\n\t"
- "fistps 32(%3)\n\t"
-
- "flds 100(%1)\n\t"
- "fadds 116(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 68(%1)\n\t"
- "fistps 32(%4)\n\t"
- "fadds 84(%1)\n\t"
- "fistps 96(%4)\n\t"
-
- "flds 116(%1)\n\t"
- "fadds 108(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 84(%1)\n\t"
- "fistps 160(%4)\n\t"
- "fadds 76(%1)\n\t"
- "fistps 224(%4)\n\t"
-
- "flds 108(%1)\n\t"
- "fadds 124(%1)\n\t"
- "fld %%st(0)\n\t"
- "fadds 76(%1)\n\t"
- "fistps 288(%4)\n\t"
- "fadds 92(%1)\n\t"
- "fistps 352(%4)\n\t"
-
- "flds 124(%1)\n\t"
- "fists 480(%4)\n\t"
- "fadds 92(%1)\n\t"
- "fistps 416(%4)\n\t"
- ".byte 0xdf, 0xc0\n\t" // ffreep %%st(0)
- :
- :"m"(costab_mmx[30]), "r"(b1), "r"(b2), "r"(out0), "r"(out1)
- :"memory"
- );
-#endif
- out1[0] = out0[0];
-}