summaryrefslogtreecommitdiffstats
path: root/mp3lib/dct64_sse.c
diff options
context:
space:
mode:
author zuxy <zuxy@b3059339-0415-0410-9bf9-f77b7e298cf2> 2007-06-06 05:13:13 +0000
committer zuxy <zuxy@b3059339-0415-0410-9bf9-f77b7e298cf2> 2007-06-06 05:13:13 +0000
commit 739f79a5ff6a74fd2ea52478267b4b84f3671275 (patch)
tree 16ae82eb7fb937d38a0d3f35d8153db49eff0880 /mp3lib/dct64_sse.c
parent 40c9f981a2e3ebfee952ac7eef72b02cf52f7163 (diff)
download mpv-739f79a5ff6a74fd2ea52478267b4b84f3671275.tar.bz2
mpv-739f79a5ff6a74fd2ea52478267b4b84f3671275.tar.xz
Align output pointer so that we can use movaps instead of movups in dct64_sse;
1.5% faster decode.

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@23484 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'mp3lib/dct64_sse.c')
-rw-r--r-- mp3lib/dct64_sse.c 20
1 files changed, 5 insertions, 15 deletions
diff --git a/mp3lib/dct64_sse.c b/mp3lib/dct64_sse.c
index 4a9b3a092e..069a1da808 100644
--- a/mp3lib/dct64_sse.c
+++ b/mp3lib/dct64_sse.c
@@ -5,17 +5,7 @@
* and mp3lib/dct64_MMX.c
*/
-/* NOTE: The following code is suboptimal! It can be improved (at least) by
-
- 1. Replace all movups by movaps. (Can Parameter c be always aligned on
- a 16-byte boundary?)
-
- 2. Rewritten using intrinsics. (GCC generally optimizes intrinsics
- better. However, when __m128 locals are involved, GCC may
- produce bad code that uses movaps to access a stack not aligned
- on a 16-byte boundary, which leads to run-time crashes.)
-
-*/
+#include <libavutil/mem.h>
typedef float real;
@@ -32,8 +22,8 @@ static const int nnnn[4] __attribute__((aligned(16))) =
void dct64_sse(short *out0,short *out1,real *c)
{
- static real __attribute__ ((aligned(16))) b1[0x20];
- static real __attribute__ ((aligned(16))) b2[0x20];
+ static DECLARE_ALIGNED(16, real, b1[0x20]);
+ static DECLARE_ALIGNED(16, real, b2[0x20]);
static real const one = 1.f;
{
@@ -45,9 +35,9 @@ void dct64_sse(short *out0,short *out1,real *c)
asm(
"movaps %2, %%xmm3\n\t"
"shufps $27, %%xmm3, %%xmm3\n\t"
- "movups %3, %%xmm1\n\t"
+ "movaps %3, %%xmm1\n\t"
"movaps %%xmm1, %%xmm4\n\t"
- "movups %4, %%xmm2\n\t"
+ "movaps %4, %%xmm2\n\t"
"shufps $27, %%xmm4, %%xmm4\n\t"
"movaps %%xmm2, %%xmm0\n\t"
"shufps $27, %%xmm0, %%xmm0\n\t"