From 978c569da230236ce3d4d7cc4507007ea561f243 Mon Sep 17 00:00:00 2001
From: nick
Date: Tue, 3 Jul 2001 09:25:16 +0000
Subject: Partial loops unrolling

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1260 b3059339-0415-0410-9bf9-f77b7e298cf2
---
 mp3lib/decode_MMX.s | 128 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 13 deletions(-)

diff --git a/mp3lib/decode_MMX.s b/mp3lib/decode_MMX.s
index 41c8d34c4d..d54a34bc93 100644
--- a/mp3lib/decode_MMX.s
+++ b/mp3lib/decode_MMX.s
@@ -3,9 +3,15 @@
 # See ChangeLog of mpg123-0.59s-pre.1 for detail
 # Applied to mplayer by Nick Kurshev
 #
-# TODO: Partial loops unrolling and removing MOVW insn.
+# Local ChangeLog:
+# - Partial loops unrolling and removing MOVW insn from loops
 #
+.data
+.align 8
+null_one: .long 0x0000ffff, 0x0000ffff
+one_null: .long 0xffff0000, 0xffff0000
+
 .text

 .globl synth_1to1_MMX_s

@@ -49,64 +55,160 @@ synth_1to1_MMX_s:
 	addl $12,%esp
 	leal 1(%ebx), %ecx
 	subl %ebp,%ebx
-
+	pushl %ecx
 	leal decwins(%ebx,%ebx,1), %edx
+	shrl $1, %ecx
+.align 16
 .L3:
 	movq (%edx),%mm0
+	movq 64(%edx),%mm4
 	pmaddwd (%esi),%mm0
+	pmaddwd 32(%esi),%mm4
 	movq 8(%edx),%mm1
+	movq 72(%edx),%mm5
 	pmaddwd 8(%esi),%mm1
+	pmaddwd 40(%esi),%mm5
 	movq 16(%edx),%mm2
+	movq 80(%edx),%mm6
 	pmaddwd 16(%esi),%mm2
+	pmaddwd 48(%esi),%mm6
 	movq 24(%edx),%mm3
+	movq 88(%edx),%mm7
 	pmaddwd 24(%esi),%mm3
+	pmaddwd 56(%esi),%mm7
 	paddd %mm1,%mm0
+	paddd %mm5,%mm4
 	paddd %mm2,%mm0
+	paddd %mm6,%mm4
 	paddd %mm3,%mm0
+	paddd %mm7,%mm4
 	movq %mm0,%mm1
+	movq %mm4,%mm5
 	psrlq $32,%mm1
+	psrlq $32,%mm5
 	paddd %mm1,%mm0
+	paddd %mm5,%mm4
 	psrad $13,%mm0
+	psrad $13,%mm4
 	packssdw %mm0,%mm0
-	movd %mm0,%eax
-	movw %ax, (%edi)
+	packssdw %mm4,%mm4
+
+	movq (%edi), %mm1
+	punpckldq %mm4, %mm0
+	pand one_null, %mm1
+	pand null_one, %mm0
+	por %mm0, %mm1
+	movq %mm1,(%edi)
+
+	leal 64(%esi),%esi
+	leal 128(%edx),%edx
+	leal 8(%edi),%edi

-	leal 32(%esi),%esi
-	leal 64(%edx),%edx
-	leal 4(%edi),%edi
 	decl %ecx
 	jnz .L3

+	popl %ecx
+	andl $1, %ecx
+	jecxz .next_loop
+	movq (%edx),%mm0
+	pmaddwd (%esi),%mm0
+	movq 8(%edx),%mm1
+	pmaddwd 8(%esi),%mm1
+	movq 16(%edx),%mm2
+	pmaddwd 16(%esi),%mm2
+	movq 24(%edx),%mm3
+	pmaddwd 24(%esi),%mm3
+	paddd %mm1,%mm0
+	paddd %mm2,%mm0
+	paddd %mm3,%mm0
+	movq %mm0,%mm1
+	psrlq $32,%mm1
+	paddd %mm1,%mm0
+	psrad $13,%mm0
+	packssdw %mm0,%mm0
+	movd %mm0,%eax
+	movw %ax, (%edi)
+	leal 32(%esi),%esi
+	leal 64(%edx),%edx
+	leal 4(%edi),%edi
+
+.next_loop:
 	subl $64,%esi
-	movl $15,%ecx
+	movl $7,%ecx
+.align 16
 .L4:
 	movq (%edx),%mm0
+	movq 64(%edx),%mm4
 	pmaddwd (%esi),%mm0
+	pmaddwd -32(%esi),%mm4
 	movq 8(%edx),%mm1
+	movq 72(%edx),%mm5
 	pmaddwd 8(%esi),%mm1
+	pmaddwd -24(%esi),%mm5
 	movq 16(%edx),%mm2
+	movq 80(%edx),%mm6
 	pmaddwd 16(%esi),%mm2
+	pmaddwd -16(%esi),%mm6
 	movq 24(%edx),%mm3
+	movq 88(%edx),%mm7
 	pmaddwd 24(%esi),%mm3
+	pmaddwd -8(%esi),%mm7
 	paddd %mm1,%mm0
+	paddd %mm5,%mm4
 	paddd %mm2,%mm0
+	paddd %mm6,%mm4
 	paddd %mm3,%mm0
+	paddd %mm7,%mm4
 	movq %mm0,%mm1
+	movq %mm4,%mm5
 	psrlq $32,%mm1
+	psrlq $32,%mm5
 	paddd %mm0,%mm1
+	paddd %mm4,%mm5
 	psrad $13,%mm1
+	psrad $13,%mm5
 	packssdw %mm1,%mm1
+	packssdw %mm5,%mm5
 	psubd %mm0,%mm0
+	psubd %mm4,%mm4
 	psubsw %mm1,%mm0
-	movd %mm0,%eax
-	movw %ax,(%edi)
+	psubsw %mm5,%mm4

-	subl $32,%esi
-	addl $64,%edx
-	leal 4(%edi),%edi
+	movq (%edi), %mm1
+	punpckldq %mm4, %mm0
+	pand one_null, %mm1
+	pand null_one, %mm0
+	por %mm0, %mm1
+	movq %mm1,(%edi)
+
+	subl $64,%esi
+	addl $128,%edx
+	leal 8(%edi),%edi
 	decl %ecx
 	jnz .L4

+
+	movq (%edx),%mm0
+	pmaddwd (%esi),%mm0
+	movq 8(%edx),%mm1
+	pmaddwd 8(%esi),%mm1
+	movq 16(%edx),%mm2
+	pmaddwd 16(%esi),%mm2
+	movq 24(%edx),%mm3
+	pmaddwd 24(%esi),%mm3
+	paddd %mm1,%mm0
+	paddd %mm2,%mm0
+	paddd %mm3,%mm0
+	movq %mm0,%mm1
+	psrlq $32,%mm1
+	paddd %mm0,%mm1
+	psrad $13,%mm1
+	packssdw %mm1,%mm1
+	psubd %mm0,%mm0
+	psubsw %mm1,%mm0
+	movd %mm0,%eax
+	movw %ax,(%edi)
+
 	emms
 	popl %ebx
 	popl %esi
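Note on the technique (illustrative only): the patch applies two standard transformations
to both synthesis loops. Each loop now produces two output samples per pass, with the trip
count halved up front (shrl $1,%ecx) and the odd leftover sample handled once after the
first loop (popl %ecx / andl $1,%ecx / jecxz .next_loop); and the per-sample 16-bit MOVW
store is replaced by a masked 64-bit read-modify-write through the new null_one/one_null
constants, which writes both samples with a single movq while preserving the other 16-bit
words already at (%edi) (the interleaved second channel). The C sketch below only mirrors
that pattern under simplifying assumptions (a plain 16-tap dot product, interleaved stereo
output, little-endian layout); dot16, synth_plain and synth_unrolled are hypothetical names
for this illustration, not mp3lib code.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Stand-in for one windowed dot product (the pmaddwd/paddd/psrad $13 block
 * in the real code); the plain 16-tap layout is an assumption. */
static int16_t dot16(const int16_t *win, const int16_t *frac)
{
    int32_t acc = 0;
    for (int i = 0; i < 16; i++)
        acc += (int32_t)win[i] * frac[i];
    acc >>= 13;
    if (acc >  32767) acc =  32767;   /* packssdw-style saturation */
    if (acc < -32768) acc = -32768;
    return (int16_t)acc;
}

/* One sample per iteration, stored with a 16-bit write into interleaved
 * stereo output (stride 2) -- the role played by MOVW before the patch. */
static void synth_plain(int16_t *out, const int16_t *win,
                        const int16_t *frac, int count)
{
    for (int i = 0; i < count; i++)
        out[2 * i] = dot16(win + 16 * i, frac + 16 * i);
}

/* 2x partially unrolled equivalent:
 *  - the trip count is halved up front (cf. shrl $1,%ecx),
 *  - each pass produces two samples and merges them into one 64-bit
 *    read-modify-write store, masking per 32-bit lane with
 *    0x0000ffff/0xffff0000 (cf. null_one/one_null) so the other channel's
 *    interleaved words survive,
 *  - an odd leftover sample is handled once after the loop
 *    (cf. popl %ecx / andl $1,%ecx / jecxz .next_loop).
 * Assumes little-endian layout, as on x86. */
static void synth_unrolled(int16_t *out, const int16_t *win,
                           const int16_t *frac, int count)
{
    int pairs = count >> 1;
    for (int i = 0; i < pairs; i++) {
        uint16_t s0 = (uint16_t)dot16(win + 32 * i,      frac + 32 * i);
        uint16_t s1 = (uint16_t)dot16(win + 32 * i + 16, frac + 32 * i + 16);
        uint64_t old, merged;

        memcpy(&old, out + 4 * i, sizeof old);          /* movq (%edi),%mm1 */
        merged  = old & 0xffff0000ffff0000ULL;          /* pand one_null    */
        merged |= (uint64_t)s0 | ((uint64_t)s1 << 32);  /* pand null_one;por*/
        memcpy(out + 4 * i, &merged, sizeof merged);    /* movq %mm1,(%edi) */
    }
    if (count & 1)                                      /* remainder sample */
        out[2 * (count - 1)] = dot16(win + 16 * (count - 1),
                                     frac + 16 * (count - 1));
}

/* Tiny self-check: an odd count exercises the remainder path; the 0x5555
 * filler words stand in for the other channel and must survive untouched. */
int main(void)
{
    int16_t win[16 * 5], frac[16 * 5], a[2 * 5], b[2 * 5];

    for (int i = 0; i < 16 * 5; i++) {
        win[i]  = (int16_t)(i * 3 - 40);
        frac[i] = (int16_t)(1000 - i * 7);
    }
    memset(a, 0x55, sizeof a);
    memcpy(b, a, sizeof b);

    synth_plain(a, win, frac, 5);
    synth_unrolled(b, win, frac, 5);
    printf("%s\n", memcmp(a, b, sizeof a) ? "mismatch" : "identical");
    return 0;
}

The presumable point of dropping MOVW is that the 16-bit store is a partial-width write
(with an operand-size prefix) and the merged movq halves the number of stores per pair of
samples; the price, visible in both the patch and the sketch, is that the store becomes a
read-modify-write of the destination.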