# this code comes under GPL
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev
#
# Local ChangeLog:
# - Partial loops unrolling and removing MOVW insn from loops
#

# Syntax: GNU as, AT&T operand order (source, destination); 32-bit x86 + MMX.

.data
.align 8
# Word-select masks used by the two-sample stores in .L3 and .L4:
#   null_one keeps the low 16-bit word of each 32-bit dword (the new sample),
#   one_null keeps the high 16-bit word of each dword (the word already in
#   memory between two samples).
null_one: .long 0x0000ffff, 0x0000ffff
one_null: .long 0xffff0000, 0xffff0000

.text

.globl synth_1to1_MMX_s

#-----------------------------------------------------------------------
# synth_1to1_MMX_s
#
# Writes 32 signed 16-bit samples through the output pointer, one every
# 4 bytes, leaving the 16-bit word that sits between two consecutive
# samples unchanged.  Each sample is a 16-term dot product of 16-bit words
# read from the external table `decwins` and 16-bit words read from the
# work buffer, arithmetically shifted right by 13 and saturated to 16
# bits.  Samples 18..32 are additionally negated (with saturation).
#
# Arguments are taken from the stack; the offsets below are the ones used
# after the four register pushes of the prologue:
#   20(%esp)  arg1  forwarded unchanged as 3rd argument of *dct64_MMX_func
#   24(%esp)  arg2  integer; the output pointer is advanced by 2*arg2
#                   bytes, and arg2 == 1 skips the index update below
#   28(%esp)  arg3  output pointer
#   32(%esp)  arg4  work-buffer base
#   36(%esp)  arg5  pointer to a 32-bit index; when arg2 != 1 the index is
#                   replaced by (index - 1) & 15 and 1088 is added to the
#                   work-buffer base
# NOTE(review): presumably (bandPtr, channel, samples, buffs, bo) as in
# the mpg123 C synth routine -- the C prototype is not visible in this
# file; confirm against the caller.
#
# Externals: dct64_MMX_func (memory slot holding a function pointer,
#            called indirectly) and decwins (table base address).
# Saves/restores %ebp, %edi, %esi, %ebx.  Overwrites %eax, %ecx, %edx and
# %mm0-%mm7.  No return value is set up: %eax simply holds the result of
# the last movd.  emms is executed before the call and before returning.
# Uses absolute addresses for one_null, null_one, decwins and
# dct64_MMX_func (no PIC addressing).
#-----------------------------------------------------------------------
synth_1to1_MMX_s:
        pushl %ebp
        pushl %edi
        pushl %esi
        pushl %ebx
        movl 24(%esp),%ecx              # ecx = arg2
        movl 28(%esp),%edi              # edi = arg3 (output pointer)
        movl $15,%ebx                   # ebx = 15, the mod-16 mask for the index
        movl 36(%esp),%edx              # edx = arg5 (pointer to the index)
        leal (%edi,%ecx,2),%edi         # edi += 2*arg2 bytes
        decl %ecx                       # ecx = arg2 - 1, tested by jecxz below
        movl 32(%esp),%esi              # esi = arg4 (work-buffer base)
        movl (%edx),%eax                # eax = current index
        jecxz .L1                       # arg2 == 1: keep index and base as they are
        decl %eax
        andl %ebx,%eax                  # index = (index - 1) & 15
        leal 1088(%esi),%esi            # switch to the region 1088 bytes into the buffer
        movl %eax,(%edx)                # write the updated index back
.L1:
        leal (%esi,%eax,2),%edx         # edx = esi + 2*index
        movl %eax,%ebp                  # ebp = index, later subtracted from 16 for the window offset
        incl %eax
        pushl 20(%esp)                  # 3rd call argument = arg1 (address formed before esp moves)
        andl %ebx,%eax                  # eax = (index + 1) & 15
        leal 544(%esi,%eax,2),%ecx      # ecx = esi + 544 + 2*eax
        incl %ebx                       # ebx = 16
        testl $1, %eax
        jnz .L2                         # eax odd: keep edx/ecx/esi/ebp as computed
        xchgl %edx,%ecx                 # eax even: swap the two pointers passed to the call,
        incl %ebp                       #   use index + 1 for the window offset,
        leal 544(%esi),%esi             #   and read products from the half at +544
.L2:
        emms                            # empty the MMX state before calling out
        pushl %edx                      # 2nd call argument
        pushl %ecx                      # 1st call argument
        call *dct64_MMX_func            # indirect call through the pointer stored at dct64_MMX_func
        addl $12,%esp                   # drop the three pushed arguments
        # From here on %ebx, %ebp, %esi and %edi are used without being
        # reloaded, i.e. the callee is relied upon to preserve them.
        leal 1(%ebx), %ecx              # ecx = 17
        subl %ebp,%ebx                  # ebx = 16 - ebp
        pushl %ecx                      # keep 17 for the odd-count test after .L3
        leal decwins(%ebx,%ebx,1), %edx # edx = decwins + 2*ebx (16-bit entries)
        shrl $1, %ecx                   # ecx = 8 iterations, two samples per iteration

# First half, 8 iterations x 2 samples.  Two independent accumulator
# chains are interleaved instruction by instruction:
#   mm0..mm3: window bytes  0..31 of (%edx) times buffer bytes  0..31 of (%esi)
#   mm4..mm7: window bytes 64..95 of (%edx) times buffer bytes 32..63 of (%esi)
.align 16
.L3:
        movq (%edx),%mm0
        movq 64(%edx),%mm4
        pmaddwd (%esi),%mm0
        pmaddwd 32(%esi),%mm4
        movq 8(%edx),%mm1
        movq 72(%edx),%mm5
        pmaddwd 8(%esi),%mm1
        pmaddwd 40(%esi),%mm5
        movq 16(%edx),%mm2
        movq 80(%edx),%mm6
        pmaddwd 16(%esi),%mm2
        pmaddwd 48(%esi),%mm6
        movq 24(%edx),%mm3
        movq 88(%edx),%mm7
        pmaddwd 24(%esi),%mm3
        pmaddwd 56(%esi),%mm7
        paddd %mm1,%mm0                 # sum the four partial-product quadwords per chain
        paddd %mm5,%mm4
        paddd %mm2,%mm0
        paddd %mm6,%mm4
        paddd %mm3,%mm0
        paddd %mm7,%mm4
        movq %mm0,%mm1
        movq %mm4,%mm5
        psrlq $32,%mm1                  # bring the high dword down ...
        psrlq $32,%mm5
        paddd %mm1,%mm0                 # ... so the low dword holds the full 16-term sum
        paddd %mm5,%mm4
        psrad $13,%mm0                  # arithmetic shift right by 13
        psrad $13,%mm4
        packssdw %mm0,%mm0              # saturate to signed 16 bits
        packssdw %mm4,%mm4
        movq (%edi), %mm1               # current 8 bytes at the output position
        punpckldq %mm4, %mm0            # low dword = first sample, high dword = second sample
        pand one_null, %mm1             # keep the existing words at +2 and +6
        pand null_one, %mm0             # keep only the new sample words at +0 and +4
        por %mm0, %mm1
        movq %mm1,(%edi)                # store both samples, neighbouring words unchanged
        leal 64(%esi),%esi              # buffer: advance two 32-byte groups
        leal 128(%edx),%edx             # window: advance two 64-byte steps
        leal 8(%edi),%edi               # output: advance two 4-byte slots
        decl %ecx
        jnz .L3

        popl %ecx                       # ecx = count saved before .L3 (17)
        andl $1, %ecx
        jecxz .next_loop                # taken only for an even count; 17 is odd, so fall through
# Single sample (the 17th), same computation as one chain of .L3.
        movq (%edx),%mm0
        pmaddwd (%esi),%mm0
        movq 8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq 16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq 24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm1,%mm0
        psrad $13,%mm0
        packssdw %mm0,%mm0
        movd %mm0,%eax
        movw %ax, (%edi)                # 16-bit store of the single sample
        leal 32(%esi),%esi
        leal 64(%edx),%edx
        leal 4(%edi),%edi
.next_loop:
        subl $64,%esi                   # step back two groups: from here the buffer is walked downwards
        movl $7,%ecx                    # 7 iterations x 2 samples

# Second half.  The window pointer keeps advancing while the buffer
# pointer moves down by 32 bytes per sample (second chain reads the group
# at -32(%esi)).  The sums are accumulated in mm1/mm5, and the result is
# negated by subtracting it from zero with signed saturation.
.align 16
.L4:
        movq (%edx),%mm0
        movq 64(%edx),%mm4
        pmaddwd (%esi),%mm0
        pmaddwd -32(%esi),%mm4
        movq 8(%edx),%mm1
        movq 72(%edx),%mm5
        pmaddwd 8(%esi),%mm1
        pmaddwd -24(%esi),%mm5
        movq 16(%edx),%mm2
        movq 80(%edx),%mm6
        pmaddwd 16(%esi),%mm2
        pmaddwd -16(%esi),%mm6
        movq 24(%edx),%mm3
        movq 88(%edx),%mm7
        pmaddwd 24(%esi),%mm3
        pmaddwd -8(%esi),%mm7
        paddd %mm1,%mm0
        paddd %mm5,%mm4
        paddd %mm2,%mm0
        paddd %mm6,%mm4
        paddd %mm3,%mm0
        paddd %mm7,%mm4
        movq %mm0,%mm1
        movq %mm4,%mm5
        psrlq $32,%mm1
        psrlq $32,%mm5
        paddd %mm0,%mm1                 # full sums end up in mm1 / mm5 here
        paddd %mm4,%mm5
        psrad $13,%mm1
        psrad $13,%mm5
        packssdw %mm1,%mm1
        packssdw %mm5,%mm5
        psubd %mm0,%mm0                 # mm0 = 0
        psubd %mm4,%mm4                 # mm4 = 0
        psubsw %mm1,%mm0                # mm0 = 0 - sample (saturating)
        psubsw %mm5,%mm4
        movq (%edi), %mm1               # same masked two-sample store as in .L3
        punpckldq %mm4, %mm0
        pand one_null, %mm1
        pand null_one, %mm0
        por %mm0, %mm1
        movq %mm1,(%edi)
        subl $64,%esi
        addl $128,%edx
        leal 8(%edi),%edi
        decl %ecx
        jnz .L4

# Last sample (the 32nd), negated like the ones produced by .L4.
        movq (%edx),%mm0
        pmaddwd (%esi),%mm0
        movq 8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq 16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq 24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm0,%mm1
        psrad $13,%mm1
        packssdw %mm1,%mm1
        psubd %mm0,%mm0
        psubsw %mm1,%mm0
        movd %mm0,%eax
        movw %ax,(%edi)
        emms                            # empty the MMX state before returning
        popl %ebx
        popl %esi
        popl %edi
        popl %ebp
        ret