/ / mpg123_synth_1to1 works the same way as the c version of this / file. only two types of changes have been made: / - reordered floating point instructions to / prevent pipline stalls / - made WRITE_SAMPLE use integer instead of / (slower) floating point / all kinds of x86 processors should benefit from these / modifications. / / useful sources of information on optimizing x86 code include: / / Intel Architecture Optimization Manual / http://www.intel.com/design/pentium/manuals/242816.htm / / Cyrix 6x86 Instruction Set Summary / ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf / / AMD-K5 Processor Software Development / http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf / / Stefan Bieschewski / / $Id$ / .bss .comm buffs,4352,4 .data .align 4 bo: .long 1 .section .rodata .align 8 .LC0: .long 0x0,0x40dfffc0 .align 8 .LC1: .long 0x0,0xc0e00000 .align 8 .text .globl synth_1to1_pent synth_1to1_pent: subl $12,%esp pushl %ebp pushl %edi pushl %esi pushl %ebx movl 32(%esp),%eax movl 40(%esp),%esi xorl %edi,%edi movl bo,%ebp cmpl %edi,36(%esp) jne .L48 decl %ebp andl $15,%ebp movl %ebp,bo movl $buffs,%ecx jmp .L49 .L48: addl $2,%esi movl $buffs+2176,%ecx .L49: testl $1,%ebp je .L50 movl %ecx,%ebx movl %ebp,16(%esp) pushl %eax movl 20(%esp),%edx leal (%ebx,%edx,4),%eax pushl %eax movl 24(%esp),%eax incl %eax andl $15,%eax leal 1088(,%eax,4),%eax addl %ebx,%eax jmp .L74 .L50: leal 1088(%ecx),%ebx leal 1(%ebp),%edx movl %edx,16(%esp) pushl %eax leal 1092(%ecx,%ebp,4),%eax pushl %eax leal (%ecx,%ebp,4),%eax .L74: pushl %eax call dct64 addl $12,%esp movl 16(%esp),%edx leal 0(,%edx,4),%edx movl $decwin+64,%eax movl %eax,%ecx subl %edx,%ecx movl $16,%ebp .L55: flds (%ecx) fmuls (%ebx) flds 4(%ecx) fmuls 4(%ebx) fxch %st(1) flds 8(%ecx) fmuls 8(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 12(%ecx) fmuls 12(%ebx) fxch %st(2) faddp %st,%st(1) flds 16(%ecx) fmuls 16(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 20(%ecx) fmuls 20(%ebx) fxch %st(2) faddp %st,%st(1) flds 24(%ecx) fmuls 24(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 28(%ecx) fmuls 28(%ebx) fxch %st(2) faddp %st,%st(1) flds 32(%ecx) fmuls 32(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 36(%ecx) fmuls 36(%ebx) fxch %st(2) faddp %st,%st(1) flds 40(%ecx) fmuls 40(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 44(%ecx) fmuls 44(%ebx) fxch %st(2) faddp %st,%st(1) flds 48(%ecx) fmuls 48(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 52(%ecx) fmuls 52(%ebx) fxch %st(2) faddp %st,%st(1) flds 56(%ecx) fmuls 56(%ebx) fxch %st(2) fsubrp %st,%st(1) flds 60(%ecx) fmuls 60(%ebx) fxch %st(2) subl $4,%esp faddp %st,%st(1) fxch %st(1) fsubrp %st,%st(1) fistpl (%esp) popl %eax cmpl $32767,%eax jg 1f cmpl $-32768,%eax jl 2f movw %ax,(%esi) jmp 4f 1: movw $32767,(%esi) jmp 3f 2: movw $-32768,(%esi) 3: incl %edi 4: .L54: addl $64,%ebx subl $-128,%ecx addl $4,%esi decl %ebp jnz .L55 flds (%ecx) fmuls (%ebx) flds 8(%ecx) fmuls 8(%ebx) flds 16(%ecx) fmuls 16(%ebx) fxch %st(2) faddp %st,%st(1) flds 24(%ecx) fmuls 24(%ebx) fxch %st(2) faddp %st,%st(1) flds 32(%ecx) fmuls 32(%ebx) fxch %st(2) faddp %st,%st(1) flds 40(%ecx) fmuls 40(%ebx) fxch %st(2) faddp %st,%st(1) flds 48(%ecx) fmuls 48(%ebx) fxch %st(2) faddp %st,%st(1) flds 56(%ecx) fmuls 56(%ebx) fxch %st(2) subl $4,%esp faddp %st,%st(1) fxch %st(1) faddp %st,%st(1) fistpl (%esp) popl %eax cmpl $32767,%eax jg 1f cmpl $-32768,%eax jl 2f movw %ax,(%esi) jmp 4f 1: movw $32767,(%esi) jmp 3f 2: movw $-32768,(%esi) 3: incl %edi 4: .L62: addl $-64,%ebx addl $4,%esi movl 16(%esp),%edx leal -128(%ecx,%edx,8),%ecx movl $15,%ebp .L68: flds -4(%ecx) fchs fmuls (%ebx) flds -8(%ecx) fmuls 4(%ebx) fxch %st(1) flds -12(%ecx) fmuls 8(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -16(%ecx) fmuls 12(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -20(%ecx) fmuls 16(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -24(%ecx) fmuls 20(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -28(%ecx) fmuls 24(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -32(%ecx) fmuls 28(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -36(%ecx) fmuls 32(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -40(%ecx) fmuls 36(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -44(%ecx) fmuls 40(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -48(%ecx) fmuls 44(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -52(%ecx) fmuls 48(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -56(%ecx) fmuls 52(%ebx) fxch %st(2) fsubrp %st,%st(1) flds -60(%ecx) fmuls 56(%ebx) fxch %st(2) fsubrp %st,%st(1) flds (%ecx) fmuls 60(%ebx) fxch %st(2) subl $4,%esp fsubrp %st,%st(1) fxch %st(1) fsubrp %st,%st(1) fistpl (%esp) popl %eax cmpl $32767,%eax jg 1f cmpl $-32768,%eax jl 2f movw %ax,(%esi) jmp 4f 1: movw $32767,(%esi) jmp 3f 2: movw $-32768,(%esi) 3: incl %edi 4: .L67: addl $-64,%ebx addl $-128,%ecx addl $4,%esi decl %ebp jnz .L68 movl %edi,%eax popl %ebx popl %esi popl %edi popl %ebp addl $12,%esp ret