summaryrefslogtreecommitdiffstats
path: root/mp3lib/decode_MMX.s
blob: d54a34bc9382624ed75ff561e18d5e13fd2ef5ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# this code comes under GPL
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
#
# Local ChangeLog:
# - Partially unrolled the loops and removed the MOVW instructions from them
#

.data
.align 8
# Two 8-byte masks used as pand operands by synth_1to1_MMX_s when it merges
# freshly computed samples into the output. Each mask is a pair of identical
# 32-bit lanes. null_one starts on the alignment boundary requested above and
# one_null follows it directly, 8 bytes later.
null_one:                               # selects the low 16 bits of each lane
        .long 0x0000ffff
        .long 0x0000ffff
one_null:                               # selects the high 16 bits of each lane
        .long 0xffff0000
        .long 0xffff0000

.text

# synth_1to1_MMX_s
#
# MMX routine that writes 32 signed 16-bit samples, spaced 4 bytes apart,
# starting at byte address arg3 + 2 * arg2. In the paired stores the other
# 16 bits of every 4-byte output slot are read back and rewritten unchanged.
#
# Arguments are read from the stack. Offsets below are valid after the four
# register pushes in the prologue:
#   arg1 at 20(%esp)  passed through unchanged as the third argument of the
#                     call made through dct64_MMX_func
#   arg2 at 24(%esp)  integer, scaled by 2 and added to the output pointer.
#                     The value 1 selects the path that leaves the index
#                     and the buffer base untouched
#   arg3 at 28(%esp)  output pointer
#   arg4 at 32(%esp)  base address of the work buffer of 16-bit values
#   arg5 at 36(%esp)  pointer to a 32-bit index. It is rewritten as
#                     (index - 1) & 15 whenever arg2 is not 1
# NOTE(review): these presumably correspond to bandPtr, channel, samples,
# buffs and bo of the mpg123 synth_1to1 routines - confirm against the C
# prototype used by the callers.
#
# Register roles once setup is done:
#   %edi  output cursor
#   %esi  cursor into the work buffer (advances in the first half, walks
#         backwards in the second half)
#   %edx  cursor into decwins (always advances)
#   %ecx  loop counter
#
# Saves and restores %ebp, %edi, %esi and %ebx. Modifies %eax, %ecx, %edx,
# the flags and %mm0-%mm7. Executes emms before the indirect call and again
# before returning. No return value is set up explicitly: %eax is left
# holding the dword whose low word was stored as the final sample.
# Symbols referenced: dct64_MMX_func, decwins, one_null, null_one.
.globl synth_1to1_MMX_s

synth_1to1_MMX_s:
        pushl %ebp                      # save the registers that are
        pushl %edi                      # restored in the epilogue
        pushl %esi
        pushl %ebx
        movl 24(%esp),%ecx              # ecx = arg2
        movl 28(%esp),%edi              # edi = arg3 (output pointer)
        movl $15,%ebx                   # ebx = mask for the 4-bit index
        movl 36(%esp),%edx              # edx = arg5 (pointer to the index)
        leal (%edi,%ecx,2),%edi         # edi = arg3 + 2 * arg2
	decl %ecx                       # ecx becomes 0 only when arg2 was 1
        movl 32(%esp),%esi              # esi = arg4 (work buffer base)
        movl (%edx),%eax                # eax = current index
        jecxz .L1                       # arg2 was 1: keep index and base as they are
        decl %eax
        andl %ebx,%eax                  # index = (index - 1) & 15
        leal 1088(%esi),%esi            # base moves 1088 bytes into the buffer
        movl %eax,(%edx)                # store the new index back through arg5
.L1:
        leal (%esi,%eax,2),%edx         # edx = base + 2 * index
        movl %eax,%ebp                  # ebp = index, used for the decwins offset
        incl %eax                       
        pushl 20(%esp)                  # push arg1: third argument of the call below
        andl %ebx,%eax                  # eax = (index + 1) & 15
        leal 544(%esi,%eax,2),%ecx      # ecx = base + 544 + 2 * eax
        incl %ebx                       # ebx = 16
	testl $1, %eax
	jnz .L2                       # eax odd: keep the assignments made above
        xchgl %edx,%ecx                 # eax even: swap the two pointers,
	incl %ebp                       # use index + 1 for the decwins offset
        leal 544(%esi),%esi             # and read the products from base + 544
.L2: 
	emms                            # empty the MMX state before the call
        pushl %edx                      # second argument
        pushl %ecx                      # first argument
        call *dct64_MMX_func            # indirect call through the pointer stored at dct64_MMX_func
        addl $12,%esp                   # drop the three pushed arguments
	leal 1(%ebx), %ecx              # count = ebx + 1, i.e. 17 provided the callee preserved %ebx
        subl %ebp,%ebx                  # ebx = 16 - ebp
	pushl %ecx                      # keep the count for the odd-sample test after the loop
	leal decwins(%ebx,%ebx,1), %edx # edx = decwins + 2 * ebx
	shrl $1, %ecx                   # ecx = count / 2 = number of two-sample passes
# First half. Every pass produces two samples with two interleaved
# instruction streams: sample A (mm0-mm3) multiplies the 16 words at (%edx)
# with the 16 words at (%esi), sample B (mm4-mm7) multiplies the 16 words at
# 64(%edx) with the 16 words at 32(%esi). pmaddwd multiplies signed 16-bit
# pairs and adds neighbouring products into 32-bit sums.
.align 16
.L3: 
        movq  (%edx),%mm0
        movq  64(%edx),%mm4
        pmaddwd (%esi),%mm0
        pmaddwd 32(%esi),%mm4
        movq  8(%edx),%mm1
        movq  72(%edx),%mm5
        pmaddwd 8(%esi),%mm1
        pmaddwd 40(%esi),%mm5
        movq  16(%edx),%mm2
        movq  80(%edx),%mm6
        pmaddwd 16(%esi),%mm2
        pmaddwd 48(%esi),%mm6
        movq  24(%edx),%mm3
        movq  88(%edx),%mm7
        pmaddwd 24(%esi),%mm3
        pmaddwd 56(%esi),%mm7
        paddd %mm1,%mm0                 # add the four partial results of A
        paddd %mm5,%mm4                 # add the four partial results of B
        paddd %mm2,%mm0
        paddd %mm6,%mm4
        paddd %mm3,%mm0
        paddd %mm7,%mm4
        movq  %mm0,%mm1
        movq  %mm4,%mm5
        psrlq $32,%mm1                  # bring the high dword down
        psrlq $32,%mm5
        paddd %mm1,%mm0                 # low dword = sum of all 16 products
        paddd %mm5,%mm4
        psrad $13,%mm0                  # arithmetic shift right by 13 bits
        psrad $13,%mm4
        packssdw %mm0,%mm0              # saturate to signed 16 bits
        packssdw %mm4,%mm4

	movq	(%edi), %mm1            # load the 8 bytes currently in the output
	punpckldq %mm4, %mm0            # mm0 = low dword of A, then low dword of B
	pand   one_null, %mm1           # keep the high 16 bits of each stored dword
	pand   null_one, %mm0           # keep the low 16 bits: the two new samples
	por    %mm0, %mm1
	movq   %mm1,(%edi)              # store both samples, high halves unchanged

        leal 64(%esi),%esi              # buffer cursor: two 32-byte blocks forward
        leal 128(%edx),%edx             # decwins cursor: two 64-byte steps forward
        leal 8(%edi),%edi               # output cursor: two 4-byte slots forward

	decl %ecx
        jnz  .L3

	popl %ecx                       # count pushed before the loop
	andl $1, %ecx                   # low bit set: one more single sample follows
	jecxz .next_loop

        movq  (%edx),%mm0               # single sample, same arithmetic as stream A
        pmaddwd (%esi),%mm0
        movq  8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq  16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq  24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq  %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm1,%mm0
        psrad $13,%mm0
        packssdw %mm0,%mm0
        movd %mm0,%eax
	movw %ax, (%edi)                # store only the 16-bit sample
        leal 32(%esi),%esi
        leal 64(%edx),%edx
        leal 4(%edi),%edi               # next 4-byte output slot
	
# Second half. The buffer cursor now walks backwards while the decwins cursor
# keeps advancing, and every result is negated before it is stored. Sample A
# uses (%edx) with (%esi), sample B uses 64(%edx) with -32(%esi).
.next_loop:
        subl $64,%esi                   # step the buffer cursor back by two 32-byte blocks
        movl $7,%ecx                    # seven two-sample passes
.align 16
.L4: 
        movq  (%edx),%mm0
        movq  64(%edx),%mm4
        pmaddwd (%esi),%mm0
        pmaddwd -32(%esi),%mm4
        movq  8(%edx),%mm1
        movq  72(%edx),%mm5
        pmaddwd 8(%esi),%mm1
        pmaddwd -24(%esi),%mm5
        movq  16(%edx),%mm2
        movq  80(%edx),%mm6
        pmaddwd 16(%esi),%mm2
        pmaddwd -16(%esi),%mm6
        movq  24(%edx),%mm3
        movq  88(%edx),%mm7
        pmaddwd 24(%esi),%mm3
        pmaddwd -8(%esi),%mm7
        paddd %mm1,%mm0
        paddd %mm5,%mm4
        paddd %mm2,%mm0
        paddd %mm6,%mm4
        paddd %mm3,%mm0
        paddd %mm7,%mm4
        movq  %mm0,%mm1
        movq  %mm4,%mm5
        psrlq $32,%mm1
        psrlq $32,%mm5
        paddd %mm0,%mm1                 # totals land in mm1 and mm5 this time
        paddd %mm4,%mm5
        psrad $13,%mm1
        psrad $13,%mm5
        packssdw %mm1,%mm1
        packssdw %mm5,%mm5
        psubd %mm0,%mm0                 # mm0 = 0
        psubd %mm4,%mm4                 # mm4 = 0
        psubsw %mm1,%mm0                # mm0 = 0 - sample A, with signed saturation
        psubsw %mm5,%mm4                # mm4 = 0 - sample B, with signed saturation

	movq	(%edi), %mm1            # same masked merge as in the first half
	punpckldq %mm4, %mm0
	pand   one_null, %mm1
	pand   null_one, %mm0
	por    %mm0, %mm1
	movq   %mm1,(%edi)

        subl $64,%esi                   # buffer cursor: two blocks backwards
        addl $128,%edx                  # decwins cursor: two steps forward
        leal 8(%edi),%edi               # output cursor: two slots forward
        decl %ecx
	jnz  .L4

        movq  (%edx),%mm0               # final single sample, negated like the pairs above
        pmaddwd (%esi),%mm0
        movq  8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq  16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq  24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq  %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm0,%mm1
        psrad $13,%mm1
        packssdw %mm1,%mm1
        psubd %mm0,%mm0
        psubsw %mm1,%mm0
        movd %mm0,%eax
	movw %ax,(%edi)                 # store only the 16-bit sample

	emms                            # empty the MMX state before returning
        popl %ebx                       # restore in reverse order of the prologue pushes
        popl %esi
        popl %edi
        popl %ebp
        ret