From f8fb16fc317680c3eb722e9a2057e9a191b2b9cb Mon Sep 17 00:00:00 2001 From: mhoffman Date: Fri, 22 Jun 2007 00:12:44 +0000 Subject: re pipeline loop, to eliminate extra chroma reads git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@23602 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libswscale/internal_bfin.S | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'libswscale/internal_bfin.S') diff --git a/libswscale/internal_bfin.S b/libswscale/internal_bfin.S index 9ed98c38eb..4d395870fe 100644 --- a/libswscale/internal_bfin.S +++ b/libswscale/internal_bfin.S @@ -496,30 +496,29 @@ DEFUN(uyvytoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8 r2 = r2 - r0; m1 = r2; - r6.l = w[i2--]; r6.l = w[i2]; - r6.h = w[i3--]; r6.h = w[i3]; /* I0,I1 - src input line pointers * p0,p1 - luma output line pointers * I2 - dstU * I3 - dstV */ - lsetup (0f, 1f) lc0 = p4; - -0: lsetup (2f, 3f) lc1 = p5; - r0 = [i0++] || r2 = [i1++]; - r1 = [i0++] || r3 = [i1++]; -2: r4 = byteop1p(r1:0, r3:2) || w[i2++] = r6.l; - r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; - r0 = r0 >> 8(v); + lsetup (0f, 1f) lc1 = p4; // H/2 +0: r0 = [i0++] || r2 = [i1++]; + r1 = [i0++] || r3 = [i1++]; + r4 = byteop1p(r1:0, r3:2); + r5 = byteop1p(r1:0, r3:2) (r); + lsetup (2f, 3f) lc0 = p5; // W/4 +2: r0 = r0 >> 8(v); r1 = r1 >> 8(v); r2 = r2 >> 8(v); r3 = r3 >> 8(v); r0 = bytepack(r0, r1); - r2 = bytepack(r2, r3) || [p0++] = r0; - r6 = pack(r5.l, r4.l) || [p1++] = r2; - r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; -3: r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; + r2 = bytepack(r2, r3) || [p0++] = r0; // yyyy + r6 = pack(r5.l, r4.l) || [p1++] = r2; // yyyy + r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; + r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; + r4 = byteop1p(r1:0, r3:2) || w[i2++] = r6.l; // uu +3: r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; // vv i0 += m0; i1 += m0; @@ -528,9 +527,6 @@ DEFUN(uyvytoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8 p0 = p0 + p2; 1: p1 = p1 + p2; - w[i2++] = r6.l; - w[i3++] = r6.h; - (r7:4,p5:4) = [sp++]; unlink; rts; -- cgit v1.2.3