summaryrefslogtreecommitdiffstats
path: root/libmpcodecs/native/rtjpegn.c
diff options
context:
space:
mode:
authorUoti Urpala <uau@glyph.nonexistent.invalid>2010-10-31 00:04:18 +0300
committerUoti Urpala <uau@glyph.nonexistent.invalid>2010-11-02 04:16:55 +0200
commit389c32b5c72897a8b1b3c929c8f278e8980f2290 (patch)
tree47f7784ec12f9d53172f7ad326ed78f2f9889f74 /libmpcodecs/native/rtjpegn.c
parent8939645dcf39c398e1b70b851b3410299ca619ce (diff)
downloadmpv-389c32b5c72897a8b1b3c929c8f278e8980f2290.tar.bz2
mpv-389c32b5c72897a8b1b3c929c8f278e8980f2290.tar.xz
Remove MEncoder
Disable MEncoder compilation and remove files used by MEncoder only. There's no attempt to remove all references to MEncoder from the build system, documentation etc at this point. Removed files: (muxers, audio/video encoders, misc) mencoder.c cfg-mencoder.h parser-mecmd.[ch] xvid_vbr.[ch] libmpdemux/muxer* libmpcodecs/ae* libmpcodecs/ve* libmpcodecs/native/rtjpegn.[ch] libmpcodecs/native/mmx.h // was used by rtjpegn only Rationale: MEncoder is still useful for some people, but there's not much potential for further development; in the long run almost all use cases can be handled better by solutions based on something else (for example using FFmpeg or encoding MPlayer output). FFmpeg is already getting video filtering support which should work for some common MEncoder uses. Keeping MEncoder working takes extra work that is away from player development. While that amount of work is not huge (mostly MEncoder can be just ignored), it's not completely insignificant either. MEncoder is still maintained to some degree in the svn tree, so if necessary it's possible to use it from there for now. This tree has never had major improvements for the MEncoder side, so using svn MEncoder instead should be no major loss.
Diffstat (limited to 'libmpcodecs/native/rtjpegn.c')
-rw-r--r--libmpcodecs/native/rtjpegn.c1758
1 files changed, 0 insertions, 1758 deletions
diff --git a/libmpcodecs/native/rtjpegn.c b/libmpcodecs/native/rtjpegn.c
deleted file mode 100644
index 66089b0ecf..0000000000
--- a/libmpcodecs/native/rtjpegn.c
+++ /dev/null
@@ -1,1758 +0,0 @@
-/*
- RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za)
-
- With modifications by:
- (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de>
- and
- (c) 1999 by Wim Taymans <wim.taymans@tvd.be>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config.h"
-
-#include "mpbswap.h"
-#include "rtjpegn.h"
-
-#if HAVE_MMX
-#include "mmx.h"
-#endif
-
-//#define SHOWBLOCK 1
-#define BETTERCOMPRESSION 1
-
-/*
- * Zig-zag scan order for a 64-entry (8x8) coefficient block.
- * RTjpeg_ZZ[k] is the index into the block that is visited at scan
- * position k; the stream coders below access data[RTjpeg_ZZ[k]].
- * The initializer is laid out one anti-diagonal per source line.
- */
-static const unsigned char RTjpeg_ZZ[64]={
-0,
-8, 1,
-2, 9, 16,
-24, 17, 10, 3,
-4, 11, 18, 25, 32,
-40, 33, 26, 19, 12, 5,
-6, 13, 20, 27, 34, 41, 48,
-56, 49, 42, 35, 28, 21, 14, 7,
-15, 22, 29, 36, 43, 50, 57,
-58, 51, 44, 37, 30, 23,
-31, 38, 45, 52, 59,
-60, 53, 46, 39,
-47, 54, 61,
-62, 55,
-63 };
-
-/*
- * Per-coefficient scale factors, stored scaled by 2^32 (entry 0 is
- * exactly 2^32, i.e. 1.0).  RTjpeg_dct_init() divides each quantizer,
- * shifted left by 32 bits, by the matching entry of this table.
- * NOTE(review): judging by the name these are the AAN DCT scale-factor
- * products - confirm against the DCT derivation.
- */
-static const __u64 RTjpeg_aan_tab[64]={
-4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
-5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL,
-5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL,
-5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL,
-4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
-3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL,
-2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL,
-1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL,
-};
-
-#if !HAVE_MMX
-/* Intermediate workspace used by the plain C path of RTjpeg_dctY(). */
-/* NOTE(review): the extra 31 entries look like alignment slack - confirm. */
-static __s32 RTjpeg_ws[64+31];
-#endif
-/*
- * Static backing storage.
- * NOTE(review): presumably carved up into the block / table pointers
- * below by the init code, which is outside the part of the file shown
- * here - confirm.
- */
-static __u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
-
-static __s16 *block; // rh
-static __s16 *RTjpeg_block;
-/* Quantizer tables; both are rescaled in place by RTjpeg_dct_init(). */
-static __s32 *RTjpeg_lqt;
-static __s32 *RTjpeg_cqt;
-/* NOTE(review): presumably the matching inverse (dequantization) tables - confirm. */
-static __u32 *RTjpeg_liqt;
-static __u32 *RTjpeg_ciqt;
-
-static unsigned char RTjpeg_lb8;
-static unsigned char RTjpeg_cb8;
-static int RTjpeg_width, RTjpeg_height;
-static int RTjpeg_Ywidth, RTjpeg_Cwidth;
-static int RTjpeg_Ysize, RTjpeg_Csize;
-
-static __s16 *RTjpeg_old=NULL;
-
-/* The mask type follows the build: a 64-bit mmx_t with MMX, a 16-bit value without. */
-#if HAVE_MMX
-static mmx_t RTjpeg_lmask;
-static mmx_t RTjpeg_cmask;
-#else
-static __u16 RTjpeg_lmask;
-static __u16 RTjpeg_cmask;
-#endif
-
-/*
- * Base luminance quantization table, 64 entries (8 per row).
- * The values are the same as the example luminance table given in
- * Annex K of the JPEG specification.
- */
-static const unsigned char RTjpeg_lum_quant_tbl[64] = {
- 16, 11, 10, 16, 24, 40, 51, 61,
- 12, 12, 14, 19, 26, 58, 60, 55,
- 14, 13, 16, 24, 40, 57, 69, 56,
- 14, 17, 22, 29, 51, 87, 80, 62,
- 18, 22, 37, 56, 68, 109, 103, 77,
- 24, 35, 55, 64, 81, 104, 113, 92,
- 49, 64, 78, 87, 103, 121, 120, 101,
- 72, 92, 95, 98, 112, 100, 103, 99
- };
-
-/*
- * Base chrominance quantization table, 64 entries (8 per row).
- * The values are the same as the example chrominance table given in
- * Annex K of the JPEG specification.
- */
-static const unsigned char RTjpeg_chrom_quant_tbl[64] = {
- 17, 18, 24, 47, 99, 99, 99, 99,
- 18, 21, 26, 66, 99, 99, 99, 99,
- 24, 26, 56, 99, 99, 99, 99, 99,
- 47, 66, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99
- };
-
-#ifdef BETTERCOMPRESSION
-
-/*--------------------------------------------------*/
-/* better encoding, but needs a lot more cpu time */
-/* seems to be more effective than old method +lzo */
-/* with this encoding lzo isn't efficient anymore */
-/* there is still more potential for better */
-/* encoding but that would need even more cputime */
-/* anyway your mileage may vary */
-/* */
-/* written by Martin BIELY and Roman HOCHLEITNER */
-/*--------------------------------------------------*/
-
-/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* Block to Stream (encoding) */
-/* */
-
-/*
- * Encode one block of 64 quantized coefficients into a byte stream
- * ("better compression" variant).
- *
- * data - the 64 coefficients, read in zig-zag order through RTjpeg_ZZ
- * strm - output buffer
- * bt8  - not used by this variant
- *
- * Returns the number of bytes written to strm.
- *
- * Stream layout as produced by the code below:
- *   byte 0 : coefficient 0 clamped to 0..254
- *   byte 1 : upper 6 bits = scan position ci of the last non-zero
- *            coefficient, lower 2 bits = first 2-bit code
- *   then the coefficients from scan position ci down to 1, packed with
- *   increasing width:
- *     2-bit codes : 00 = 0, 01 = +1, 11 = -1, 10 = escape to nibbles
- *     4-bit codes : -7..7 as the low nibble of the value,
- *                   1000 = escape to bytes
- *     bytes       : value clamped to -128..127
- *   A coefficient that triggers an escape is emitted again in the wider
- *   format, because ci is not decremented across the goto.
- */
-static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
-{
- register int ci, co=1;
- register __s16 ZZvalue;
- register unsigned char bitten; /* byte currently being assembled */
- register unsigned char bitoff; /* bit offset of the next code inside bitten */
-
-#ifdef SHOWBLOCK
-
- int ii;
- for (ii=0; ii < 64; ii++) {
- fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
- }
- fprintf(stdout, "\n\n");
-
-#endif
-
- // first byte always written
- // note: the (__u8) cast binds to the comparison, not to the whole
- // conditional; the clamped value is narrowed by the assignment itself
- ((__u8*)strm)[0]=
- (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
-
-
- /* find the last non-zero coefficient in scan order (0 if there is none) */
- ci=63;
- while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--;
-
- bitten = ((unsigned char)ci) << 2;
-
- /* only coefficient 0 is present: two bytes in total */
- if (ci==0) {
- ((__u8*)strm)[1]= bitten;
- co = 2;
- return (int)co;
- }
-
- /* bitoff=0 because the high 6bit contain first non zero position */
- bitoff = 0;
- co = 1;
-
- /* stage 1: 2-bit codes */
- for(; ci>0; ci--) {
-
- ZZvalue = data[RTjpeg_ZZ[ci]];
-
- switch(ZZvalue) {
- case 0:
- break;
- case 1:
- bitten |= (0x01<<bitoff);
- break;
- case -1:
- bitten |= (0x03<<bitoff);
- break;
- default:
- /* value does not fit in 2 bits: write the escape code and switch to nibbles */
- bitten |= (0x02<<bitoff);
- goto HERZWEH;
- break;
- }
-
- /* the lowest slot of the byte was just filled: flush it and start a new byte */
- if( bitoff == 0 ) {
- ((__u8*)strm)[co]= bitten;
- bitten = 0;
- bitoff = 8;
- co++;
- } /* "fall through" */
- bitoff-=2;
-
- }
-
- /* ci must be 0 */
- /* bitoff == 6 means the current byte holds no codes yet, so nothing to flush */
- if(bitoff != 6) {
-
- ((__u8*)strm)[co]= bitten;
- co++;
-
- }
- goto BAUCHWEH;
-
-HERZWEH:
-/* ci cannot be 0 */
-/* correct bitoff to nibble boundaries */
-
- switch(bitoff){
- case 4:
- case 6:
- /* the low nibble of the current byte is still free */
- bitoff = 0;
- break;
- case 2:
- case 0:
- /* the escape code landed in the low nibble: flush and start in the high nibble of a new byte */
- ((__u8*)strm)[co]= bitten;
- bitoff = 4;
- co++;
- bitten = 0; // clear half nibble values in bitten
- break;
- default:
- break;
- }
-
- /* stage 2: 4-bit codes */
- for(; ci>0; ci--) {
-
- ZZvalue = data[RTjpeg_ZZ[ci]];
-
- if( (ZZvalue > 7) || (ZZvalue < -7) ) {
- /* value does not fit in a nibble: write the escape code and switch to bytes */
- bitten |= (0x08<<bitoff);
- goto HIRNWEH;
- }
-
- bitten |= (ZZvalue&0xf)<<bitoff;
-
- if( bitoff == 0 ) {
- ((__u8*)strm)[co]= bitten;
- bitten = 0;
- bitoff = 8;
- co++;
- } /* "fall thru" */
- bitoff-=4;
- }
-
- /* ci must be 0 */
- /* bitoff == 0 means a high nibble is pending in bitten */
- if( bitoff == 0 ) {
- ((__u8*)strm)[co]= bitten;
- co++;
- }
- goto BAUCHWEH;
-
-HIRNWEH:
-
- /* flush the byte that carries the nibble escape code */
- ((__u8*)strm)[co]= bitten;
- co++;
-
-
- /* bitting is over now we bite */
- /* stage 3: one byte per remaining coefficient, clamped to -128..127 */
- for(; ci>0; ci--) {
-
- ZZvalue = data[RTjpeg_ZZ[ci]];
-
- /* as above, the (__s8) casts bind to the comparisons only */
- if(ZZvalue>0)
- {
- strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
- }
- else
- {
- strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
- }
-
- }
-
-
-BAUCHWEH:
- /* we got too much, now we are ill */
-#ifdef SHOWBLOCK
-{
-int i;
-fprintf(stdout, "\nco = '%d'\n", co);
- for (i=0; i < co+2; i++) {
- fprintf(stdout, "%d ", strm[i]);
- }
-fprintf(stdout, "\n\n");
-}
-#endif
-
- return (int)co;
-}
-
-#else
-
-/*
- * Encode one block of 64 quantized coefficients into a byte stream
- * (simple variant, built when BETTERCOMPRESSION is not defined).
- *
- * data - the 64 coefficients, read in zig-zag order through RTjpeg_ZZ
- * strm - output buffer
- * bt8  - number of coefficients after the first one that are stored
- *        with the full 8-bit range
- *
- * Returns the number of bytes written to strm.
- *
- * Stream layout:
- *   byte 0          : coefficient 0 clamped to 0..254 (unsigned)
- *   next bt8 bytes  : coefficients clamped to -128..127
- *   remaining bytes : non-zero coefficients clamped to -64..63; a run of
- *                     r consecutive zero coefficients is written as the
- *                     single byte 63+r
- */
-static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
-{
-    int ci, co=1, tmp;
-    __s16 ZZvalue;
-
-#ifdef SHOWBLOCK
-
-    int ii;
-    for (ii=0; ii < 64; ii++) {
-        fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]);
-    }
-    fprintf(stdout, "\n\n");
-
-#endif
-
-    /* Store through a __u8 pointer: assigning to a cast expression is not
-     * valid C, and this matches the BETTERCOMPRESSION variant. */
-    ZZvalue = data[RTjpeg_ZZ[0]];
-    ((__u8*)strm)[0] = (__u8)((ZZvalue>254) ? 254 : ((ZZvalue<0) ? 0 : ZZvalue));
-
-    /* ci<64 keeps the scan inside RTjpeg_ZZ even for an out-of-range bt8 */
-    for(ci=1; ci<=bt8 && ci<64; ci++)
-    {
-        ZZvalue = data[RTjpeg_ZZ[ci]];
-
-        if(ZZvalue>0)
-        {
-            strm[co++]=(__s8)((ZZvalue>127)?127:ZZvalue);
-        }
-        else
-        {
-            strm[co++]=(__s8)((ZZvalue<-128)?-128:ZZvalue);
-        }
-    }
-
-    for(; ci<64; ci++)
-    {
-        ZZvalue = data[RTjpeg_ZZ[ci]];
-
-        if(ZZvalue>0)
-        {
-            strm[co++]=(__s8)((ZZvalue>63)?63:ZZvalue);
-        }
-        else if(ZZvalue<0)
-        {
-            strm[co++]=(__s8)((ZZvalue<-64)?-64:ZZvalue);
-        }
-        else /* compress zeros */
-        {
-            tmp=ci;
-            do
-            {
-                ci++;
-            }
-            while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0));
-
-            /* run length ci-tmp is at most 63, so the code fits in 64..126 */
-            strm[co++]=(__s8)(63+(ci-tmp));
-            ci--; /* the for loop increments ci again */
-        }
-    }
-    return co;
-}
-
-/*
- * Decode one block from the byte stream written by the simple
- * (non-BETTERCOMPRESSION) RTjpeg_b2s() and dequantize it.
- *
- * data - receives the 64 coefficients, stored at their RTjpeg_ZZ positions
- * strm - input stream
- * bt8  - number of coefficients after the first one that were stored
- *        with the full 8-bit range
- * qtbl - dequantization factors, indexed like data
- *
- * Returns the number of stream bytes consumed.
- */
-static int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
-{
-    int ci=1, co, tmp;
-    int i;
-
-    /* the first byte is unsigned */
-    i=RTjpeg_ZZ[0];
-    data[i]=((__u8)strm[0])*qtbl[i];
-
-    /* co<64 keeps the scan inside RTjpeg_ZZ even for an out-of-range bt8 */
-    for(co=1; co<=bt8 && co<64; co++)
-    {
-        i=RTjpeg_ZZ[co];
-        data[i]=strm[ci++]*qtbl[i];
-    }
-
-    for(; co<64; co++)
-    {
-        if(strm[ci]>63)
-        {
-            /* run of (byte-63) zero coefficients */
-            tmp=co+strm[ci]-63;
-            /* a damaged stream must not run past the end of the block */
-            if(tmp>64) tmp=64;
-            for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0;
-            co--; /* the for loop increments co again */
-        } else
-        {
-            i=RTjpeg_ZZ[co];
-            data[i]=strm[ci]*qtbl[i];
-        }
-        ci++;
-    }
-    return ci;
-}
-#endif
-
-#if HAVE_MMX
-/*
- * MMX build: repack both quantizer tables from 32-bit to 16-bit entries,
- * in place.  Each table is written through an __s16 alias of its own
- * storage; 16-bit slot i is written only after 32-bit entry i has been
- * read, and slot i never lies ahead of entry i, so no unread entry is
- * overwritten.  RTjpeg_quant() then reads the tables as packed 16-bit words.
- */
-static void RTjpeg_quant_init(void)
-{
- int i;
- __s16 *qtbl;
-
- qtbl=(__s16 *)RTjpeg_lqt;
- for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i];
-
- qtbl=(__s16 *)RTjpeg_cqt;
- for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i];
-}
-
-/* Four 16-bit words of 1 and of 32767, interleaved with the operands in RTjpeg_quant(). */
-static mmx_t RTjpeg_ones={0x0001000100010001LL};
-static mmx_t RTjpeg_half={0x7fff7fff7fff7fffLL};
-
-/*
- * Quantize one block of 64 coefficients in place (MMX version).
- *
- * block - 64 16-bit coefficients, overwritten with the result
- * qtbl  - quantizer table; read here as packed 16-bit words, four per
- *         64-bit load
- *
- * Per the inline comments, each coefficient b with quantizer q becomes
- * (32767 + b*q) >> 16, saturated to 16 bits by the final pack.
- */
-static void RTjpeg_quant(__s16 *block, __s32 *qtbl)
-{
- int i;
- mmx_t *bl, *ql;
-
- ql=(mmx_t *)qtbl;
- bl=(mmx_t *)block;
-
- movq_m2r(RTjpeg_ones, mm6);
- movq_m2r(RTjpeg_half, mm7);
-
- /* 16 iterations of 4 coefficients each */
- for(i=16; i; i--)
- {
- movq_m2r(*(ql++), mm0); /* quant vals (4) */
- movq_m2r(*bl, mm2); /* block vals (4) */
- movq_r2r(mm0, mm1);
- movq_r2r(mm2, mm3);
-
- punpcklwd_r2r(mm6, mm0); /* 1 qb 1 qa */
- punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */
-
- punpcklwd_r2r(mm7, mm2); /* 32767 bb 32767 ba */
- punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */
-
- pmaddwd_r2r(mm2, mm0); /* 32767+bb*qb 32767+ba*qa */
- pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */
-
- psrad_i2r(16, mm0);
- psrad_i2r(16, mm1);
-
- packssdw_r2r(mm1, mm0);
-
- movq_r2m(mm0, *(bl++));
-
- }
-}
-#else
-/* Non-MMX build: the quantizer tables are used as they are, so there is nothing to prepare. */
-static void RTjpeg_quant_init(void)
-{
-}
-
-/*
- * Quantize one block of 64 coefficients in place (plain C version).
- *
- * block - 64 coefficients, overwritten with the result
- * qtbl  - 64 quantizer factors, one per coefficient
- *
- * Each coefficient is multiplied by its factor, 32767 is added and the
- * sum is shifted right by 16 bits.
- */
-static void RTjpeg_quant(__s16 *block, __s32 *qtbl)
-{
-    __s16 *coef = block;
-    __s16 *const end = block + 64;
-    const __s32 *factor = qtbl;
-
-    while (coef != end) {
-        *coef = (__s16)(((*coef) * (*factor) + 32767) >> 16);
-        coef++;
-        factor++;
-    }
-}
-#endif
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-#if HAVE_MMX
-/*
- * DCT multipliers for the MMX path, each replicated into four 16-bit
- * words and scaled by 2^14: 0x2D41/16384 ~ 0.7071, 0x187E/16384 ~ 0.3827,
- * 0x22A3/16384 ~ 0.5412, 0x539F/16384 ~ 1.3066.  These are the same four
- * factors as the FIX_* constants of the non-MMX path.
- */
-static mmx_t RTjpeg_C4 ={0x2D412D412D412D41LL};
-static mmx_t RTjpeg_C6 ={0x187E187E187E187ELL};
-static mmx_t RTjpeg_C2mC6={0x22A322A322A322A3LL};
-static mmx_t RTjpeg_C2pC6={0x539F539F539F539FLL};
-static mmx_t RTjpeg_zero ={0x0000000000000000LL};
-
-#else
-
-/* DCT multipliers for the plain C path, scaled by 2^8 (e.g. 181/256 ~ 0.7071). */
-#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */
-#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */
-#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */
-#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */
-
-/* Round to nearest and drop 8 (DESCALE10) or 16 (DESCALE20) fraction bits. */
-#define DESCALE10(x) (__s16)( ((x)+128) >> 8)
-#define DESCALE20(x) (__s16)(((x)+32768) >> 16)
-/* Product of a variable and one of the FIX_* constants, as a 32-bit value. */
-#define D_MULTIPLY(var,const) ((__s32) ((var) * (const)))
-#endif
-
-/*
- * Fold the RTjpeg_aan_tab scale factors into both quantizer tables:
- * every entry of RTjpeg_lqt and RTjpeg_cqt is shifted left by 32 bits
- * (as a 64-bit unsigned value), divided by the matching RTjpeg_aan_tab
- * entry and stored back in place.
- */
-static void RTjpeg_dct_init(void)
-{
-    int idx = 0;
-
-    while (idx < 64) {
-        const __u64 scale = RTjpeg_aan_tab[idx];
-        const __u64 lum = (__u64)RTjpeg_lqt[idx] << 32;
-        const __u64 chrom = (__u64)RTjpeg_cqt[idx] << 32;
-
-        RTjpeg_lqt[idx] = lum / scale;
-        RTjpeg_cqt[idx] = chrom / scale;
-        idx++;
-    }
-}
-
-static void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
-{
-#if !HAVE_MMX
- __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- __s32 tmp10, tmp11, tmp12, tmp13;
- __s32 z1, z2, z3, z4, z5, z11, z13;
- __u8 *idataptr;
- __s16 *odataptr;
- __s32 *wsptr;
- int ctr;
-
- idataptr = idata;
- wsptr = RTjpeg_ws;
- for (ctr = 7; ctr >= 0; ctr--) {
- tmp0 = idataptr[0] + idataptr[7];
- tmp7 = idataptr[0] - idataptr[7];
- tmp1 = idataptr[1] + idataptr[6];
- tmp6 = idataptr[1] - idataptr[6];
- tmp2 = idataptr[2] + idataptr[5];
- tmp5 = idataptr[2] - idataptr[5];
- tmp3 = idataptr[3] + idataptr[4];
- tmp4 = idataptr[3] - idataptr[4];
-
- tmp10 = (tmp0 + tmp3); /* phase 2 */
- tmp13 = tmp0 - tmp3;
- tmp11 = (tmp1 + tmp2);
- tmp12 = tmp1 - tmp2;
-
- wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */
- wsptr[4] = (tmp10 - tmp11)<<8;
-
- z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
- wsptr[2] = (tmp13<<8) + z1; /* phase 5 */
- wsptr[6] = (tmp13<<8) - z1;
-
- tmp10 = tmp4 + tmp5; /* phase 2 */
- tmp11 = tmp5 + tmp6;
- tmp12 = tmp6 + tmp7;
-
- z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
- z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
- z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
- z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
- z11 = (tmp7<<8) + z3; /* phase 5 */
- z13 = (tmp7<<8) - z3;
-
- wsptr[5] = z13 + z2; /* phase 6 */
- wsptr[3] = z13 - z2;
- wsptr[1] = z11 + z4;
- wsptr[7] = z11 - z4;
-
- idataptr += rskip<<3; /* advance pointer to next row */
- wsptr += 8;
- }
-
- wsptr = RTjpeg_ws;
- odataptr=odata;
- for (ctr = 7; ctr >= 0; ctr--) {
- tmp0 = wsptr[0] + wsptr[56];
- tmp7 = wsptr[0] - wsptr[56];
- tmp1 = wsptr[8] + wsptr[48];
- tmp6 = wsptr[8] - wsptr[48];
- tmp2 = wsptr[16] + wsptr[40];
- tmp5 = wsptr[16] - wsptr[40];
- tmp3 = wsptr[24] + wsptr[32];
- tmp4 = wsptr[24] - wsptr[32];
-
- tmp10 = tmp0 + tmp3; /* phase 2 */
- tmp13 = tmp0 - tmp3;
- tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
- odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */
- odataptr[32] = DESCALE10(tmp10 - tmp11);
-
- z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
- odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */
- odataptr[48] = DESCALE20((tmp13<<8) - z1);
-
- tmp10 = tmp4 + tmp5; /* phase 2 */
- tmp11 = tmp5 + tmp6;
- tmp12 = tmp6 + tmp7;
-
- z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
- z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
- z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
- z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
- z11 = (tmp7<<8) + z3; /* phase 5 */
- z13 = (tmp7<<8) - z3;
-
- odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */
- odataptr[24] = DESCALE20(z13 - z2);
- odataptr[8] = DESCALE20(z11 + z4);
- odataptr[56] = DESCALE20(z11 - z4);
-
- odataptr++; /* advance pointer to next column */
- wsptr++;
- }
-#else
- volatile mmx_t tmp6, tmp7;
- register mmx_t *dataptr = (mmx_t *)odata;
- mmx_t *idata2 = (mmx_t *)idata;
-
- // first copy the input 8 bit to the destination 16 bits
-
- movq_m2r(RTjpeg_zero, mm2);
-
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+1));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+2));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+3));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+4));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+5));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+6));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+7));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+8));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+9));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+10));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+11));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+12));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+13));
-
- idata2 += rskip;
-
- movq_m2r(*idata2, mm0);
- movq_r2r(mm0, mm1);
-
- punpcklbw_r2r(mm2, mm0);
- movq_r2m(mm0, *(dataptr+14));
-
- punpckhbw_r2r(mm2, mm1);
- movq_r2m(mm1, *(dataptr+15));
-
-/* Start Transpose to do calculations on rows */
-
- movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into m5
-
- movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2
- movq_r2r(mm7, mm5);
-
- punpcklwd_m2r(*(dataptr+11), mm7); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm6, mm2);
-
- punpcklwd_m2r(*(dataptr+15), mm6); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm7, mm1);
-
- movq_m2r(*(dataptr+11), mm3); // m13:m13|m11:m10 - second line
- punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1
-
- movq_m2r(*(dataptr+15), mm0); // m13:m13|m11:m10 - fourth line
- punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2
-
- movq_r2m(mm7,*(dataptr+9)); // write result 1
- punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines
-
- movq_r2m(mm1,*(dataptr+11)); // write result 2
- punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines
-
- movq_r2r(mm5, mm1);
- punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3
-
- movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4
- punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4
-
- movq_r2m(mm5,*(dataptr+13)); // write result 3
-
- // last 4x4 done
-
- movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4
-
- movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line
- movq_r2r(mm0, mm6);
-
- punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm2, mm7);
-
- punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm0, mm4);
-
- //
- movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line
- punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result
-
- movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line
- punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result
-
- punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines
- movq_r2r(mm1, mm2); // copy first line
-
- punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines
- movq_r2r(mm6, mm5); // copy first intermediate result
-
- movq_r2m(mm0, *(dataptr+8)); // write result 1
- punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result
-
- punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines
- movq_r2r(mm3, mm0); // copy third line
-
- punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines
-
- movq_r2m(mm4, *(dataptr+10)); // write result 2 out
- punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result
-
- punpcklwd_m2r(*(dataptr+14), mm3); // n31:n21|n30:n20 - interleave third and fourth lines
- movq_r2r(mm1, mm4);
-
- movq_r2m(mm6, *(dataptr+12)); // write result 3 out
- punpckldq_r2r(mm3, mm1); // n30:n20|n10:n00 - produce first result
-
- punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines
- movq_r2r(mm2, mm6);
-
- movq_r2m(mm5, *(dataptr+14)); // write result 4 out
- punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result
-
- movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block)
- punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result
-
- movq_r2m(mm4, *(dataptr+3)); // write result 6 out
- punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result
-
- movq_r2m(mm2, *(dataptr+5)); // write result 7 out
-
- movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4
-
- movq_r2m(mm6, *(dataptr+7)); // write result 8 out
-
-
-// Do first 4x4 quadrant, which is used in the beginning of the DCT:
-
- movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line
- movq_r2r(mm0, mm2);
-
- punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm7, mm4);
-
- punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm0, mm1);
-
- movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line
- punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1
-
- movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line
- punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2
-
- movq_r2r(mm0, mm7); // write result 1
- punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines
-
- psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */
- movq_r2r(mm1, mm6); // write result 2
-
- paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */
- punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines
-
- paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */
- movq_r2r(mm2, mm3); // copy first intermediate result
-
- psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */
- punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3
-
- movq_r2m(mm7, tmp7);
- movq_r2r(mm2, mm5); // write result 3
-
- movq_r2m(mm6, tmp6);
- punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4
-
- paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+5 /* Stage 1 */
- movq_r2r(mm3, mm4); // write result 4
-
-/************************************************************************************************
- End of Transpose
-************************************************************************************************/
-
-
- paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/
- movq_r2r(mm0, mm7);
-
- psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/
- movq_r2r(mm1, mm6);
-
- paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */
- psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */
-
- psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */
- paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */
-
- psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/
- paddw_r2r(mm7, mm6); // tmp12 + tmp13
-
- /* stage 3 */
-
- movq_m2r(tmp6, mm2);
- movq_r2r(mm0, mm3);
-
- psllw_i2r(2, mm6); // m8 * 2^2
- paddw_r2r(mm1, mm0);
-
- pmulhw_m2r(RTjpeg_C4, mm6); // z1
- psubw_r2r(mm1, mm3);
-
- movq_r2m(mm0, *dataptr);
- movq_r2r(mm7, mm0);
-
- /* Odd part */
- movq_r2m(mm3, *(dataptr+8));
- paddw_r2r(mm5, mm4); // tmp10
-
- movq_m2r(tmp7, mm3);
- paddw_r2r(mm6, mm0); // tmp32
-
- paddw_r2r(mm2, mm5); // tmp11
- psubw_r2r(mm6, mm7); // tmp33
-
- movq_r2m(mm0, *(dataptr+4));
- paddw_r2r(mm3, mm2); // tmp12
-
- /* stage 4 */
-
- movq_r2m(mm7, *(dataptr+12));
- movq_r2r(mm4, mm1); // copy of tmp10
-
- psubw_r2r(mm2, mm1); // tmp10 - tmp12
- psllw_i2r(2, mm4); // m8 * 2^2
-
- movq_m2r(RTjpeg_C2mC6, mm0);
- psllw_i2r(2, mm1);
-
- pmulhw_m2r(RTjpeg_C6, mm1); // z5
- psllw_i2r(2, mm2);
-
- pmulhw_r2r(mm0, mm4); // z5
-
- /* stage 5 */
-
- pmulhw_m2r(RTjpeg_C2pC6, mm2);
- psllw_i2r(2, mm5);
-
- pmulhw_m2r(RTjpeg_C4, mm5); // z3
- movq_r2r(mm3, mm0); // copy tmp7
-
- movq_m2r(*(dataptr+1), mm7);
- paddw_r2r(mm1, mm4); // z2
-
- paddw_r2r(mm1, mm2); // z4
-
- paddw_r2r(mm5, mm0); // z11
- psubw_r2r(mm5, mm3); // z13
-
- /* stage 6 */
-
- movq_r2r(mm3, mm5); // copy z13
- psubw_r2r(mm4, mm3); // y3=z13 - z2
-
- paddw_r2r(mm4, mm5); // y5=z13 + z2
- movq_r2r(mm0, mm6); // copy z11
-
- movq_r2m(mm3, *(dataptr+6)); //save y3
- psubw_r2r(mm2, mm0); // y7=z11 - z4
-
- movq_r2m(mm5, *(dataptr+10)); //save y5
- paddw_r2r(mm2, mm6); // y1=z11 + z4
-
- movq_r2m(mm0, *(dataptr+14)); //save y7
-
- /************************************************
- * End of 1st 4 rows
- ************************************************/
-
- movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */
- movq_r2r(mm7, mm0); // copy x0
-
- movq_r2m(mm6, *(dataptr+2)); //save y1
-
- movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */
- movq_r2r(mm1, mm6); // copy x1
-
- paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7
-
- movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */
- movq_r2r(mm2, mm5); // copy x2
-
- psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7
- movq_r2r(mm3, mm4); // copy x3
-
- paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6
-
- movq_r2m(mm7, tmp7); // save tmp07
- movq_r2r(mm0, mm7); // copy tmp00
-
- psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6
-
- /* stage 2, Even Part */
-
- paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4
-
- movq_r2m(mm6, tmp6); // save tmp07
- movq_r2r(mm1, mm6); // copy tmp01
-
- paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5
- paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03
-
- psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03
-
- psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4
- psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02
-
- paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02
-
- psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5
- paddw_r2r(mm7, mm6); // tmp12 + tmp13
-
- /* stage 3, Even and stage 4 & 5 even */
-
- movq_m2r(tmp6, mm2); // load tmp6
- movq_r2r(mm0, mm3); // copy tmp10
-
- psllw_i2r(2, mm6); // shift z1
- paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11
-
- pmulhw_m2r(RTjpeg_C4, mm6); // z1
- psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11
-
- movq_r2m(mm0, *(dataptr+1)); //save y0
- movq_r2r(mm7, mm0); // copy tmp13
-
- /* odd part */
-
- movq_r2m(mm3, *(dataptr+9)); //save y4
- paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5
-
- movq_m2r(tmp7, mm3); // load tmp7
- paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1
-
- paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6
- psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1
-
- movq_r2m(mm0, *(dataptr+5)); //save y2
- paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7
-
- /* stage 4 */
-
- movq_r2m(mm7, *(dataptr+13)); //save y6
- movq_r2r(mm4, mm1); // copy tmp10
-
- psubw_r2r(mm2, mm1); // tmp10 - tmp12
- psllw_i2r(2, mm4); // shift tmp10
-
- movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6
- psllw_i2r(2, mm1); // shift (tmp10-tmp12)
-
- pmulhw_m2r(RTjpeg_C6, mm1); // z5
- psllw_i2r(2, mm5); // prepare for multiply
-
- pmulhw_r2r(mm0, mm4); // multiply by converted real
-
- /* stage 5 */
-
- pmulhw_m2r(RTjpeg_C4, mm5); // z3
- psllw_i2r(2, mm2); // prepare for multiply
-
- pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply
- movq_r2r(mm3, mm0); // copy tmp7
-
- movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7
- paddw_r2r(mm1, mm4); // z2
-
- paddw_r2r(mm5, mm0); // z11
- psubw_r2r(mm5, mm3); // z13
-
- /* stage 6 */
-
- movq_r2r(mm3, mm5); // copy z13
- paddw_r2r(mm1, mm2); // z4
-
- movq_r2r(mm0, mm6); // copy z11
- psubw_r2r(mm4, mm5); // y3
-
- paddw_r2r(mm2, mm6); // y1
- paddw_r2r(mm4, mm3); // y5
-
- movq_r2m(mm5, *(dataptr+7)); //save y3
-
- movq_r2m(mm6, *(dataptr+3)); //save y1
- psubw_r2r(mm2, mm0); // y7
-
-/************************************************************************************************
- Start of Transpose
-************************************************************************************************/
-
- movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2
- movq_r2r(mm7, mm5); // copy first line
-
- punpcklwd_r2r(mm3, mm7); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm6, mm2); // copy third line
-
- punpcklwd_r2r(mm0, mm6); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm7, mm1); // copy first intermediate result
-
- punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1
-
- punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2
-
- movq_r2m(mm7, *(dataptr+9)); // write result 1
- punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines
-
- movq_r2m(mm1, *(dataptr+11)); // write result 2
- punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines
-
- movq_r2r(mm5, mm1); // copy first intermediate result
- punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3
-
- movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4
- punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4
-
- movq_r2m(mm5, *(dataptr+13)); // write result 3
-
- /****** last 4x4 done */
-
- movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4
-
- movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line
- movq_r2r(mm0, mm6); // copy first line
-
- punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines
- movq_r2r(mm2, mm7); // copy third line
-
- punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines
- movq_r2r(mm0, mm4); // copy first intermediate result
-
-
-
- movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line
- punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result
-
- movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line
- punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result
-
- punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines
- movq_r2r(mm1, mm2); // copy first line
-
- punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines
- movq_r2r(mm6, mm5); // copy first intermediate result
-
- movq_r2m(mm0, *(dataptr+8)); // write result 1
- punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result
-
- punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines
- movq_r2r(mm3, mm0); // copy third line
-
- punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines
-
- movq_r2m(mm4, *(dataptr+10)); // write result 2 out
- pu