From 6e695dc64f0310745a32e2399a955fbf1368cd1a Mon Sep 17 00:00:00 2001 From: bircoph Date: Wed, 13 May 2009 15:22:13 +0000 Subject: Remove all kind of trailing whitespaces from all MPlayer's files. This affects all kind of spaces (' ',^I,^M,^L,...): actually [:space:] regex character set. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@29306 b3059339-0415-0410-9bf9-f77b7e298cf2 --- liba52/bitstream.c | 2 +- liba52/bitstream.h | 8 +- liba52/crc.c | 12 +- liba52/downmix.c | 178 ++++++++--------- liba52/imdct.c | 160 ++++++++-------- liba52/imdct_3dnow.h | 84 ++++---- liba52/liba52_changes.diff | 464 ++++++++++++++++++++++----------------------- liba52/parse.c | 16 +- liba52/srfftp.h | 20 +- liba52/srfftp_3dnow.h | 6 +- liba52/test.c | 8 +- 11 files changed, 479 insertions(+), 479 deletions(-) (limited to 'liba52') diff --git a/liba52/bitstream.c b/liba52/bitstream.c index a46ccced6b..7307527194 100644 --- a/liba52/bitstream.c +++ b/liba52/bitstream.c @@ -99,7 +99,7 @@ int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits) if (num_bits != 0) result = (result << num_bits) | (state->current_word >> (32 - num_bits)); - + state->bits_left = 32 - num_bits; return result; diff --git a/liba52/bitstream.h b/liba52/bitstream.h index 8500212c7e..e894f16781 100644 --- a/liba52/bitstream.h +++ b/liba52/bitstream.h @@ -104,11 +104,11 @@ static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) result<<= (indx&0x07); result>>= 32 - num_bits; indx+= num_bits; - + return result; #else uint32_t result; - + if (num_bits < state->bits_left) { result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; @@ -136,11 +136,11 @@ static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) result<<= (indx&0x07); result>>= 32 - num_bits; indx+= num_bits; - + return result; #else int32_t result; - + if (num_bits < state->bits_left) { result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits); state->bits_left -= num_bits; diff --git a/liba52/crc.c b/liba52/crc.c index d19a4a2e11..aa0a19c005 100644 --- a/liba52/crc.c +++ b/liba52/crc.c @@ -1,23 +1,23 @@ -/* +/* * crc.c * * Copyright (C) Aaron Holtzman - May 1999 * * This file is part of ac3dec, a free Dolby AC-3 stream decoder. - * + * * ac3dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. - * + * * ac3dec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * */ @@ -25,7 +25,7 @@ #include #include -static const uint16_t crc_lut[256] = +static const uint16_t crc_lut[256] = { 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011, 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022, diff --git a/liba52/downmix.c b/liba52/downmix.c index c44317fd6f..212c87d921 100644 --- a/liba52/downmix.c +++ b/liba52/downmix.c @@ -62,7 +62,7 @@ void downmix_accel_init(uint32_t mm_accel) if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; #endif } - + int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -695,10 +695,10 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" - "addps (%1, %%"REG_S"), %%xmm0 \n\t" - "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%1, %%"REG_S"), %%xmm0 \n\t" + "addps 16(%1, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm7, %%xmm1 \n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" @@ -718,9 +718,9 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -739,10 +739,10 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -761,12 +761,12 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" - "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" - "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm1, %%xmm0 \n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" @@ -784,9 +784,9 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -807,9 +807,9 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" //common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps (%1, %%"REG_S"), %%xmm2 \n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -831,7 +831,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias) ASMALIGN(4) "1: \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" @@ -854,10 +854,10 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -878,10 +878,10 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround "addps %%xmm7, %%xmm0 \n\t" // common - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm0, %%xmm1 \n\t" "addps %%xmm0, %%xmm2 \n\t" @@ -904,9 +904,9 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm1 \n\t" "addps %%xmm7, %%xmm2 \n\t" @@ -929,13 +929,13 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps %%xmm7, %%xmm0 \n\t" // common "movaps %%xmm0, %%xmm1 \n\t" // common - "addps (%0, %%"REG_S"), %%xmm0 \n\t" - "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" - "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" - "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps (%0, %%"REG_S"), %%xmm0 \n\t" + "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" "movaps %%xmm0, (%0, %%"REG_S") \n\t" "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" "add $16, %%"REG_S" \n\t" @@ -953,16 +953,16 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" - "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" "addps %%xmm7, %%xmm0 \n\t" // common - "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround - "movaps (%0, %%"REG_S"), %%xmm1 \n\t" - "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" - "subps %%xmm2, %%xmm1 \n\t" - "addps %%xmm2, %%xmm3 \n\t" - "addps %%xmm0, %%xmm1 \n\t" - "addps %%xmm0, %%xmm3 \n\t" + "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" + "subps %%xmm2, %%xmm1 \n\t" + "addps %%xmm2, %%xmm3 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm3 \n\t" "movaps %%xmm1, (%0, %%"REG_S") \n\t" "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" "add $16, %%"REG_S" \n\t" @@ -980,8 +980,8 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movaps (%0, %%"REG_S"), %%xmm0 \n\t" - "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" "addps %%xmm7, %%xmm0 \n\t" @@ -1229,13 +1229,13 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" "movq 24(%0, %%"REG_S"), %%mm3 \n\t" - "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd (%1, %%"REG_S"), %%mm0 \n\t" "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" - "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1260,11 +1260,11 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1287,13 +1287,13 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" @@ -1316,17 +1316,17 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" "movq 1032(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm7, %%mm0 \n\t" "pfadd %%mm7, %%mm1 \n\t" - "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "pfadd %%mm2, %%mm0 \n\t" "pfadd %%mm3, %%mm1 \n\t" @@ -1347,11 +1347,11 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1378,11 +1378,11 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" "movq 1032(%1, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" //common "pfadd %%mm7, %%mm1 \n\t" //common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq (%1, %%"REG_S"), %%mm4 \n\t" "movq 8(%1, %%"REG_S"), %%mm5 \n\t" @@ -1411,7 +1411,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias) "1: \n\t" "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 1024(%0, %%"REG_S"), %%mm4\n\t" "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1442,13 +1442,13 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1475,11 +1475,11 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 2048(%0, %%"REG_S"), %%mm4\n\t" "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1512,11 +1512,11 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 2048(%0, %%"REG_S"), %%mm0\n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" "movq 2056(%0, %%"REG_S"), %%mm1\n\t" "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" "movq 1024(%0, %%"REG_S"), %%mm4\n\t" "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1547,19 +1547,19 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common "movq %%mm0, %%mm2 \n\t" // common "movq %%mm1, %%mm3 \n\t" // common - "pfadd (%0, %%"REG_S"), %%mm0 \n\t" + "pfadd (%0, %%"REG_S"), %%mm0 \n\t" "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" - "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" - "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" - "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" "movq %%mm0, (%0, %%"REG_S") \n\t" "movq %%mm1, 8(%0, %%"REG_S") \n\t" @@ -1581,25 +1581,25 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias) "1: \n\t" "movd %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t" - "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" "movq 1032(%0, %%"REG_S"), %%mm1\n\t" - "movq 3072(%0, %%"REG_S"), %%mm4\n\t" + "movq 3072(%0, %%"REG_S"), %%mm4\n\t" "movq 3080(%0, %%"REG_S"), %%mm5\n\t" "pfadd %%mm7, %%mm0 \n\t" // common "pfadd %%mm7, %%mm1 \n\t" // common - "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround + "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround - "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq (%0, %%"REG_S"), %%mm2 \n\t" "movq 8(%0, %%"REG_S"), %%mm3 \n\t" - "movq 2048(%0, %%"REG_S"), %%mm6\n\t" + "movq 2048(%0, %%"REG_S"), %%mm6\n\t" "movq 2056(%0, %%"REG_S"), %%mm7\n\t" - "pfsub %%mm4, %%mm2 \n\t" + "pfsub %%mm4, %%mm2 \n\t" "pfsub %%mm5, %%mm3 \n\t" - "pfadd %%mm4, %%mm6 \n\t" + "pfadd %%mm4, %%mm6 \n\t" "pfadd %%mm5, %%mm7 \n\t" - "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm0, %%mm2 \n\t" "pfadd %%mm1, %%mm3 \n\t" - "pfadd %%mm0, %%mm6 \n\t" + "pfadd %%mm0, %%mm6 \n\t" "pfadd %%mm1, %%mm7 \n\t" "movq %%mm2, (%0, %%"REG_S") \n\t" "movq %%mm3, 8(%0, %%"REG_S") \n\t" @@ -1620,9 +1620,9 @@ static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) "mov $-1024, %%"REG_S" \n\t" ASMALIGN(4) "1: \n\t" - "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq (%0, %%"REG_S"), %%mm0 \n\t" "movq 8(%0, %%"REG_S"), %%mm1 \n\t" - "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" "movq 24(%0, %%"REG_S"), %%mm3 \n\t" "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" diff --git a/liba52/imdct.c b/liba52/imdct.c index b813345537..089fa0acae 100644 --- a/liba52/imdct.c +++ b/liba52/imdct.c @@ -72,24 +72,24 @@ static const int pm128[128] attribute_used __attribute__((aligned(16))) = 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -}; +}; static uint8_t attribute_used bit_reverse_512[] = { - 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, - 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, - 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, - 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, - 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, - 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, - 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, - 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, - 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, - 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, - 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, - 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, - 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, - 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, - 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, + 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, + 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, + 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, + 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, + 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, + 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, + 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, + 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, + 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, + 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, + 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, + 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, + 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, + 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, + 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; static uint8_t fftorder[] = { @@ -120,8 +120,8 @@ static sample_t __attribute__((aligned(16))) xcos1[128]; static sample_t __attribute__((aligned(16))) xsin1[128]; #if ARCH_X86 || ARCH_X86_64 -// NOTE: SSE needs 16byte alignment or it will segfault -// +// NOTE: SSE needs 16byte alignment or it will segfault +// static float __attribute__((aligned(16))) sseSinCos1c[256]; static float __attribute__((aligned(16))) sseSinCos1d[256]; static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; @@ -328,7 +328,7 @@ void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; const sample_t * window = a52_imdct_window; complex_t buf[128]; - + for (i = 0; i < 128; i++) { k = fftorder[i]; t_r = pre1[i].real; @@ -417,17 +417,17 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ for( i=0; i < 128; i++) { - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ int j= bit_reverse_512[i]; buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); } - + /* 1. iteration */ for(i = 0; i < 128; i += 2) { #if 0 @@ -440,7 +440,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf[i+1].real = tmp_a_r - tmp_b_r; buf[i+1].imag = tmp_a_i - tmp_b_i; #else - vector float temp, bufv; + vector float temp, bufv; bufv = vec_ld(i << 3, (float*)buf); temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); @@ -448,7 +448,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) vec_st(bufv, i << 3, (float*)buf); #endif } - + /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} for(i = 0; i < 128; i += 4) { @@ -472,7 +472,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf[i+3].imag = tmp_a_i + tmp_b_i; #else vector float buf01, buf23, temp1, temp2; - + buf01 = vec_ld((i + 0) << 3, (float*)buf); buf23 = vec_ld((i + 2) << 3, (float*)buf); buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); @@ -540,14 +540,14 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) buf45 = vec_ld((i + 4) << 3, (float*)buf); buf67 = vec_ld((i + 6) << 3, (float*)buf); buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); - + vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); #endif } - + /* 4-7. iterations */ for (m=3; m < 7; m++) { two_m = (1 << m); @@ -600,10 +600,10 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); // then butterfly with buf[p] and buf[p+1] vecp = vec_ld(p << 3, (float*)buf); - + temp1 = vec_add(vecp, vecq); temp2 = vec_sub(vecp, vecq); - + vec_st(temp1, p << 3, (float*)buf); vec_st(temp2, q << 3, (float*)buf); #endif @@ -660,7 +660,7 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); temp2 = vec_madd(temp1133, tempCS01, vczero); bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); - + vec_st(bufv_0, (i + 0) << 3, (float*)buf); /* idem with bufv_2 and high-order cosv/sinv */ @@ -674,36 +674,36 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); vec_st(bufv_2, (i + 2) << 3, (float*)buf); - + #endif } - + data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; } - - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; } - + /* The trailing edge of the window goes into the delay line */ delay_ptr = delay; - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf[64+i].real * *--window_ptr; - *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].real * *--window_ptr; + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; } - + for(i=0; i<64; i++) { - *delay_ptr++ = buf[i].imag * *--window_ptr; - *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + *delay_ptr++ = buf[i].imag * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; } } #endif @@ -716,8 +716,8 @@ imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) #define HAVE_AMD3DNOW 1 #include "srfftp_3dnow.h" -const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; -const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; +const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; #undef HAVE_AMD3DNOWEXT @@ -746,9 +746,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - /* see the c version (dct_do_512()), its allmost identical, just in C */ + /* see the c version (dct_do_512()), its allmost identical, just in C */ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ /* Bit reversed shuffling */ @@ -809,7 +809,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) } } */ - + /* 1. iteration */ // Note w[0][0]={1,0} __asm__ volatile( @@ -831,7 +831,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) :: "g" (buf), "r" (buf + 128) : "%"REG_S ); - + /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} __asm__ volatile( @@ -863,8 +863,8 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) */ __asm__ volatile( - "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" - "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" + "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" "xorps %%xmm5, %%xmm5 \n\t" "xorps %%xmm2, %%xmm2 \n\t" "mov %0, %%"REG_S" \n\t" @@ -890,10 +890,10 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "addps %%xmm1, %%xmm3 \n\t" "subps %%xmm4, %%xmm0 \n\t" "subps %%xmm5, %%xmm1 \n\t" - "movaps %%xmm2, (%%"REG_S") \n\t" - "movaps %%xmm3, 16(%%"REG_S") \n\t" - "movaps %%xmm0, 32(%%"REG_S") \n\t" - "movaps %%xmm1, 48(%%"REG_S") \n\t" + "movaps %%xmm2, (%%"REG_S") \n\t" + "movaps %%xmm3, 16(%%"REG_S") \n\t" + "movaps %%xmm0, 32(%%"REG_S") \n\t" + "movaps %%xmm1, 48(%%"REG_S") \n\t" "add $64, %%"REG_S" \n\t" "cmp %1, %%"REG_S" \n\t" " jb 1b \n\t" @@ -927,7 +927,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" "add $16, %%"REG_D" \n\t" - "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 "jb 2b \n\t" "add %2, %%"REG_S" \n\t" "cmp %1, %%"REG_S" \n\t" @@ -954,9 +954,9 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) " jnz 1b \n\t" :: "r" (buf+128) : "%"REG_S - ); + ); + - data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; @@ -980,7 +980,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) : "%"REG_S, "%"REG_D @@ -988,7 +988,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) data_ptr+=128; delay_ptr+=128; // window_ptr+=128; - + __asm__ volatile( "mov $1024, %%"REG_D" \n\t" // 512 "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -1007,7 +1007,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) : "%"REG_S, "%"REG_D @@ -1025,21 +1025,21 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C - "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C - "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf+64), "r" (delay_ptr) : "%"REG_S, "%"REG_D ); delay_ptr+=128; // window_ptr-=128; - + __asm__ volatile( "mov $1024, %%"REG_D" \n\t" // 1024 "xor %%"REG_S", %%"REG_S" \n\t" // 0 @@ -1047,14 +1047,14 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) "1: \n\t" "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? - "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? - "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" "movaps %%xmm0, (%1, %%"REG_S") \n\t" "add $16, %%"REG_S" \n\t" "sub $16, %%"REG_D" \n\t" - "cmp $512, %%"REG_S" \n\t" + "cmp $512, %%"REG_S" \n\t" " jb 1b \n\t" :: "r" (buf), "r" (delay_ptr) : "%"REG_S, "%"REG_D @@ -1088,7 +1088,7 @@ void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) /* Post IFFT complex multiply */ /* Window and convert to real valued signal */ for (i = 0; i < 32; i++) { - /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ + /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ t_r = post2[i].real; t_i = post2[i].imag; @@ -1209,12 +1209,12 @@ void a52_imdct_init (uint32_t mm_accel) sseSinCos1c[2*i+0]= xcos1[i]; sseSinCos1c[2*i+1]= -xcos1[i]; sseSinCos1d[2*i+0]= xsin1[i]; - sseSinCos1d[2*i+1]= xsin1[i]; + sseSinCos1d[2*i+1]= xsin1[i]; } for (i = 1; i < 7; i++) { j = 1 << i; for (k = 0; k < j; k+=2) { - + sseW[i][4*k + 0] = w[i][k+0].real; sseW[i][4*k + 1] = w[i][k+0].real; sseW[i][4*k + 2] = w[i][k+1].real; @@ -1223,15 +1223,15 @@ void a52_imdct_init (uint32_t mm_accel) sseW[i][4*k + 4] = -w[i][k+0].imag; sseW[i][4*k + 5] = w[i][k+0].imag; sseW[i][4*k + 6] = -w[i][k+1].imag; - sseW[i][4*k + 7] = w[i][k+1].imag; - + sseW[i][4*k + 7] = w[i][k+1].imag; + //we multiply more or less uninitalized numbers so we need to use exactly 0.0 if(k==0) { // sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0; sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0; } - + if(2*k == j) { sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0; @@ -1243,9 +1243,9 @@ void a52_imdct_init (uint32_t mm_accel) for(i=0; i<128; i++) { sseWindow[2*i+0]= -a52_imdct_window[2*i+0]; - sseWindow[2*i+1]= a52_imdct_window[2*i+1]; + sseWindow[2*i+1]= a52_imdct_window[2*i+1]; } - + for(i=0; i<64; i++) { sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1]; diff --git a/liba52/imdct_3dnow.h b/liba52/imdct_3dnow.h index 1c13f06870..e8a91d11a4 100644 --- a/liba52/imdct_3dnow.h +++ b/liba52/imdct_3dnow.h @@ -43,7 +43,7 @@ static void FFT_4_3DNOW(complex_t *x) { /* delta_p = 1 here */ - /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} + /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} */ __asm__ volatile( "movq 24(%1), %%mm3\n\t" @@ -86,10 +86,10 @@ static void FFT_4_3DNOW(complex_t *x) static void FFT_8_3DNOW(complex_t *x) { /* delta_p = diag{1, sqrt(i)} here */ - /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} + /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} */ complex_t wT1, wB1, wB2; - + __asm__ volatile( "movq 8(%2), %%mm0\n\t" "movq 24(%2), %%mm1\n\t" @@ -111,9 +111,9 @@ static void FFT_8_3DNOW(complex_t *x) :"memory"); fft_4_3dnow(&x[0]); - + /* x[0] x[4] x[2] x[6] */ - + __asm__ volatile( "movq 40(%1), %%mm0\n\t" "movq %%mm0, %%mm3\n\t" @@ -151,7 +151,7 @@ static void FFT_8_3DNOW(complex_t *x) :"=r"(x) :"0"(x), "r"(&wT1), "r"(&wB1) :"memory"); - + /* x[1] x[5] */ __asm__ volatile ( "movq %6, %%mm6\n\t" @@ -180,7 +180,7 @@ static void FFT_8_3DNOW(complex_t *x) "pxor %%mm6, %%mm1\n\t" "pfacc %%mm1, %%mm0\n\t" "pfmul %4, %%mm0\n\t" - + "movq 40(%3), %%mm5\n\t" #if HAVE_AMD3DNOWEXT "pswapd %%mm5, %%mm5\n\t" @@ -189,7 +189,7 @@ static void FFT_8_3DNOW(complex_t *x) "punpckhdq %%mm1, %%mm5\n\t" #endif "movq %%mm5, %0\n\t" - + "movq 8(%3), %%mm1\n\t" "movq %%mm1, %%mm2\n\t" "pfsub %%mm0, %%mm1\n\t" @@ -197,7 +197,7 @@ static void FFT_8_3DNOW(complex_t *x) "movq %%mm1, 40(%3)\n\t" "movq %%mm2, 8(%3)\n\t" :"=m"(wB2) - :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW), + :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW), "m"(x_plus_minus_3dnow), "m"(x_minus_plus_3dnow) :"memory"); @@ -212,7 +212,7 @@ static void FFT_8_3DNOW(complex_t *x) "punpckldq %%mm1, %%mm2\n\t" "punpckhdq %%mm2, %%mm1\n\t" #endif - "pxor %%mm6, %%mm1\n\t" + "pxor %%mm6, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movq %2, %%mm2\n\t" "movq 56(%4), %%mm3\n\t" @@ -253,10 +253,10 @@ static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB, x3k = x2k + 2 * k; x4k = x3k + 2 * k; wB = wTB + 2 * k; - + TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]); TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]); - + --k; for(;;) { TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]); @@ -271,7 +271,7 @@ static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB, wTB += 2; wB += 2; } - + } void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB) @@ -291,13 +291,13 @@ void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB) /* transform x[3], x[11], x[7], x[15] */ TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]); -} +} static void FFT_128P_3DNOW(complex_t *a) { FFT_8_3DNOW(&a[0]); FFT_4_3DNOW(&a[8]); FFT_4_3DNOW(&a[12]); FFT_ASMB16_3DNOW(&a[0], &a[8]); - + FFT_8_3DNOW(&a[16]), FFT_8_3DNOW(&a[24]); FFT_ASMB_3DNOW(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); @@ -314,7 +314,7 @@ static void FFT_128P_3DNOW(complex_t *a) FFT_ASMB16_3DNOW(&a[64], &a[72]); FFT_8_3DNOW(&a[80]); FFT_8_3DNOW(&a[88]); - + /* FFT_32(&a[64]); */ FFT_ASMB_3DNOW(4, &a[64], &a[80],&delta32[0], &delta32_3[0]); @@ -325,7 +325,7 @@ static void FFT_128P_3DNOW(complex_t *a) FFT_8_3DNOW(&a[112]), FFT_8_3DNOW(&a[120]); /* FFT_32(&a[96]); */ FFT_ASMB_3DNOW(4, &a[96], &a[112], &delta32[0], &delta32_3[0]); - + /* FFT_128(&a[0]); */ FFT_ASMB_3DNOW(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); } @@ -353,9 +353,9 @@ imdct_do_512_3dnow sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; - + /* 512 IMDCT with source and dest data in 'data' */ - + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ #if 1 __asm__ volatile ( @@ -396,7 +396,7 @@ imdct_do_512_3dnow #else __asm__ volatile ("femms":::"memory"); for( i=0; i < 128; i++) { - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ int j= pm128[i]; buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); @@ -432,9 +432,9 @@ imdct_do_512_3dnow FFT_128P_3DNOW (&buf[0]); // __asm__ volatile ("femms \n\t":::"memory"); - + /* Post IFFT complex multiply plus IFFT complex conjugate*/ -#if 1 +#if 1 __asm__ volatile ( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" @@ -448,9 +448,9 @@ imdct_do_512_3dnow #if !HAVE_AMD3DNOWEXT "punpckldq %%mm1, %%mm2\n\t" "punpckhdq %%mm2, %%mm1\n\t" -#else +#else "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */ -#endif +#endif "movd %3, %%mm3\n\t" /* ac3_xsin[i] */ "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */ "pfmul %%mm3, %%mm0\n\t" @@ -472,7 +472,7 @@ imdct_do_512_3dnow /* ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]); ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);*/ } -#else +#else __asm__ volatile ("femms":::"memory"); for( i=0; i < 128; i++) { /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ @@ -482,7 +482,7 @@ imdct_do_512_3dnow buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]); } #endif - + data_ptr = data; delay_ptr = delay; window_ptr = a52_imdct_window; @@ -519,16 +519,16 @@ imdct_do_512_3dnow delay_ptr += 2; } window_ptr += 128; -#else +#else __asm__ volatile ("femms":::"memory"); - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; } - - for(i=0; i< 64; i++) { - *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; - *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; } #endif @@ -566,16 +566,16 @@ imdct_do_512_3dnow delay_ptr += 2; } __asm__ volatile ("femms":::"memory"); -#else +#else __asm__ volatile ("femms":::"memory"); - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf[64+i].real * *--window_ptr; - *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].real * *--window_ptr; + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; } - + for(i=0; i<64; i++) { - *delay_ptr++ = buf[i].imag * *--window_ptr; - *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + *delay_ptr++ = buf[i].imag * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; } -#endif +#endif } diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff index f00bb444d2..1b3574f043 100644 --- a/liba52/liba52_changes.diff +++ b/liba52/liba52_changes.diff @@ -3,7 +3,7 @@ @@ -59,4 +66,9 @@ int a52_block (a52_state_t * state); void a52_free (a52_state_t * state); - + +void* a52_resample_init(uint32_t mm_accel,int flags,int chans); +extern int (* a52_resample) (float * _f, int16_t * s16); + @@ -15,7 +15,7 @@ @@ -103,18 +107,34 @@ #define DELTA_BIT_NONE (2) #define DELTA_BIT_RESERVED (3) - + +#if ARCH_X86_64 +# define REG_a "rax" +# define REG_d "rdx" @@ -33,7 +33,7 @@ void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, int start, int end, int fastleak, int slowleak, expbap_t * expbap); - + int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev); +void downmix_accel_init(uint32_t mm_accel); @@ -44,7 +44,7 @@ sample_t clev, sample_t slev); -void a52_upmix (sample_t * samples, int acmod, int output); +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); - + void a52_imdct_init (uint32_t mm_accel); void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); @@ -53,9 +53,9 @@ --- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 +++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 @@ -31,6 +35,10 @@ - + #define BUFFER_SIZE 4096 - + +#ifdef ALT_BITSTREAM_READER +int indx=0; +#endif @@ -72,13 +72,13 @@ +#endif bitstream_get (state, align * 8); } - + --- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 +++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 @@ -21,6 +25,42 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + +/* code from ffmpeg/libavcodec */ +#if defined(__sparc__) || defined(hpux) +/* @@ -117,20 +117,20 @@ + /* (stolen from the kernel) */ #ifdef WORDS_BIGENDIAN - + @@ -28,7 +68,7 @@ - + #else - + -# if 0 && defined (__i386__) +# if defined (__i386__) - + # define swab32(x) __i386_swab32(x) static inline const uint32_t __i386_swab32(uint32_t x) @@ -39,19 +79,34 @@ - + # else - + -# define swab32(x)\ -((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ - (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) @@ -143,7 +143,7 @@ + } # endif #endif - + +#ifdef ALT_BITSTREAM_READER +extern int indx; +#endif @@ -151,7 +151,7 @@ void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); - + static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) { +#ifdef ALT_BITSTREAM_READER @@ -160,15 +160,15 @@ + result<<= (indx&0x07); + result>>= 32 - num_bits; + indx+= num_bits; -+ ++ + return result; +#else uint32_t result; - + if (num_bits < state->bits_left) { @@ -61,10 +116,29 @@ } - + return a52_bitstream_get_bh (state, num_bits); +#endif +} @@ -181,7 +181,7 @@ + bitstream_get(state, num_bits); +#endif } - + static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) { +#ifdef ALT_BITSTREAM_READER @@ -190,15 +190,15 @@ + result<<= (indx&0x07); + result>>= 32 - num_bits; + indx+= num_bits; -+ ++ + return result; +#else int32_t result; - + if (num_bits < state->bits_left) { @@ -74,4 +148,5 @@ } - + return a52_bitstream_get_bh_2 (state, num_bits); +#endif } @@ -211,18 +211,18 @@ + * + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) */ - + #include "config.h" - + #include #include - + #include "a52.h" #include "a52_internal.h" +#include "mm_accel.h" - + #define CONVERT(acmod,output) (((output) << 3) + (acmod)) - + + +void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev)= NULL; @@ -247,14 +247,14 @@ + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; +#endif +} -+ ++ int a52_downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) { @@ -447,7 +479,7 @@ samples[i] = 0; } - + -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev) @@ -262,28 +262,28 @@ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -559,7 +591,7 @@ break; - + case CONVERT (A52_3F2R, A52_2F1R): - mix3to2 (samples, bias); + mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) move2to1 (samples + 768, samples + 512, bias); break; - + @@ -583,12 +615,12 @@ break; - + case CONVERT (A52_3F1R, A52_3F2R): - memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); break; } } - + -void a52_upmix (sample_t * samples, int acmod, int output) +void upmix_C (sample_t * samples, int acmod, int output) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { - + @@ -653,3 +685,1104 @@ goto mix_31to21; } @@ -298,10 +298,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps (%1, %%"REG_S"), %%xmm0 \n\t" -+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%1, %%"REG_S"), %%xmm0 \n\t" ++ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm7, %%xmm1 \n\t" + "movaps %%xmm0, (%1, %%"REG_S") \n\t" @@ -321,9 +321,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -342,10 +342,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" @@ -364,12 +364,12 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm1, %%xmm0 \n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "add $16, %%"REG_S" \n\t" @@ -387,9 +387,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -410,9 +410,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps (%1, %%"REG_S"), %%xmm2 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -434,7 +434,7 @@ + ASMALIGN(4) + "1: \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" @@ -457,10 +457,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -481,10 +481,10 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround + "addps %%xmm7, %%xmm0 \n\t" // common -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" @@ -507,9 +507,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" @@ -532,13 +532,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common + "movaps %%xmm0, %%xmm1 \n\t" // common -+ "addps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" -+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" -+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "movaps %%xmm0, (%0, %%"REG_S") \n\t" + "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" @@ -556,16 +556,16 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" -+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common -+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround -+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" -+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" -+ "subps %%xmm2, %%xmm1 \n\t" -+ "addps %%xmm2, %%xmm3 \n\t" -+ "addps %%xmm0, %%xmm1 \n\t" -+ "addps %%xmm0, %%xmm3 \n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" ++ "subps %%xmm2, %%xmm1 \n\t" ++ "addps %%xmm2, %%xmm3 \n\t" ++ "addps %%xmm0, %%xmm1 \n\t" ++ "addps %%xmm0, %%xmm3 \n\t" + "movaps %%xmm1, (%0, %%"REG_S") \n\t" + "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" + "add $16, %%"REG_S" \n\t" @@ -583,8 +583,8 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" -+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" + "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" @@ -832,13 +832,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" -+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" ++ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" -+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" ++ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" + "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -863,11 +863,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -890,13 +890,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" @@ -919,17 +919,17 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" -+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm2, %%mm0 \n\t" + "pfadd %%mm3, %%mm1 \n\t" @@ -950,11 +950,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" //common + "pfadd %%mm7, %%mm1 \n\t" //common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -981,11 +981,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" + "movq 1032(%1, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" //common + "pfadd %%mm7, %%mm1 \n\t" //common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq (%1, %%"REG_S"), %%mm4 \n\t" + "movq 8(%1, %%"REG_S"), %%mm5 \n\t" @@ -1014,7 +1014,7 @@ + "1: \n\t" + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1045,13 +1045,13 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1078,11 +1078,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" @@ -1115,11 +1115,11 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" @@ -1150,19 +1150,19 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common + "movq %%mm0, %%mm2 \n\t" // common + "movq %%mm1, %%mm3 \n\t" // common -+ "pfadd (%0, %%"REG_S"), %%mm0 \n\t" ++ "pfadd (%0, %%"REG_S"), %%mm0 \n\t" + "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" -+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" -+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "movq %%mm0, (%0, %%"REG_S") \n\t" + "movq %%mm1, 8(%0, %%"REG_S") \n\t" @@ -1184,25 +1184,25 @@ + "1: \n\t" + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" -+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" -+ "movq 3072(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 3072(%0, %%"REG_S"), %%mm4\n\t" + "movq 3080(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common -+ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround ++ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround + "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround -+ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" -+ "movq 2048(%0, %%"REG_S"), %%mm6\n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm6\n\t" + "movq 2056(%0, %%"REG_S"), %%mm7\n\t" -+ "pfsub %%mm4, %%mm2 \n\t" ++ "pfsub %%mm4, %%mm2 \n\t" + "pfsub %%mm5, %%mm3 \n\t" -+ "pfadd %%mm4, %%mm6 \n\t" ++ "pfadd %%mm4, %%mm6 \n\t" + "pfadd %%mm5, %%mm7 \n\t" -+ "pfadd %%mm0, %%mm2 \n\t" ++ "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm1, %%mm3 \n\t" -+ "pfadd %%mm0, %%mm6 \n\t" ++ "pfadd %%mm0, %%mm6 \n\t" + "pfadd %%mm1, %%mm7 \n\t" + "movq %%mm2, (%0, %%"REG_S") \n\t" + "movq %%mm3, 8(%0, %%"REG_S") \n\t" @@ -1223,9 +1223,9 @@ + "mov $-1024, %%"REG_S" \n\t" + ASMALIGN(4) + "1: \n\t" -+ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" -+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" + "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" + "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" @@ -1401,7 +1401,7 @@ + * michael did port them from libac3 (untested, perhaps totally broken) + * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) */ - + #include "config.h" @@ -39,12 +48,50 @@ #include "a52.h" @@ -1415,12 +1415,12 @@ +#undef HAVE_AMD3DNOWEXT +#define HAVE_AMD3DNOWEXT 0 +#endif - + typedef struct complex_s { sample_t real; sample_t imag; } complex_t; - + +static const int pm128[128] attribute_used __attribute__((aligned(16))) = +{ + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, @@ -1431,24 +1431,24 @@ + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -+}; ++}; + +static uint8_t attribute_used bit_reverse_512[] = { -+ 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, -+ 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, -+ 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, -+ 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, -+ 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, -+ 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, -+ 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, -+ 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, -+ 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, -+ 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, -+ 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, -+ 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d,