/*
* imdct.c
* Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
* 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru>
* michael did port them from libac3 (untested, perhaps totally broken)
* AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org)
*/
#include "config.h"
#include <math.h>
#include <stdio.h>
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795029
#endif
#include <inttypes.h>
#include "a52.h"
#include "a52_internal.h"
#include "mm_accel.h"
#include "mangle.h"
#ifdef RUNTIME_CPUDETECT
#undef HAVE_3DNOWEX
#endif
#define USE_AC3_C
void (* imdct_256) (sample_t data[], sample_t delay[], sample_t bias);
void (* imdct_512) (sample_t data[], sample_t delay[], sample_t bias);
typedef struct complex_s {
sample_t real;
sample_t imag;
} complex_t;
static void fft_128p(complex_t *a);
static const int pm128[128] attribute_used __attribute__((aligned(16))) =
{
0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
};
/* 128 point bit-reverse LUT */
static uint8_t attribute_used bit_reverse_512[] = {
0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
static uint8_t bit_reverse_256[] = {
0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
#ifdef ARCH_X86
// NOTE: SSE needs 16byte alignment or it will segfault
//
static complex_t __attribute__((aligned(16))) buf[128];
static float __attribute__((aligned(16))) sseSinCos1c[256];
static float __attribute__((aligned(16))) sseSinCos1d[256];
static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
//static float __attribute__((aligned(16))) sseW0[4];
static float __attribute__((aligned(16))) sseW1[8];
static float __attribute__((aligned(16))) sseW2[16];
static float __attribute__((aligned(16))) sseW3[32];
static float __attribute__((aligned(16))) sseW4[64];
static float __attribute__((aligned(16))) sseW5[128];
static float __attribute__((aligned(16))) sseW6[256];
static float __attribute__((aligned(16))) *sseW[7]=
{NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
static float __attribute__((aligned(16))) sseWindow[512];
#else
static complex_t __attribute__((aligned(16))) buf[128];
#endif
/* Twiddle factor LUT */
static complex_t __attribute__((aligned(16))) w_1[1];
static complex_t __attribute__((aligned(16))) w_2[2];
static complex_t __attribute__((aligned(1
|