/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */
#include "config.h"
#include "mangle.h"

#define real float /* ugly - but only way */

extern short mp3lib_decwins[];
extern void (*dct64_MMX_func)(short*, short*, real*);

/*
 * 64-bit lane masks used by the inline asm below to merge freshly computed
 * samples into the output buffer:
 *   null_one keeps the low  16 bits of each 32-bit lane,
 *   one_null keeps the high 16 bits of each 32-bit lane.
 * They are referenced only by name from inside the asm string (through
 * MANGLE()); attribute_used presumably keeps the compiler from discarding
 * them -- its definition is not visible in this file.
 */
static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;

/*
 * Constant table exported for use outside this file (nothing in this file
 * reads it).
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * (e.g. 1056974725 == 0x3F002785); confirm against the consumer.
 * NOTE(review): "unsigned long" is 8 bytes on LP64 targets, so the element
 * stride there is not 4 bytes -- verify this matches what the consumer
 * expects before building for x86-64.
 */
unsigned long __attribute__((aligned(16))) costab_mmx[] =
{
    1056974725,
    1057056395,
    1057223771,
    1057485416,
    1057855544,
    1058356026,
    1059019886,
    1059897405,
    1061067246,
    1062657950,
    1064892987,
    1066774581,
    1069414683,
    1073984175,
    1079645762,
    1092815430,
    1057005197,
    1057342072,
    1058087743,
    1059427869,
    1061799040,
    1065862217,
    1071413542,
    1084439708,
    1057128951,
    1058664893,
    1063675095,
    1076102863,
    1057655764,
    1067924853,
    1060439283,
};

/*
 * Produce 32 16-bit output samples for one channel using MMX.
 *
 * bandPtr  - passed unchanged to dct64_MMX_func() as its third argument.
 * channel  - 0 selects buffs[1] and advances the shared ring offset "bo"
 *            (decrement modulo 16); any other value selects buffs[0],
 *            leaves "bo" untouched and starts writing one short later
 *            (samples + 1).
 * samples  - output buffer. Every second short, starting at the (possibly
 *            incremented) pointer, is overwritten: 32 values at short
 *            indices 0, 2, ..., 62. The shorts in between are read and
 *            written back unchanged by the 8-byte read-modify-write stores.
 *
 * Always returns 0.
 *
 * State is kept in function-local statics (buffs, bo), so the function is
 * not reentrant.
 */
int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
{
    static short buffs[2][2][0x110] __attribute__((aligned(8)));
    static int bo = 1;
    short *b0, (*buf)[0x110], *a, *b;
    short *window;
    int bo1, i = 8; /* i: iteration count of the first asm loop */

    if (channel == 0) {
        bo = (bo - 1) & 0xf;
        buf = buffs[1];
    } else {
        samples++;
        buf = buffs[0];
    }

    /*
     * Pick which half of the channel buffer the windowing pass reads (b0)
     * and where the DCT writes its two outputs (a, b), depending on the
     * parity of the ring offset.
     */
    if (bo & 1) {
        b0 = buf[1];
        bo1 = bo + 1;
        a = buf[0] + bo;
        b = buf[1] + ((bo + 1) & 0xf);
    } else {
        b0 = buf[0];
        bo1 = bo;
        b = buf[0] + bo;
        a = buf[1] + ((bo + 1) & 0xf);
    }

    dct64_MMX_func(a, b, bandPtr);
    window = mp3lib_decwins + 16 - bo1;
    //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);

    /*
     * Operands: %0 = i (loop counter), %1 = window, %2 = b0, %3 = samples.
     *
     * Loop labels are assembler-local numeric labels (1:, 2:) instead of
     * fixed names: the compiler may emit this asm statement more than once
     * (inlining, cloning), and a fixed symbolic label would then be defined
     * twice in the same assembly output.
     */
    __asm __volatile(
        /*
         * Loop 1 (8 iterations, 2 samples each): two 16-tap dot products of
         * window against b0, each summed horizontally, shifted right
         * arithmetically by 13 and saturated to 16 bits.
         */
        ASMALIGN(4)
        "1:\n\t"
        "movq (%1),%%mm0\n\t"
        "movq 64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd 32(%2),%%mm4\n\t"
        "movq 8(%1),%%mm1\n\t"
        "movq 72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd 40(%2),%%mm5\n\t"
        "movq 16(%1),%%mm2\n\t"
        "movq 80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd 48(%2),%%mm6\n\t"
        "movq 24(%1),%%mm3\n\t"
        "movq 88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd 56(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"
        /*
         * Merge: keep the high word of each 32-bit lane already in the
         * output (one_null), insert the two new samples into the low words
         * (null_one), and store the 8 bytes back.
         */
        "movq (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand "MANGLE(one_null)", %%mm1\n\t"
        "pand "MANGLE(null_one)", %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%3)\n\t"
        "add $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz 1b\n\t"

        /* Single middle sample: one dot product, stored as one 16-bit word. */
        "movq (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq 8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq 16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq 24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%3)\n\t"
        /* From here on b0 is walked backwards; reload the counter with 7. */
        "sub $32,%2\n\t"
        "add $64,%1\n\t"
        "add $4,%3\n\t"
        "movl $7,%0\n\t"

        /*
         * Loop 2 (7 iterations, 2 samples each): same dot products, but the
         * second one reads b0 at negative offsets, and each result is
         * negated with a saturating subtract from zero before the merge.
         */
        ASMALIGN(4)
        "2:\n\t"
        "movq (%1),%%mm0\n\t"
        "movq 64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd -32(%2),%%mm4\n\t"
        "movq 8(%1),%%mm1\n\t"
        "movq 72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd -24(%2),%%mm5\n\t"
        "movq 16(%1),%%mm2\n\t"
        "movq 80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd -16(%2),%%mm6\n\t"
        "movq 24(%1),%%mm3\n\t"
        "movq 88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd -8(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        /* mm0 = 0 - mm1, mm4 = 0 - mm5 (16-bit signed saturation). */
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"
        "movq (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand "MANGLE(one_null)", %%mm1\n\t"
        "pand "MANGLE(null_one)", %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%3)\n\t"
        "sub $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz 2b\n\t"

        /* Final sample: one negated dot product, stored as one 16-bit word. */
        "movq (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq 8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq 16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq 24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%3)\n\t"
        /* Leave MMX state so that x87 floating point can be used again. */
        "emms\n\t"
        :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples)
        :
        :"memory", "%eax");
    return 0;
}