From ce11ba5de5b6a9c03d405d515b60e386e7b0937f Mon Sep 17 00:00:00 2001 From: reimar Date: Sun, 18 Nov 2007 22:13:49 +0000 Subject: Add padding and unroll loop 4x for at least another 10% speedup git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@25104 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libaf/af_scaletempo.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'libaf') diff --git a/libaf/af_scaletempo.c b/libaf/af_scaletempo.c index 984afa5db3..6e53d13bab 100644 --- a/libaf/af_scaletempo.c +++ b/libaf/af_scaletempo.c @@ -115,6 +115,8 @@ static int fill_queue(struct af_instance_s* af, af_data_t* data, int offset) return offset - offset_unchanged; } +#define UNROLL_PADDING (4*4) + static int best_overlap_offset_float(af_scaletempo_t* s) { float *pw, *po, *ppc, *search_start; @@ -174,8 +176,12 @@ static int best_overlap_offset_s16(af_scaletempo_t* s) ps += s->samples_overlap - s->num_channels; i = -(s->samples_overlap - s->num_channels); do { - corr += ppc[i] * ps[i]; - } while (++i < 0); + corr += ppc[i+0] * ps[i+0]; + corr += ppc[i+1] * ps[i+1]; + corr += ppc[i+2] * ps[i+2]; + corr += ppc[i+3] * ps[i+3]; + i += 4; + } while (i < 0); if (corr > best_corr) { best_corr = corr; best_off = off; @@ -368,12 +374,13 @@ static int control(struct af_instance_s* af, int cmd, void* arg) int64_t t = frames_overlap; int32_t n = 8589934588LL / (t * t); // 4 * (2^31 - 1) / t^2 int32_t* pw; - s->buf_pre_corr = realloc(s->buf_pre_corr, s->bytes_overlap * 2); + s->buf_pre_corr = realloc(s->buf_pre_corr, s->bytes_overlap * 2 + UNROLL_PADDING); s->table_window = realloc(s->table_window, s->bytes_overlap * 2 - nch * bps * 2); if(!s->buf_pre_corr || !s->table_window) { af_msg(AF_MSG_FATAL, "[scaletempo] Out of memory\n"); return AF_ERROR; } + memset((char *)s->buf_pre_corr + s->bytes_overlap * 2, 0, UNROLL_PADDING); pw = s->table_window; for (i=1; i> 15; @@ -406,7 +413,7 @@ static int control(struct af_instance_s* af, int cmd, void* arg) s->bytes_queue = (s->frames_search + frames_stride + frames_overlap) * bps * nch; - s->buf_queue = realloc(s->buf_queue, s->bytes_queue); + s->buf_queue = realloc(s->buf_queue, s->bytes_queue + UNROLL_PADDING); if(!s->buf_queue) { af_msg(AF_MSG_FATAL, "[scaletempo] Out of memory\n"); return AF_ERROR; -- cgit v1.2.3