From 0f0e2f5f5b68b6dcb73a760e4333ea3dcc79062d Mon Sep 17 00:00:00 2001 From: reimar Date: Sun, 18 Nov 2007 18:52:51 +0000 Subject: Change to a 64 bit accumulation variable instead of shifting. Changing the way the loop is done is necessary to reduce register pressure. About 20% speedup even on 32 bit x86. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@25103 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libaf/af_scaletempo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libaf/af_scaletempo.c b/libaf/af_scaletempo.c index 90f781f655..984afa5db3 100644 --- a/libaf/af_scaletempo.c +++ b/libaf/af_scaletempo.c @@ -69,7 +69,6 @@ typedef struct af_scaletempo_s void* buf_pre_corr; void* table_window; int (*best_overlap_offset)(struct af_scaletempo_s* s); - short shift_corr; // command line float scale_nominal; float ms_stride; @@ -153,7 +152,7 @@ static int best_overlap_offset_s16(af_scaletempo_t* s) { int32_t *pw, *ppc; int16_t *po, *search_start; - int32_t best_corr = INT_MIN; + int64_t best_corr = INT64_MIN; int best_off = 0; int off; long i; @@ -168,12 +167,14 @@ static int best_overlap_offset_s16(af_scaletempo_t* s) search_start = (int16_t*)s->buf_queue + s->num_channels; for (off=0; off<s->frames_search; off++) { - int32_t corr = 0; + int64_t corr = 0; int16_t* ps = search_start; ppc = s->buf_pre_corr; + ppc += s->samples_overlap - s->num_channels; + ps += s->samples_overlap - s->num_channels; i = -(s->samples_overlap - s->num_channels); do { - corr += ( *ppc++ * *ps++ ) >> s->shift_corr; + corr += ppc[i] * ps[i]; } while (++i < 0); if (corr > best_corr) { best_corr = corr; @@ -380,7 +381,6 @@ static int control(struct af_instance_s* af, int cmd, void* arg) *pw++ = v; } } - s->shift_corr = av_log2( 2*(s->samples_overlap - nch) - 1 ); s->best_overlap_offset = best_overlap_offset_s16; } else { float* pw; -- cgit v1.2.3