/* libaf/af_hrtf.c */

/* Experimental audio filter that mixes 5.1 audio, or 5.1 audio with a
   matrix encoded rear center channel, into a headphone signal using
   FIR filtering with HRTF.
*/
//#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>

#include <math.h>

#include "af.h"
#include "dsp.h"

/* HRTF filter coefficients and adjustable parameters */
#include "af_hrtf.h"

/* Per-instance state of the HRTF filter.
 *
 * Filter notation used in the member names: C = center, A = same side
 * as the ear, O = opposite side, F = front, R = rear.
 */
typedef struct af_hrtf_s {
    /* Length of the delay (ring) buffers */
    int dlbuflen;
    /* Length of the HRTF convolution kernels */
    int hrflen;
    /* Length of the bass compensation kernel */
    int basslen;

    /* Ring buffers holding the recent samples of each channel:
       left/right front, left/right rear, center front and the
       (matrix decoded) center rear */
    float *lf;
    float *rf;
    float *lr;
    float *rr;
    float *cf;
    float *cr;

    /* Start of each impulse response actually used for convolution */
    float *cf_ir;
    float *af_ir;
    float *of_ir;
    float *ar_ir;
    float *or_ir;
    float *cr_ir;

    /* Offset added to the ring buffer position for each impulse
       response */
    int cf_o;
    int af_o;
    int of_o;
    int ar_o;
    int or_o;
    int cr_o;

    /* Bass: ring buffers with the summed left and right side input */
    float *ba_l;
    float *ba_r;
    /* Bass: filter kernel */
    float *ba_ir;

    /* Whether to matrix decode the rear center channel */
    int matrix_mode;

    /* Full wave rectified amplitude used to steer the active matrix
       decoding of center rear channel */
    float lr_fwr;
    float rr_fwr;

    /* Cyclic position on the ring buffer */
    int cyc_pos;
} af_hrtf_t;

/* Convolution on a ring buffer
 *    nx:	length of the ring buffer
 *    nk:	length of the convolution kernel
 *    sx:	ring buffer
 *    sk:	convolution kernel
 *    offset:	offset on the ring buffer, can be negative or larger
 *		than nx; it is wrapped into [0, nx)
 *
 * Returns fir() applied to the nk ring buffer samples starting at the
 * wrapped offset.  When that window runs past the end of the buffer
 * it is split in two: the first nx - k kernel taps are applied to the
 * tail of the buffer and the remaining taps to its head.
 */
static float conv(const int nx, const int nk, float *sx, float *sk,
		  const int offset)
{
    /* k = remainder of offset / nx.  The % operator truncates towards
       zero, so a negative offset gives a remainder in (-nx, 0] which
       is shifted up by nx.  Testing the remainder (rather than the
       sign of offset) keeps k < nx when offset is a negative multiple
       of nx. */
    int k = offset % nx;

    if(k < 0)
	k += nx;

    if(nk + k <= nx)
	return fir(nk, sx + k, sk);
    else
	return fir(nk + k - nx, sx, sk + nx - k) +
	    fir(nx - k, sx + k, sk);
}

/* Detect when the impulse response starts (significantly).
 *
 *    sx:	impulse response to scan
 *
 * Returns the index of the first sample whose magnitude exceeds
 * IRTHRESH, or 0 when no sample inside the search window does.
 */
int pulse_detect(float *sx)
{
    /* The search window must be the reference impulse response
       length (128) minus the HRTF filter length */
    const int limit = 128 - HRTFFILTLEN;
    const float level = IRTHRESH;
    int pos = 0;

    while(pos < limit) {
	if(fabs(sx[pos]) > level)
	    return pos;
	pos++;
    }

    /* Nothing above the threshold: start at the beginning */
    return 0;
}

/* Store one input frame into the ring buffers at position k.
 *
 *    s:	filter state
 *    in:	input frame; in[0..4] are read (0: left front, 1: right
 *		front, 2: left rear, 3: right rear, 4: center front)
 *    k:	ring buffer position that is overwritten
 *
 * Declared static inline: a plain "inline" definition does not
 * provide an external definition under C99/C11 rules, so a call that
 * the compiler chooses not to inline would fail to link.
 */
static inline void update_ch(af_hrtf_t *s, short *in, const int k)
{
    /* Update the full wave rectified total amplitude: add the
       magnitude of the new rear samples and subtract the magnitude of
       the samples that are about to be overwritten */
    s->lr_fwr += abs(in[2]) - fabs(s->lr[k]);
    s->rr_fwr += abs(in[3]) - fabs(s->rr[k]);

    s->lf[k] = in[0];
    s->cf[k] = in[4];
    s->rf[k] = in[1];
    s->lr[k] = in[2];
    s->rr[k] = in[3];

    /* Bass buffers: sum of front, center and rear of each side */
    s->ba_l[k] = in[0] + in[4] + in[2];
    s->ba_r[k] = in[4] + in[1] + in[3];
}

/* Derive the center rear sample at ring buffer position k from the
 * two rear input channels.
 *
 *    s:	filter state; lr_fwr and rr_fwr are read, cr[k] is written
 *    in:	input frame; in[2] (left rear) and in[3] (right rear) are
 *		read
 *    k:	ring buffer position that is written
 *
 * Declared static inline: a plain "inline" definition does not
 * provide an external definition under C99/C11 rules, so a call that
 * the compiler chooses not to inline would fail to link.
 */
static inline void matrix_decode_cr(af_hrtf_t *s, short *in, const int k)
{
    /* Active matrix decoding of the center rear channel, 1 in the
       denominator is to prevent singularity.  Each rear sample is
       scaled by (lr_fwr + rr_fwr) / (1 + 2 * own level), i.e. roughly
       the mean of the two rectified levels over the channel's own
       level. */
    float lr_agc = in[2] * (s->lr_fwr + s->rr_fwr) /
	(1 + s->lr_fwr + s->lr_fwr);
    float rr_agc = in[3] * (s->lr_fwr + s->rr_fwr) /
	(1 + s->rr_fwr + s->rr_fwr);

    s->cr[k] = (lr_agc + rr_agc) * M_SQRT1_2;
}

/* Initialization and runtime control
 *
 *    af:	filter instance; af->setup holds the af_hrtf_t state
 *    cmd:	control command
 *    arg:	command argument: an af_data_t for AF_CONTROL_REINIT, a
 *		string for AF_CONTROL_COMMAND_LINE
 *
 * AF_CONTROL_REINIT accepts only 48000 Hz input with at least 5
 * channels and sets the output to native endian signed 16 bit.
 * AF_CONTROL_COMMAND_LINE looks at the first character of the
 * argument: 'm' enables matrix decoding of the rear center channel,
 * '0' disables it.
 *
 * Returns AF_OK on success, AF_ERROR on a rejected or missing
 * argument and AF_UNKNOWN for any other command.
 */
static int control(struct af_instance_s *af, int cmd, void* arg)
{
    af_hrtf_t *s = af->setup;
    const char *opt;
    char mode;

    switch(cmd) {
    case AF_CONTROL_REINIT:
	if(arg == NULL)
	    return AF_ERROR;
	af->data->rate   = ((af_data_t*)arg)->rate;
	if(af->data->rate != 48000) {
	    af_msg(AF_MSG_ERROR,
		   "[hrtf] ERROR: Sampling rate is not 48000 Hz (%d)!\n",
		   af->data->rate);
	    return AF_ERROR;
	}
	af->data->nch    = ((af_data_t*)arg)->nch;
	if(af->data->nch < 5) {
	    af_msg(AF_MSG_ERROR,
		   "[hrtf] ERROR: Insufficient channels (%d < 5).\n",
		   af->data->nch);
	    return AF_ERROR;
	}
	af->data->format = AF_FORMAT_SI | AF_FORMAT_NE;
	af->data->bps    = 2;
	return AF_OK;
    case AF_CONTROL_COMMAND_LINE:
	/* Only the first character selects the mode.  An absent or
	   empty argument is rejected explicitly, so that mode is never
	   read uninitialized. */
	opt = arg;
	if(opt == NULL || opt[0] == '\0') {
	    af_msg(AF_MSG_ERROR,
		   "[hrtf] Missing mode argument ('m' or '0').\n");
	    return AF_ERROR;
	}
	mode = opt[0];
	switch(mode) {
	case 'm':
	    s->matrix_mode = 1;
	    break;
	case '0':
	    s->matrix_mode = 0;
	    break;
	default:
	    af_msg(AF_MSG_ERROR,
		   "[hrtf] Mode is neither 'm', nor '0' (%c).\n",
		   mode);
	    return AF_ERROR;
	}
	return AF_OK;
    }    

    /* NOTE(review): this point is only reached for commands not
       handled above, so the mode summary is printed for each such
       command -- confirm that this is the intended place. */
    af_msg(AF_MSG_INFO,
	   "[hrtf] Using HRTF to mix %s discrete surround into "
	   "L, R channels\n", s->matrix_mode ? "5" : "5+1");
    if(s->matrix_mode)
	af_msg(AF_MSG_INFO,
	       "[hrtf] Using active matrix to decode rear center "
	       "channel\n");

    return AF_UNKNOWN;
}

/* Deallocate memory: release the ring buffers, the bass kernel, the
   filter state and the local audio data descriptor of the instance.
   The *_ir impulse response pointers are not freed here. */
static void uninit(struct af_instance_s *af)
{
    af_hrtf_t *state = af->setup;

    if(state != NULL) {
	/* free() accepts NULL, so buffers that were never allocated
	   need no individual check */
	free(state->lf);
	free(state->rf);
	free(state->lr);
	free(state->rr);
	free(state->cf);
	free(state->cr);
	free(state->ba_l);
	free(state->ba_r);
	free(state->ba_ir);
	free(state);
    }

    free(af->data);
}

/* Saturate a sample to the int16_t range before conversion; values
   already in range are truncated exactly as a plain cast would.
   Converting an out-of-range float to an integer type is undefined
   behavior. */
static int16_t hrtf_clip_s16(float v)
{
    if(v > 32767.0f)
	return 32767;
    if(v < -32768.0f)
	return -32768;
    return (int16_t)v;
}

/* Filter data through filter

Two "tricks" are used to compensate the "color" of the KEMAR data:

1. The KEMAR data is refiltered to ensure that the front L, R channels
on the same side of the ear are equalized (especially in the high
frequencies).

2. A bass compensation is introduced to ensure that 0-200 Hz are not
damped (without any real 3D acoustical image, however).

   af:		filter instance; af->setup holds the af_hrtf_t state and
		af->data the local output buffer
   data:	input audio, interleaved 16 bit samples with data->nch
		channels per frame

Returns data with audio, len and nch updated to describe the output
buffer, or NULL if the local buffer could not be resized.  The mix is
written to output channels 0 and 1; the remaining output channels are
set to 0.
*/
static af_data_t* play(struct af_instance_s *af, af_data_t *data)
{
    af_hrtf_t *s = af->setup;
    short *in = data->audio; // Input audio data
    short *out = NULL; // Output audio data
    short *end = in + data->len / sizeof(short); // Loop end
    float common, left, right, diff, left_b, right_b;
    const int dblen = s->dlbuflen, hlen = s->hrflen, blen = s->basslen;

    if(AF_OK != RESIZE_LOCAL_BUFFER(af, data))
	return NULL;

    out = af->data->audio;

    /* MPlayer's 5 channel layout (notation for the variable):
     * 
     * 0: L (LF), 1: R (RF), 2: Ls (LR), 3: Rs (RR), 4: C (CF), matrix
     * encoded: Cs (CR)
     * 
     * or: L = left, C = center, R = right, F = front, R = rear
     * 
     * Filter notation:
     * 
     *      CF
     * OF        AF
     *      Ear->
     * OR        AR
     *      CR
     * 
     * or: C = center, A = same side, O = opposite, F = front, R = rear
     */

    /* Only complete frames are processed, so that neither the input
       nor the output buffer is accessed past its end when the length
       is not a multiple of the frame size */
    while(end - in >= data->nch) {
	const int k = s->cyc_pos;

	update_ch(s, in, k);

	/* Simulate a 7.5 ms -20 dB echo of the center channel in the
	   front channels (like reflection from a room wall) - a kind of
	   psycho-acoustically "cheating" to focus the center front
	   channel, which is normally hard to be perceived as front */
	s->lf[k] += CFECHOAMPL * s->cf[(k + CFECHODELAY) % s->dlbuflen];
	s->rf[k] += CFECHOAMPL * s->cf[(k + CFECHODELAY) % s->dlbuflen];

	/* Mixer filter matrix */
	common = conv(dblen, hlen, s->cf, s->cf_ir, k + s->cf_o);
	if(s->matrix_mode) {
	    /* In matrix decoding mode, the rear channel gain must be
	       renormalized, as there is an additional channel. */
	    matrix_decode_cr(s, in, k);
	    common +=
		conv(dblen, hlen, s->cr, s->cr_ir, k + s->cr_o) *
		M1_76DB;
	    left    =
		( conv(dblen, hlen, s->lf, s->af_ir, k + s->af_o) +
		  conv(dblen, hlen, s->rf, s->of_ir, k + s->of_o) +
		  (conv(dblen, hlen, s->lr, s->ar_ir, k + s->ar_o) +
		   conv(dblen, hlen, s->rr, s->or_ir, k + s->or_o)) *
		  M1_76DB + common);
	    right   =
		( conv(dblen, hlen, s->rf, s->af_ir, k + s->af_o) +
		  conv(dblen, hlen, s->lf, s->of_ir, k + s->of_o) +
		  (conv(dblen, hlen, s->rr, s->ar_ir, k + s->ar_o) +
		   conv(dblen, hlen, s->lr, s->or_ir, k + s->or_o)) *
		  M1_76DB + common);
	}
	else {
	    left    =
		( conv(dblen, hlen, s->lf, s->af_ir, k + s->af_o) +
		  conv(dblen, hlen, s->rf, s->of_ir, k + s->of_o) +
		  conv(dblen, hlen, s->lr, s->ar_ir, k + s->ar_o) +
		  conv(dblen, hlen, s->rr, s->or_ir, k + s->or_o) +
		  common);
	    right   =
		( conv(dblen, hlen, s->rf, s->af_ir, k + s->af_o) +
		  conv(dblen, hlen, s->lf, s->of_ir, k + s->of_o) +
		  conv(dblen, hlen, s->rr, s->ar_ir, k + s->ar_o) +
		  conv(dblen, hlen, s->lr, s->or_ir, k + s->or_o) +
		  common);
	}

	/* Bass compensation for the lower frequency cut of the HRTF.  A
	   cross talk of the left and right channel is introduced to
	   match the directional characteristics of higher frequencies.
	   The bass will not have any real 3D perception, but that is
	   OK. */
	left_b  = conv(dblen, blen, s->ba_l, s->ba_ir, k);
	right_b = conv(dblen, blen, s->ba_r, s->ba_ir, k);
	left  += (1 - BASSCROSS) * left_b  + BASSCROSS * right_b;
	right += (1 - BASSCROSS) * right_b + BASSCROSS * left_b;
	/* Also mix the LFE channel (if available).  The sample is taken
	   from the input frame: the output frame has not been written
	   yet at this point. */
	if(data->nch >= 6) {
	    left  += in[5] * M3_01DB;
	    right += in[5] * M3_01DB;
	}

	/* Amplitude renormalization. */
	left  *= AMPLNORM;
	right *= AMPLNORM;

	/* "Cheating": linear stereo expansion to amplify the 3D
	   perception.  Note: Too much will destroy the acoustic space
	   and may even result in headaches. */
	diff = STEXPAND2 * (left - right);
	out[0] = hrtf_clip_s16(left  + diff);
	out[1] = hrtf_clip_s16(right - diff);

	/* The remaining channels are not needed any more */
	out[2] = out[3] = out[4] = 0;
	if(af->data->nch >= 6)
	    out[5] = 0;

	/* Next sample... the ring buffer position moves backwards and
	   wraps around at 0 */
	in = &in[data->nch];
	out = &out[af->data->nch];
	(s->cyc_pos)--;
	if(s->cyc_pos < 0)
	    s->cyc_pos += dblen;
    }

    /* Set output data */
    data->audio = af->data->audio;
    data->len   = (data->len * af->mul.n) / af->mul.d;
    data->nch   = af->data->nch;

    return data;
}

/* Allocate the eight ring buffers (lf, rf, lr, rr, cf, cr, ba_l,
 * ba_r), each s->dlbuflen floats long.
 *
 * The buffers are obtained with calloc(), so that all of them --
 * including the bass buffers -- start out zero filled before the
 * first convolution reads them.
 *
 * Returns 0 on success.  On failure returns -1 after releasing the
 * buffers allocated by this call and resetting their pointers to
 * NULL, so nothing is leaked and a later free of them is harmless.
 */
static int allocate(af_hrtf_t *s)
{
    float **bufs[] = {
	&s->lf, &s->rf, &s->lr, &s->rr, &s->cf, &s->cr,
	&s->ba_l, &s->ba_r
    };
    const size_t nbufs = sizeof(bufs) / sizeof(bufs[0]);
    size_t i;

    for(i = 0; i < nbufs; i++) {
	*bufs[i] = calloc(s->dlbuflen, sizeof(float));
	if(*bufs[i] == NULL) {
	    /* Undo the allocations made so far */
	    while(i-- > 0) {
		free(*bufs[i]);
		*bufs[i] = NULL;
	    }
	    return -1;
	}
    }
    return 0;
}

/* Allocate memory and set function pointers
 *
 *    af:	filter instance to initialize
 *
 * Installs the control/uninit/play callbacks, allocates the local
 * data descriptor and the filter state, clears the ring buffers,
 * locates the start of each HRTF impulse response and designs the
 * bass compensation low-pass filter.
 *
 * Returns AF_OK on success.  On failure returns AF_ERROR after
 * releasing everything allocated here; af->setup and af->data are
 * then NULL, so a later call of uninit() on the instance is harmless.
 */
static int open(af_instance_t* af)
{
    /* The bass filter is designed for the only sampling rate that
       control() accepts.  af->data->rate cannot be used here: af->data
       has just been zero allocated, so the rate is still 0. */
    const int rate = 48000;
    int i;
    af_hrtf_t *s;
    float fc;

    af_msg(AF_MSG_INFO,
	   "[hrtf] Head related impulse response (HRIR) derived from KEMAR measurement\n"
	   "[hrtf] data by Bill Gardner <billg@media.mit.edu>\n"
	   "[hrtf] and Keith Martin <kdm@media.mit.edu>.\n"
	   "[hrtf] This data is Copyright 1994 by the MIT Media Laboratory.  It is\n"
	   "[hrtf] provided free with no restrictions on use, provided the authors are\n"
	   "[hrtf] cited when the data is used in any research or commercial application.\n"
	   "[hrtf] URL: http://sound.media.mit.edu/KEMAR.html\n");

    af->control = control;
    af->uninit = uninit;
    af->play = play;
    af->mul.n = 1;
    af->mul.d = 1;
    af->data = calloc(1, sizeof(af_data_t));
    af->setup = calloc(1, sizeof(af_hrtf_t));
    if((af->data == NULL) || (af->setup == NULL))
	goto fail;

    s = af->setup;

    s->dlbuflen = DELAYBUFLEN;
    s->hrflen = HRTFFILTLEN;
    s->basslen = BASSFILTLEN;

    /* The ring buffer position runs backwards, start at the end */
    s->cyc_pos = s->dlbuflen - 1;
    s->matrix_mode = 1;

    if (allocate(s) != 0) {
 	af_msg(AF_MSG_ERROR, "[hrtf] Memory allocation error.\n");
	goto fail;
    }

    /* Clear every ring buffer, the bass buffers included: all of them
       are read by the convolutions before they have been completely
       filled with input */
    for(i = 0; i < s->dlbuflen; i++)
	s->lf[i] = s->rf[i] = s->lr[i] = s->rr[i] = s->cf[i] =
	    s->cr[i] = s->ba_l[i] = s->ba_r[i] = 0;

    s->lr_fwr =
	s->rr_fwr = 0;

    /* Skip the leading part of each impulse response and remember the
       skipped length as offset */
    s->cf_ir = cf_filt + (s->cf_o = pulse_detect(cf_filt));
    s->af_ir = af_filt + (s->af_o = pulse_detect(af_filt));
    s->of_ir = of_filt + (s->of_o = pulse_detect(of_filt));
    s->ar_ir = ar_filt + (s->ar_o = pulse_detect(ar_filt));
    s->or_ir = or_filt + (s->or_o = pulse_detect(or_filt));
    s->cr_ir = cr_filt + (s->cr_o = pulse_detect(cr_filt));

    if((s->ba_ir = malloc(s->basslen * sizeof(float))) == NULL) {
 	af_msg(AF_MSG_ERROR, "[hrtf] Memory allocation error.\n");
	goto fail;
    }
    fc = 2.0 * BASSFILTFREQ / (float)rate;
    if(design_fir(s->basslen, s->ba_ir, &fc, LP | KAISER, 4 * M_PI) ==
       -1) {
	af_msg(AF_MSG_ERROR, "[hrtf] Unable to design low-pass "
	       "filter.\n");
	goto fail;
    }
    for(i = 0; i < s->basslen; i++)
	s->ba_ir[i] *= BASSGAIN;
    
    return AF_OK;

fail:
    /* uninit() copes with a partially initialized instance; the
       pointers are reset afterwards so that nothing is freed twice */
    uninit(af);
    af->setup = NULL;
    af->data = NULL;
    return AF_ERROR;
}

/* Description of this filter: the af_info_t record that exposes the
   filter, with open() as its entry point.  The initializer is
   positional; the field meanings are defined by af_info_t, which is
   declared outside this file -- presumably description, short name,
   author, comment, flags and open callback, in that order (TODO
   confirm against af.h). */
af_info_t af_info_hrtf = {
    "HRTF Headphone",
    "hrtf",
    "ylai",
    "",
    AF_FLAGS_REENTRANT,
    open
};