diff options
Diffstat (limited to 'libao2/ao_lavc.c')
-rw-r--r-- | libao2/ao_lavc.c | 588 |
1 files changed, 588 insertions, 0 deletions
diff --git a/libao2/ao_lavc.c b/libao2/ao_lavc.c new file mode 100644 index 0000000000..b22cd325dc --- /dev/null +++ b/libao2/ao_lavc.c @@ -0,0 +1,588 @@ +/* + * audio encoding using libavformat + * Copyright (C) 2011 Rudolf Polzer <divVerent@xonotic.org> + * NOTE: this file is partially based on ao_pcm.c by Atmosfear + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include <libavutil/common.h> +#include <libavutil/audioconvert.h> + +#include "config.h" +#include "options.h" +#include "mpcommon.h" +#include "fmt-conversion.h" +#include "libaf/format.h" +#include "libaf/reorder_ch.h" +#include "talloc.h" +#include "audio_out.h" +#include "mp_msg.h" + +#include "encode_lavc.h" + +static const char *sample_padding_signed = "\x00\x00\x00\x00"; +static const char *sample_padding_u8 = "\x80"; +static const char *sample_padding_float = "\x00\x00\x00\x00"; + +struct priv { + uint8_t *buffer; + size_t buffer_size; + AVStream *stream; + int pcmhack; + int aframesize; + int aframecount; + int offset; + int offset_left; + int64_t savepts; + int framecount; + int64_t lastpts; + int sample_size; + const void *sample_padding; + + AVRational worst_time_base; + int worst_time_base_is_stream; +}; + +// open & setup audio device +static int init(struct ao *ao, char *params) +{ + struct priv *ac = talloc_zero(ao, struct priv); + const enum AVSampleFormat *sampleformat; + AVCodec *codec; + + if (!encode_lavc_available(ao->encode_lavc_ctx)) { + mp_msg(MSGT_ENCODE, MSGL_ERR, + "ao-lavc: the option -o (output file) must be specified\n"); + return -1; + } + + if (ac->stream) { + mp_msg(MSGT_ENCODE, MSGL_ERR, "ao-lavc: rejecting reinitialization\n"); + return -1; + } + + ac->stream = encode_lavc_alloc_stream(ao->encode_lavc_ctx, + AVMEDIA_TYPE_AUDIO); + + if (!ac->stream) { + mp_msg(MSGT_ENCODE, MSGL_ERR, "ao-lavc: could not get a new audio stream\n"); + return -1; + } + + codec = encode_lavc_get_codec(ao->encode_lavc_ctx, ac->stream); + + // ac->stream->time_base.num = 1; + // ac->stream->time_base.den = ao->samplerate; + // doing this breaks mpeg2ts in ffmpeg + // which doesn't properly force the time base to be 90000 + // furthermore, ffmpeg.c doesn't do this either and works + + ac->stream->codec->time_base.num = 1; + ac->stream->codec->time_base.den = ao->samplerate; + + ac->stream->codec->sample_rate = ao->samplerate; + ac->stream->codec->channels = ao->channels; + + ac->stream->codec->sample_fmt = AV_SAMPLE_FMT_NONE; + + { + // first check if the selected format is somewhere in the list of + // supported formats by the codec + for (sampleformat = codec->sample_fmts; + sampleformat && *sampleformat != AV_SAMPLE_FMT_NONE; + ++sampleformat) { + switch (*sampleformat) { + case AV_SAMPLE_FMT_U8: + if (ao->format == AF_FORMAT_U8) + goto out_search; + break; + case AV_SAMPLE_FMT_S16: + if (ao->format == AF_FORMAT_S16_BE) + goto out_search; + if (ao->format == AF_FORMAT_S16_LE) + goto out_search; + break; + case AV_SAMPLE_FMT_S32: + if (ao->format == AF_FORMAT_S32_BE) + goto out_search; + if (ao->format == AF_FORMAT_S32_LE) + goto out_search; + break; + case AV_SAMPLE_FMT_FLT: + if (ao->format == AF_FORMAT_FLOAT_BE) + goto out_search; + if (ao->format == AF_FORMAT_FLOAT_LE) + goto out_search; + break; + default: + break; + } + } +out_search: + ; + } + + if (!sampleformat || *sampleformat == AV_SAMPLE_FMT_NONE) { + // if the selected format is not supported, we have to pick the first + // one we CAN support + // note: not needing to select endianness here, as the switch() below + // does that anyway for us + for (sampleformat = codec->sample_fmts; + sampleformat && *sampleformat != AV_SAMPLE_FMT_NONE; + ++sampleformat) { + switch (*sampleformat) { + case AV_SAMPLE_FMT_U8: + ao->format = AF_FORMAT_U8; + goto out_takefirst; + case AV_SAMPLE_FMT_S16: + ao->format = AF_FORMAT_S16_NE; + goto out_takefirst; + case AV_SAMPLE_FMT_S32: + ao->format = AF_FORMAT_S32_NE; + goto out_takefirst; + case AV_SAMPLE_FMT_FLT: + ao->format = AF_FORMAT_FLOAT_NE; + goto out_takefirst; + default: + break; + } + } +out_takefirst: + ; + } + + switch (ao->format) { + // now that we have chosen a format, set up the fields for it, boldly + // switching endianness if needed (mplayer code will convert for us + // anyway, but ffmpeg always expects native endianness) + case AF_FORMAT_U8: + ac->stream->codec->sample_fmt = AV_SAMPLE_FMT_U8; + ac->sample_size = 1; + ac->sample_padding = sample_padding_u8; + ao->format = AF_FORMAT_U8; + break; + default: + case AF_FORMAT_S16_BE: + case AF_FORMAT_S16_LE: + ac->stream->codec->sample_fmt = AV_SAMPLE_FMT_S16; + ac->sample_size = 2; + ac->sample_padding = sample_padding_signed; + ao->format = AF_FORMAT_S16_NE; + break; + case AF_FORMAT_S32_BE: + case AF_FORMAT_S32_LE: + ac->stream->codec->sample_fmt = AV_SAMPLE_FMT_S32; + ac->sample_size = 4; + ac->sample_padding = sample_padding_signed; + ao->format = AF_FORMAT_S32_NE; + break; + case AF_FORMAT_FLOAT_BE: + case AF_FORMAT_FLOAT_LE: + ac->stream->codec->sample_fmt = AV_SAMPLE_FMT_FLT; + ac->sample_size = 4; + ac->sample_padding = sample_padding_float; + ao->format = AF_FORMAT_FLOAT_NE; + break; + } + + ac->stream->codec->bits_per_raw_sample = ac->sample_size * 8; + + switch (ao->channels) { + case 1: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_MONO; + break; + case 2: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_STEREO; + break; + /* someone please check if these are what mplayer normally assumes + case 3: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_SURROUND; + break; + case 4: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_2_2; + break; + */ + case 5: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_5POINT0; + break; + case 6: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_5POINT1; + break; + case 8: + ac->stream->codec->channel_layout = AV_CH_LAYOUT_7POINT1; + break; + default: + mp_msg(MSGT_ENCODE, MSGL_ERR, + "ao-lavc: unknown channel layout; hoping for the best\n"); + break; + } + + if (encode_lavc_open_codec(ao->encode_lavc_ctx, ac->stream) < 0) + return -1; + + ac->pcmhack = 0; + if (ac->stream->codec->frame_size <= 1) + ac->pcmhack = av_get_bits_per_sample(ac->stream->codec->codec_id) / 8; + + if (ac->pcmhack) { + ac->aframesize = 16384; // "enough" + ac->buffer_size = ac->aframesize * ac->pcmhack * ao->channels * 2 + 200; + } else { + ac->aframesize = ac->stream->codec->frame_size; + ac->buffer_size = ac->aframesize * ac->sample_size * ao->channels * 2 + + 200; + } + if (ac->buffer_size < FF_MIN_BUFFER_SIZE) + ac->buffer_size = FF_MIN_BUFFER_SIZE; + ac->buffer = talloc_size(ac, ac->buffer_size); + + // enough frames for at least 0.25 seconds + ac->framecount = ceil(ao->samplerate * 0.25 / ac->aframesize); + // but at least one! + ac->framecount = FFMAX(ac->framecount, 1); + + ac->savepts = MP_NOPTS_VALUE; + ac->lastpts = MP_NOPTS_VALUE; + ac->offset = ac->stream->codec->sample_rate * + encode_lavc_getoffset(ao->encode_lavc_ctx, ac->stream); + ac->offset_left = ac->offset; + + //fill_ao_data: + ao->outburst = ac->aframesize * ac->sample_size * ao->channels * + ac->framecount; + ao->buffersize = ao->outburst * 2; + ao->bps = ao->channels * ao->samplerate * ac->sample_size; + ao->untimed = true; + ao->priv = ac; + + return 0; +} + +static void fill_with_padding(void *buf, int cnt, int sz, const void *padding) +{ + int i; + if (sz == 1) { + memset(buf, cnt, *(char *)padding); + return; + } + for (i = 0; i < cnt; ++i) + memcpy((char *) buf + i * sz, padding, sz); +} + +// close audio device +static int encode(struct ao *ao, int ptsvalid, double apts, void *data); +static void uninit(struct ao *ao, bool cut_audio) +{ + struct priv *ac = ao->priv; + if (ac->buffer) { + double pts = ao->pts + ac->offset / (double) ao->samplerate; + if (ao->buffer.len > 0) { + void *paddingbuf = talloc_size(ao, + ac->aframesize * ao->channels * ac->sample_size); + memcpy(paddingbuf, ao->buffer.start, ao->buffer.len); + fill_with_padding((char *) paddingbuf + ao->buffer.len, + (ac->aframesize * ao->channels * ac->sample_size + - ao->buffer.len) / ac->sample_size, + ac->sample_size, ac->sample_padding); + encode(ao, ao->pts != MP_NOPTS_VALUE, pts, paddingbuf); + pts += ac->aframesize / (double) ao->samplerate; + talloc_free(paddingbuf); + ao->buffer.len = 0; + } + while (encode(ao, true, pts, NULL) > 0) ; + } + + ao->priv = NULL; +} + +// return: how many bytes can be played without blocking +static int get_space(struct ao *ao) +{ + return ao->outburst; +} + +// must get exactly ac->aframesize amount of data +static int encode(struct ao *ao, int ptsvalid, double apts, void *data) +{ + AVFrame *frame; + AVPacket packet; + struct priv *ac = ao->priv; + struct encode_lavc_context *ectx = ao->encode_lavc_ctx; + double realapts = ac->aframecount * (double) ac->aframesize / + ao->samplerate; + int status, gotpacket; + + ac->aframecount++; + if (data && (ao->channels == 5 || ao->channels == 6 || ao->channels == 8)) { + reorder_channel_nch(data, AF_CHANNEL_LAYOUT_MPLAYER_DEFAULT, + AF_CHANNEL_LAYOUT_LAVC_DEFAULT, + ao->channels, + ac->aframesize * ao->channels, ac->sample_size); + } + + if (data && ptsvalid) + ectx->audio_pts_offset = realapts - apts; + + av_init_packet(&packet); + packet.data = ac->buffer; + packet.size = ac->buffer_size; + if(data) + { + frame = avcodec_alloc_frame(); + frame->nb_samples = ac->aframesize; + if(avcodec_fill_audio_frame(frame, ao->channels, ac->stream->codec->sample_fmt, data, ac->aframesize * ao->channels * ac->sample_size, 1)) + { + mp_msg(MSGT_ENCODE, MSGL_ERR, "ao-lavc: error filling\n"); + return -1; + } + + if (ao->encode_lavc_ctx->options->rawts) { + // raw audio pts + frame->pts = floor(apts * ac->stream->codec->time_base.den / ac->stream->codec->time_base.num + 0.5); + } else if (ectx->options->copyts) { + // real audio pts + frame->pts = floor((apts + ectx->discontinuity_pts_offset) * ac->stream->codec->time_base.den / ac->stream->codec->time_base.num + 0.5); + } else { + // audio playback time + frame->pts = floor(realapts * ac->stream->codec->time_base.den / ac->stream->codec->time_base.num + 0.5); + } + + int64_t frame_pts = av_rescale_q(frame->pts, ac->stream->codec->time_base, ac->worst_time_base); + if (ac->lastpts != MP_NOPTS_VALUE && frame_pts <= ac->lastpts) { + // this indicates broken video + // (video pts failing to increase fast enough to match audio) + mp_msg(MSGT_ENCODE, MSGL_WARN, "ao-lavc: audio frame pts went backwards " + "(%d <- %d), autofixed\n", (int)frame->pts, + (int)ac->lastpts); + frame_pts = ac->lastpts + 1; + frame->pts = av_rescale_q(frame_pts, ac->worst_time_base, ac->stream->codec->time_base); + } + ac->lastpts = frame_pts; + + frame->quality = ac->stream->codec->global_quality; + status = avcodec_encode_audio2(ac->stream->codec, &packet, frame, &gotpacket); + + if (!status) { + if (ac->savepts == MP_NOPTS_VALUE) + ac->savepts = frame->pts; + } + + av_free(frame); + } + else + { + status = avcodec_encode_audio2(ac->stream->codec, &packet, NULL, &gotpacket); + } + + if(status) + { + mp_msg(MSGT_ENCODE, MSGL_ERR, "ao-lavc: error encoding\n"); + return -1; + } + + if(!gotpacket) + return 0; + + mp_msg(MSGT_ENCODE, MSGL_DBG2, + "ao-lavc: got pts %f (playback time: %f); out size: %d\n", + apts, realapts, packet.size); + + encode_lavc_write_stats(ao->encode_lavc_ctx, ac->stream); + + // Do we need this at all? Better be safe than sorry... + if (packet.pts == AV_NOPTS_VALUE) { + mp_msg(MSGT_ENCODE, MSGL_WARN, "ao-lavc: encoder lost pts, why?\n"); + if (ac->savepts != MP_NOPTS_VALUE) + packet.pts = ac->savepts; + } + + if (packet.pts != AV_NOPTS_VALUE) + packet.pts = av_rescale_q(packet.pts, ac->stream->codec->time_base, + ac->stream->time_base); + + if (packet.dts != AV_NOPTS_VALUE) + packet.dts = av_rescale_q(packet.dts, ac->stream->codec->time_base, + ac->stream->time_base); + + if(packet.duration > 0) + packet.duration = av_rescale_q(packet.duration, ac->stream->codec->time_base, + ac->stream->time_base); + + ac->savepts = MP_NOPTS_VALUE; + + if (encode_lavc_write_frame(ao->encode_lavc_ctx, &packet) < 0) { + mp_msg(MSGT_ENCODE, MSGL_ERR, "ao-lavc: error writing at %f %f/%f\n", + realapts, (double) ac->stream->time_base.num, + (double) ac->stream->time_base.den); + return -1; + } + + return packet.size; +} + +// plays 'len' bytes of 'data' +// it should round it down to outburst*n +// return: number of bytes played +static int play(struct ao *ao, void *data, int len, int flags) +{ + struct priv *ac = ao->priv; + struct encode_lavc_context *ectx = ao->encode_lavc_ctx; + int bufpos = 0; + int64_t ptsoffset; + void *paddingbuf = NULL; + double nextpts; + + len /= ac->sample_size * ao->channels; + + if (!encode_lavc_start(ectx)) { + mp_msg(MSGT_ENCODE, MSGL_WARN, "ao-lavc: NOTE: deferred initial audio frame (probably because video is not there yet)\n"); + return 0; + } + + if (ac->worst_time_base.den == 0) { + //if (ac->stream->codec->time_base.num / ac->stream->codec->time_base.den >= ac->stream->time_base.num / ac->stream->time_base.den) + if (ac->stream->codec->time_base.num * (double) ac->stream->time_base.den >= + ac->stream->time_base.num * (double) ac->stream->codec->time_base.den) { + mp_msg(MSGT_ENCODE, MSGL_V, "ao-lavc: NOTE: using codec time base " + "(%d/%d) for pts adjustment; the stream base (%d/%d) is " + "not worse.\n", (int)ac->stream->codec->time_base.num, + (int)ac->stream->codec->time_base.den, (int)ac->stream->time_base.num, + (int)ac->stream->time_base.den); + ac->worst_time_base = ac->stream->codec->time_base; + ac->worst_time_base_is_stream = 0; + } else { + mp_msg(MSGT_ENCODE, MSGL_WARN, "ao-lavc: NOTE: not using codec time " + "base (%d/%d) for pts adjustment; the stream base (%d/%d) " + "is worse.\n", (int)ac->stream->codec->time_base.num, + (int)ac->stream->codec->time_base.den, (int)ac->stream->time_base.num, + (int)ac->stream->time_base.den); + ac->worst_time_base = ac->stream->time_base; + ac->worst_time_base_is_stream = 1; + } + + // NOTE: we use the following "axiom" of av_rescale_q: + // if time base A is worse than time base B, then + // av_rescale_q(av_rescale_q(x, A, B), B, A) == x + // this can be proven as long as av_rescale_q rounds to nearest, which + // it currently does + + // av_rescale_q(x, A, B) * B = "round x*A to nearest multiple of B" + // and: + // av_rescale_q(av_rescale_q(x, A, B), B, A) * A + // == "round av_rescale_q(x, A, B)*B to nearest multiple of A" + // == "round 'round x*A to nearest multiple of B' to nearest multiple of A" + // + // assume this fails. Then there is a value of x*A, for which the + // nearest multiple of B is outside the range [(x-0.5)*A, (x+0.5)*A[. + // Absurd, as this range MUST contain at least one multiple of B. + } + + ptsoffset = ac->offset; + // this basically just edits ao->apts for syncing purposes + + if (ectx->options->copyts || ectx->options->rawts) { + // we do not send time sync data to the video side, + // but we always need the exact pts, even if zero + } else { + // here we must "simulate" the pts editing + // 1. if we have to skip stuff, we skip it + // 2. if we have to add samples, we add them + // 3. we must still adjust ptsoffset appropriately for AV sync! + // invariant: + // if no partial skipping is done, the first frame gets ao->apts passed as pts! + + if (ac->offset_left < 0) { + if (ac->offset_left <= -len) { + // skip whole frame + ac->offset_left += len; + return len * ac->sample_size * ao->channels; + } else { + // skip part of this frame, buffer/encode the rest + bufpos -= ac->offset_left; + ptsoffset += ac->offset_left; + ac->offset_left = 0; + } + } else if (ac->offset_left > 0) { + // make a temporary buffer, filled with zeroes at the start + // (don't worry, only happens once) + + paddingbuf = talloc_size(ac, ac->sample_size * ao->channels * + (ac->offset_left + len)); + fill_with_padding(paddingbuf, ac->offset_left, ac->sample_size, + ac->sample_padding); + data = (char *) paddingbuf + ac->sample_size * ao->channels * + ac->offset_left; + bufpos -= ac->offset_left; // yes, negative! + ptsoffset += ac->offset_left; + ac->offset_left = 0; + + // now adjust the bufpos so the final value of bufpos is positive! + /* + int cnt = (len - bufpos) / ac->aframesize; + int finalbufpos = bufpos + cnt * ac->aframesize; + */ + int finalbufpos = len - (len - bufpos) % ac->aframesize; + if (finalbufpos < 0) { + mp_msg(MSGT_ENCODE, MSGL_WARN, "ao-lavc: cannot attain the " + "exact requested audio sync; shifting by %d frames\n", + -finalbufpos); + bufpos -= finalbufpos; + } + } + } + + // fix the discontinuity pts offset + if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) { + nextpts = ao->pts + ptsoffset / (double) ao->samplerate; + ectx->discontinuity_pts_offset = ectx->next_in_pts - nextpts; + } + + while (len - bufpos >= ac->aframesize) { + encode(ao, ao->pts != MP_NOPTS_VALUE, + ao->pts + (bufpos + ptsoffset) / (double) ao->samplerate + + encode_lavc_getoffset(ectx, ac->stream), + (char *) data + ac->sample_size * bufpos * ao->channels); + bufpos += ac->aframesize; + } + + talloc_free(paddingbuf); + + // set next allowed output pts value + nextpts = ao->pts + ectx->discontinuity_pts_offset + (bufpos + ptsoffset) / (double) ao->samplerate; + if (nextpts > ectx->next_in_pts) + ectx->next_in_pts = nextpts; + + return bufpos * ac->sample_size * ao->channels; +} + +const struct ao_driver audio_out_lavc = { + .is_new = true, + .info = &(const struct ao_info) { + "audio encoding using libavcodec", + "lavc", + "Rudolf Polzer <divVerent@xonotic.org>", + "" + }, + .init = init, + .uninit = uninit, + .get_space = get_space, + .play = play, +}; |