1 files changed, 55 insertions, 185 deletions
diff --git a/audio/out/ao_lavc.c b/audio/out/ao_lavc.c
index aaa9e6422c..e18db667a3 100644
--- a/audio/out/ao_lavc.c
+++ b/audio/out/ao_lavc.c
@@ -40,8 +40,8 @@
 #include "common/encode_lavc.h"
 
 struct priv {
-    AVStream *stream;
-    AVCodecContext *codec;
+    struct encoder_context *enc;
+
     int pcmhack;
     int aframesize;
     int aframecount;
@@ -53,14 +53,13 @@ struct priv {
     double expected_next_pts;
 
     AVRational worst_time_base;
-    int worst_time_base_is_stream;
 
     bool shutdown;
 };
 
 static void encode(struct ao *ao, double apts, void **data);
 
-static bool supports_format(AVCodec *codec, int format)
+static bool supports_format(const AVCodec *codec, int format)
 {
     for (const enum AVSampleFormat *sampleformat = codec->sample_fmts;
          sampleformat && *sampleformat != AV_SAMPLE_FMT_NONE;
@@ -72,7 +71,7 @@ static bool supports_format(AVCodec *codec, int format)
     return false;
 }
 
-static void select_format(struct ao *ao, AVCodec *codec)
+static void select_format(struct ao *ao, const AVCodec *codec)
 {
     int formats[AF_FORMAT_COUNT + 1];
     af_get_best_sample_formats(ao->format, formats);
@@ -88,70 +87,53 @@ static void select_format(struct ao *ao, AVCodec *codec)
 // open & setup audio device
 static int init(struct ao *ao)
 {
-    struct priv *ac = talloc_zero(ao, struct priv);
-    AVCodec *codec;
-
-    ao->priv = ac;
+    struct priv *ac = ao->priv;
 
-    if (!encode_lavc_available(ao->encode_lavc_ctx)) {
-        MP_ERR(ao, "the option --o (output file) must be specified\n");
+    ac->enc = encoder_context_alloc(ao->encode_lavc_ctx, STREAM_AUDIO, ao->log);
+    if (!ac->enc)
         return -1;
-    }
+    talloc_steal(ac, ac->enc);
 
-    pthread_mutex_lock(&ao->encode_lavc_ctx->lock);
-
-    if (encode_lavc_alloc_stream(ao->encode_lavc_ctx,
-                                 AVMEDIA_TYPE_AUDIO,
-                                 &ac->stream, &ac->codec) < 0)
-    {
-        MP_ERR(ao, "could not get a new audio stream\n");
-        goto fail;
-    }
-
-    codec = ao->encode_lavc_ctx->ac;
+    AVCodecContext *encoder = ac->enc->encoder;
+    const AVCodec *codec = encoder->codec;
 
     int samplerate = af_select_best_samplerate(ao->samplerate,
                                                codec->supported_samplerates);
     if (samplerate > 0)
         ao->samplerate = samplerate;
 
-    // TODO: Remove this redundancy with encode_lavc_alloc_stream also
-    // setting the time base.
-    // Using codec->time_base is deprecated, but needed for older lavf.
-    ac->stream->time_base.num = 1;
-    ac->stream->time_base.den = ao->samplerate;
-    ac->codec->time_base.num = 1;
-    ac->codec->time_base.den = ao->samplerate;
+    encoder->time_base.num = 1;
+    encoder->time_base.den = ao->samplerate;
 
-    ac->codec->sample_rate = ao->samplerate;
+    encoder->sample_rate = ao->samplerate;
 
     struct mp_chmap_sel sel = {0};
     mp_chmap_sel_add_any(&sel);
     if (!ao_chmap_sel_adjust2(ao, &sel, &ao->channels, false))
         goto fail;
     mp_chmap_reorder_to_lavc(&ao->channels);
-    ac->codec->channels = ao->channels.num;
-    ac->codec->channel_layout = mp_chmap_to_lavc(&ao->channels);
+    encoder->channels = ao->channels.num;
+    encoder->channel_layout = mp_chmap_to_lavc(&ao->channels);
 
-    ac->codec->sample_fmt = AV_SAMPLE_FMT_NONE;
+    encoder->sample_fmt = AV_SAMPLE_FMT_NONE;
 
     select_format(ao, codec);
 
     ac->sample_size = af_fmt_to_bytes(ao->format);
-    ac->codec->sample_fmt = af_to_avformat(ao->format);
-    ac->codec->bits_per_raw_sample = ac->sample_size * 8;
+    encoder->sample_fmt = af_to_avformat(ao->format);
+    encoder->bits_per_raw_sample = ac->sample_size * 8;
 
-    if (encode_lavc_open_codec(ao->encode_lavc_ctx, ac->codec) < 0)
+    if (!encoder_init_codec_and_muxer(ac->enc))
         goto fail;
 
     ac->pcmhack = 0;
-    if (ac->codec->frame_size <= 1)
-        ac->pcmhack = av_get_bits_per_sample(ac->codec->codec_id) / 8;
+    if (encoder->frame_size <= 1)
+        ac->pcmhack = av_get_bits_per_sample(encoder->codec_id) / 8;
 
     if (ac->pcmhack) {
         ac->aframesize = 16384; // "enough"
     } else {
-        ac->aframesize = ac->codec->frame_size;
+        ac->aframesize = encoder->frame_size;
     }
 
     // enough frames for at least 0.25 seconds
@@ -169,7 +151,6 @@ static int init(struct ao *ao)
     if (ao->channels.num > AV_NUM_DATA_POINTERS)
         goto fail;
 
-    pthread_mutex_unlock(&ao->encode_lavc_ctx->lock);
     return 0;
 
 fail:
@@ -184,28 +165,17 @@ static void uninit(struct ao *ao)
     struct priv *ac = ao->priv;
     struct encode_lavc_context *ectx = ao->encode_lavc_ctx;
 
-    if (!ac || ac->shutdown)
-        return;
-
-    pthread_mutex_lock(&ectx->lock);
+    if (!ac->shutdown) {
+        double outpts = ac->expected_next_pts;
 
-    if (!encode_lavc_start(ectx)) {
-        MP_WARN(ao, "not even ready to encode audio at end -> dropped\n");
+        pthread_mutex_lock(&ectx->lock);
+        if (!ac->enc->options->rawts && ac->enc->options->copyts)
+            outpts += ectx->discontinuity_pts_offset;
         pthread_mutex_unlock(&ectx->lock);
-        return;
-    }
 
-    if (ac->stream) {
-        double outpts = ac->expected_next_pts;
-        if (!ectx->options->rawts && ectx->options->copyts)
-            outpts += ectx->discontinuity_pts_offset;
-        outpts += encode_lavc_getoffset(ectx, ac->codec);
+        outpts += encoder_get_offset(ac->enc);
         encode(ao, outpts, NULL);
     }
-
-    pthread_mutex_unlock(&ectx->lock);
-
-    ac->shutdown = true;
 }
 
 // return: how many samples can be played without blocking
@@ -216,106 +186,21 @@ static int get_space(struct ao *ao)
     return ac->aframesize * ac->framecount;
 }
 
-static void write_packet(struct ao *ao, AVPacket *packet)
-{
-    // TODO: Can we unify this with the equivalent video code path?
-    struct priv *ac = ao->priv;
-
-    packet->stream_index = ac->stream->index;
-    if (packet->pts != AV_NOPTS_VALUE) {
-        packet->pts = av_rescale_q(packet->pts,
-                                   ac->codec->time_base,
-                                   ac->stream->time_base);
-    } else {
-        // Do we need this at all? Better be safe than sorry...
-        MP_WARN(ao, "encoder lost pts, why?\n");
-        if (ac->savepts != MP_NOPTS_VALUE) {
-            packet->pts = av_rescale_q(ac->savepts,
-                                       ac->codec->time_base,
-                                       ac->stream->time_base);
-        }
-    }
-    if (packet->dts != AV_NOPTS_VALUE) {
-        packet->dts = av_rescale_q(packet->dts,
-                                   ac->codec->time_base,
-                                   ac->stream->time_base);
-    }
-    if (packet->duration > 0) {
-        packet->duration = av_rescale_q(packet->duration,
-                                        ac->codec->time_base,
-                                        ac->stream->time_base);
-    }
-
-    ac->savepts = AV_NOPTS_VALUE;
-
-    if (encode_lavc_write_frame(ao->encode_lavc_ctx, ac->stream, packet) < 0) {
-        MP_ERR(ao, "error writing at %d %d/%d\n",
-               (int) packet->pts,
-               ac->stream->time_base.num,
-               ac->stream->time_base.den);
-        return;
-    }
-}
-
-static void encode_audio_and_write(struct ao *ao, AVFrame *frame)
-{
-    // TODO: Can we unify this with the equivalent video code path?
-    struct priv *ac = ao->priv;
-    AVPacket packet = {0};
-
-    int status = avcodec_send_frame(ac->codec, frame);
-    if (status < 0) {
-        MP_ERR(ao, "error encoding at %d %d/%d\n",
-               frame ? (int) frame->pts : -1,
-               ac->codec->time_base.num,
-               ac->codec->time_base.den);
-        return;
-    }
-
-    for (;;) {
-        av_init_packet(&packet);
-        status = avcodec_receive_packet(ac->codec, &packet);
-        if (status == AVERROR(EAGAIN)) { // No more packets for now.
-            if (frame == NULL) {
-                MP_ERR(ao, "sent flush frame, got EAGAIN");
-            }
-            break;
-        }
-        if (status == AVERROR_EOF) { // No more packets, ever.
-            if (frame != NULL) {
-                MP_ERR(ao, "sent audio frame, got EOF");
-            }
-            break;
-        }
-        if (status < 0) {
-            MP_ERR(ao, "error encoding at %d %d/%d\n",
-                   frame ? (int) frame->pts : -1,
-                   ac->codec->time_base.num,
-                   ac->codec->time_base.den);
-            break;
-        }
-        if (frame) {
-            if (ac->savepts == AV_NOPTS_VALUE)
-                ac->savepts = frame->pts;
-        }
-        encode_lavc_write_stats(ao->encode_lavc_ctx, ac->codec);
-        write_packet(ao, &packet);
-        av_packet_unref(&packet);
-    }
-}
-
 // must get exactly ac->aframesize amount of data
 static void encode(struct ao *ao, double apts, void **data)
 {
     struct priv *ac = ao->priv;
     struct encode_lavc_context *ectx = ao->encode_lavc_ctx;
+    AVCodecContext *encoder = ac->enc->encoder;
     double realapts = ac->aframecount * (double) ac->aframesize /
                       ao->samplerate;
 
     ac->aframecount++;
 
+    pthread_mutex_lock(&ectx->lock);
     if (data)
         ectx->audio_pts_offset = realapts - apts;
+    pthread_mutex_unlock(&ectx->lock);
 
     if(data) {
         AVFrame *frame = av_frame_alloc();
@@ -329,17 +214,17 @@ static void encode(struct ao *ao, double apts, void **data)
 
         frame->linesize[0] = frame->nb_samples * ao->sstride;
 
-        if (ectx->options->rawts || ectx->options->copyts) {
+        if (ac->enc->options->rawts || ac->enc->options->copyts) {
             // real audio pts
-            frame->pts = floor(apts * ac->codec->time_base.den /
-                               ac->codec->time_base.num + 0.5);
+            frame->pts = floor(apts * encoder->time_base.den /
+                               encoder->time_base.num + 0.5);
         } else {
             // audio playback time
-            frame->pts = floor(realapts * ac->codec->time_base.den /
-                               ac->codec->time_base.num + 0.5);
+            frame->pts = floor(realapts * encoder->time_base.den /
+                               encoder->time_base.num + 0.5);
         }
 
-        int64_t frame_pts = av_rescale_q(frame->pts, ac->codec->time_base,
+        int64_t frame_pts = av_rescale_q(frame->pts, encoder->time_base,
                                          ac->worst_time_base);
         if (ac->lastpts != AV_NOPTS_VALUE && frame_pts <= ac->lastpts) {
             // this indicates broken video
@@ -348,15 +233,15 @@ static void encode(struct ao *ao, double apts, void **data)
                     (int)frame->pts, (int)ac->lastpts);
             frame_pts = ac->lastpts + 1;
             frame->pts = av_rescale_q(frame_pts, ac->worst_time_base,
-                                      ac->codec->time_base);
+                                      encoder->time_base);
         }
         ac->lastpts = frame_pts;
 
-        frame->quality = ac->codec->global_quality;
-        encode_audio_and_write(ao, frame);
+        frame->quality = encoder->global_quality;
+        encoder_encode(ac->enc, frame);
         av_frame_free(&frame);
     } else {
-        encode_audio_and_write(ao, NULL);
+        encoder_encode(ac->enc, NULL);
     }
 }
 
@@ -365,20 +250,16 @@ static void encode(struct ao *ao, double apts, void **data)
 static int play(struct ao *ao, void **data, int samples, int flags)
 {
     struct priv *ac = ao->priv;
+    struct encoder_context *enc = ac->enc;
     struct encode_lavc_context *ectx = ao->encode_lavc_ctx;
     int bufpos = 0;
     double nextpts;
     double outpts;
     int orig_samples = samples;
 
+    // for ectx PTS fields
     pthread_mutex_lock(&ectx->lock);
 
-    if (!encode_lavc_start(ectx)) {
-        MP_WARN(ao, "not ready yet for encoding audio\n");
-        pthread_mutex_unlock(&ectx->lock);
-        return 0;
-    }
-
     double pts = ectx->last_audio_in_pts;
     pts += ectx->samples_since_last_pts / (double)ao->samplerate;
 
@@ -407,26 +288,10 @@ static int play(struct ao *ao, void **data, int samples, int flags)
     }
 
     if (ac->worst_time_base.den == 0) {
-        if (ac->codec->time_base.num * (double) ac->stream->time_base.den >=
-                ac->stream->time_base.num * (double) ac->codec->time_base.den) {
-            MP_VERBOSE(ao, "NOTE: using codec time base (%d/%d) for pts "
-                       "adjustment; the stream base (%d/%d) is not worse.\n",
-                       (int)ac->codec->time_base.num,
-                       (int)ac->codec->time_base.den,
-                       (int)ac->stream->time_base.num,
-                       (int)ac->stream->time_base.den);
-            ac->worst_time_base = ac->codec->time_base;
-            ac->worst_time_base_is_stream = 0;
-        } else {
-            MP_WARN(ao, "NOTE: not using codec time base (%d/%d) for pts "
-                    "adjustment; the stream base (%d/%d) is worse.\n",
-                    (int)ac->codec->time_base.num,
-                    (int)ac->codec->time_base.den,
-                    (int)ac->stream->time_base.num,
-                    (int)ac->stream->time_base.den);
-            ac->worst_time_base = ac->stream->time_base;
-            ac->worst_time_base_is_stream = 1;
-        }
+        // We don't know the muxer time_base anymore, and can't, because we
+        // might start encoding before the muxer is opened. (The muxer decides
+        // the final AVStream.time_base when opening the muxer.)
+        ac->worst_time_base = enc->encoder->time_base;
 
         // NOTE: we use the following "axiom" of av_rescale_q:
         // if time base A is worse than time base B, then
@@ -446,7 +311,7 @@ static int play(struct ao *ao, void **data, int samples, int flags)
     }
 
     // Fix and apply the discontinuity pts offset.
-    if (!ectx->options->rawts && ectx->options->copyts) {
+    if (!enc->options->rawts && enc->options->copyts) {
         // fix the discontinuity pts offset
         nextpts = pts;
         if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) {
@@ -465,8 +330,10 @@ static int play(struct ao *ao, void **data, int samples, int flags)
         outpts = pts;
     }
 
+    pthread_mutex_unlock(&ectx->lock);
+
     // Shift pts by the pts offset first.
-    outpts += encode_lavc_getoffset(ectx, ac->codec);
+    outpts += encoder_get_offset(enc);
 
     while (samples - bufpos >= ac->aframesize) {
         void *start[MP_NUM_CHANNELS] = {0};
@@ -479,8 +346,10 @@ static int play(struct ao *ao, void **data, int samples, int flags)
     // Calculate expected pts of next audio frame (input side).
     ac->expected_next_pts = pts + bufpos / (double) ao->samplerate;
 
+    pthread_mutex_lock(&ectx->lock);
+
     // Set next allowed input pts value (input side).
-    if (!ectx->options->rawts && ectx->options->copyts) {
+    if (!enc->options->rawts && enc->options->copyts) {
         nextpts = ac->expected_next_pts + ectx->discontinuity_pts_offset;
         if (nextpts > ectx->next_in_pts)
             ectx->next_in_pts = nextpts;
@@ -513,6 +382,7 @@ const struct ao_driver audio_out_lavc = {
     .encode = true,
     .description = "audio encoding using libavcodec",
     .name      = "lavc",
+    .priv_size = sizeof(struct priv),
     .init      = init,
     .uninit    = uninit,
     .get_space = get_space,