From d2e7467eb203d3a34bc1111564c7058b5e9c6b12 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sun, 10 Nov 2013 23:11:40 +0100
Subject: audio/filter: prepare filter chain for non-interleaved audio

Based on earlier work by Stefano Pigozzi.

There are 2 changes:

1. Instead of mp_audio.audio, mp_audio.planes[0] must be used.

2. mp_audio.len used to contain the size of the audio in bytes. Now
   mp_audio.samples must be used. (Where 1 sample is the smallest unit
   of audio that covers all channels.)

Also, some filters need changes to reject non-interleaved formats
properly.

Nothing uses the non-interleaved features yet, but this is needed so
that things don't just break when doing so.
---
 audio/decode/dec_audio.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'audio/decode')

diff --git a/audio/decode/dec_audio.c b/audio/decode/dec_audio.c
index e381a12a3c..ef7993c83a 100644
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@@ -270,20 +270,20 @@ static int filter_n_bytes(sh_audio_t *sh, struct bstr *outbuf, int len)
 
     // Filter
     struct mp_audio filter_input = {
-        .audio = sh->a_buffer,
-        .len = len,
+        .planes = {sh->a_buffer},
         .rate = sh->samplerate,
     };
     mp_audio_set_format(&filter_input, sh->sample_format);
     mp_audio_set_channels(&filter_input, &sh->channels);
+    filter_input.samples = len / filter_input.sstride;
 
     struct mp_audio *filter_output = af_play(sh->afilter, &filter_input);
     if (!filter_output)
         return -1;
-    set_min_out_buffer_size(outbuf, outbuf->len + filter_output->len);
-    memcpy(outbuf->start + outbuf->len, filter_output->audio,
-           filter_output->len);
-    outbuf->len += filter_output->len;
+    int outlen = filter_output->samples * filter_output->sstride;
+    set_min_out_buffer_size(outbuf, outbuf->len + outlen);
+    memcpy(outbuf->start + outbuf->len, filter_output->planes[0], outlen);
+    outbuf->len += outlen;
 
     // remove processed data from decoder buffer:
     sh->a_buffer_len -= len;
-- 
cgit v1.2.3


From 347a86198b214b5e79b45d198c5cd2cc3c3a759a Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sun, 10 Nov 2013 23:38:18 +0100
Subject: audio: switch output to mp_audio_buffer

Replace the code that used a single buffer with mp_audio_buffer. This
also enables non-interleaved output operation, although it's still
disabled, and no AO supports it yet.
---
 audio/decode/dec_audio.c | 54 +++++++++++++++++-------------------------------
 audio/decode/dec_audio.h |  7 +++----
 2 files changed, 22 insertions(+), 39 deletions(-)

(limited to 'audio/decode')

diff --git a/audio/decode/dec_audio.c b/audio/decode/dec_audio.c
index ef7993c83a..3f92c3c4e6 100644
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@@ -38,6 +38,7 @@
 #include "dec_audio.h"
 #include "ad.h"
 #include "audio/format.h"
+#include "audio/audio_buffer.h"
 
 #include "audio/filter/af.h"
 
@@ -230,18 +231,9 @@ int init_audio_filters(sh_audio_t *sh_audio, int in_samplerate,
     return 1;
 }
 
-static void set_min_out_buffer_size(struct bstr *outbuf, int len)
-{
-    size_t oldlen = talloc_get_size(outbuf->start);
-    if (oldlen < len) {
-        assert(outbuf->start);  // talloc context should be already set
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "Increasing filtered audio buffer size "
-               "from %zd to %d\n", oldlen, len);
-        outbuf->start = talloc_realloc_size(NULL, outbuf->start, len);
-    }
-}
-
-static int filter_n_bytes(sh_audio_t *sh, struct bstr *outbuf, int len)
+// Filter len bytes of input, put result into outbuf.
+static int filter_n_bytes(sh_audio_t *sh, struct mp_audio_buffer *outbuf,
+                          int len)
 {
     assert(len - 1 + sh->audio_out_minsize <= sh->a_buffer_size);
 
@@ -280,10 +272,7 @@ static int filter_n_bytes(sh_audio_t *sh, struct bstr *outbuf, int len)
     struct mp_audio *filter_output = af_play(sh->afilter, &filter_input);
     if (!filter_output)
         return -1;
-    int outlen = filter_output->samples * filter_output->sstride;
-    set_min_out_buffer_size(outbuf, outbuf->len + outlen);
-    memcpy(outbuf->start + outbuf->len, filter_output->planes[0], outlen);
-    outbuf->len += outlen;
+    mp_audio_buffer_append(outbuf, filter_output);
 
     // remove processed data from decoder buffer:
     sh->a_buffer_len -= len;
@@ -292,20 +281,20 @@ static int filter_n_bytes(sh_audio_t *sh, struct bstr *outbuf, int len)
     return error;
 }
 
-/* Try to get at least minlen decoded+filtered bytes in outbuf
+/* Try to get at least minsamples decoded+filtered samples in outbuf
  * (total length including possible existing data).
  * Return 0 on success, -1 on error/EOF (not distinguished).
- * In the former case outbuf->len is always >= minlen on return.
- * In case of EOF/error it might or might not be.
- * Outbuf.start must be talloc-allocated, and will be reallocated
- * if needed to fit all filter output. */
-int decode_audio(sh_audio_t *sh_audio, struct bstr *outbuf, int minlen)
+ * In the former case outbuf has at least minsamples buffered on return.
+ * In case of EOF/error it might or might not be. */
+int decode_audio(sh_audio_t *sh_audio, struct mp_audio_buffer *outbuf,
+                 int minsamples)
 {
     // Indicates that a filter seems to be buffering large amounts of data
     int huge_filter_buffer = 0;
+    int sstride =
+        af_fmt2bits(sh_audio->sample_format) / 8 * sh_audio->channels.num;
     // Decoded audio must be cut at boundaries of this many bytes
-    int bps = af_fmt2bits(sh_audio->sample_format) / 8;
-    int unitsize = sh_audio->channels.num * bps * 16;
+    int unitsize = sstride * 16;
 
     /* Filter output size will be about filter_multiplier times input size.
      * If some filter buffers audio in big blocks this might only hold
@@ -322,9 +311,13 @@ int decode_audio(sh_audio_t *sh_audio, struct bstr *outbuf, int minlen)
         return -1;
     max_decode_len -= max_decode_len % unitsize;
 
-    while (minlen >= 0 && outbuf->len < minlen) {
+    while (minsamples >= 0 && mp_audio_buffer_samples(outbuf) < minsamples) {
+        struct af_stream *afs = sh_audio->afilter;
+        int out_sstride = afs->output.sstride;
+        int declen = (minsamples - mp_audio_buffer_samples(outbuf))
+                     * out_sstride / filter_multiplier;
         // + some extra for possible filter buffering
-        int declen = (minlen - outbuf->len) / filter_multiplier + (unitsize << 5); 
+        declen += unitsize << 5;
         if (huge_filter_buffer)
             /* Some filter must be doing significant buffering if the estimated
              * input length didn't produce enough output from filters.
@@ -349,15 +342,6 @@ int decode_audio(sh_audio_t *sh_audio, struct bstr *outbuf, int minlen)
     return 0;
 }
 
-void decode_audio_prepend_bytes(struct bstr *outbuf, int count, int byte)
-{
-    set_min_out_buffer_size(outbuf, outbuf->len + count);
-    memmove(outbuf->start + count, outbuf->start, outbuf->len);
-    memset(outbuf->start, byte, count);
-    outbuf->len += count;
-}
-
-
 void resync_audio_stream(sh_audio_t *sh_audio)
 {
     sh_audio->pts = MP_NOPTS_VALUE;
diff --git a/audio/decode/dec_audio.h b/audio/decode/dec_audio.h
index b46f4282fb..3ec5954471 100644
--- a/audio/decode/dec_audio.h
+++ b/audio/decode/dec_audio.h
@@ -22,15 +22,14 @@
 #include "audio/chmap.h"
 #include "demux/stheader.h"
 
-struct bstr;
+struct mp_audio_buffer;
 struct mp_decoder_list;
 
 struct mp_decoder_list *mp_audio_decoder_list(void);
 int init_best_audio_codec(sh_audio_t *sh_audio, char *audio_decoders);
-int decode_audio(sh_audio_t *sh_audio, struct bstr *outbuf, int minlen);
-void decode_audio_prepend_bytes(struct bstr *outbuf, int count, int byte);
+int decode_audio(sh_audio_t *sh_audio, struct mp_audio_buffer *outbuf,
+                 int minsamples);
 void resync_audio_stream(sh_audio_t *sh_audio);
-void skip_audio_frame(sh_audio_t *sh_audio);
 void uninit_audio(sh_audio_t *sh_audio);
 
 int init_audio_filters(sh_audio_t *sh_audio, int in_samplerate,
-- 
cgit v1.2.3


From 824e6550f8ef1f361701eae469ada35d3889ab83 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sun, 10 Nov 2013 23:39:29 +0100
Subject: audio/filter: fix mul/delay scale and values

Before this commit, the af_instance->mul/delay values were in bytes.
Using bytes is confusing for non-interleaved audio, so switch mul to
samples, and delay to seconds. For delay, seconds are more intuitive
than bytes or samples, because it's used for the latency calculation.
We also might want to replace the delay mechanism with real PTS
tracking inside the filter chain some time in the future, and PTS
will also require time-adjustments to be done in seconds.

For most filters, we just remove the redundant mul=1 initialization.
(Setting this used to be required, but not anymore.)
---
 audio/decode/dec_audio.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'audio/decode')

diff --git a/audio/decode/dec_audio.c b/audio/decode/dec_audio.c
index 3f92c3c4e6..ebb54cb55a 100644
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@@ -312,10 +312,9 @@ int decode_audio(sh_audio_t *sh_audio, struct mp_audio_buffer *outbuf,
     max_decode_len -= max_decode_len % unitsize;
 
     while (minsamples >= 0 && mp_audio_buffer_samples(outbuf) < minsamples) {
-        struct af_stream *afs = sh_audio->afilter;
-        int out_sstride = afs->output.sstride;
-        int declen = (minsamples - mp_audio_buffer_samples(outbuf))
-                     * out_sstride / filter_multiplier;
+        int decsamples = (minsamples - mp_audio_buffer_samples(outbuf))
+                         / filter_multiplier;
+        int declen = decsamples * sstride;
         // + some extra for possible filter buffering
         declen += unitsize << 5;
         if (huge_filter_buffer)
-- 
cgit v1.2.3


From 9127aad2fdca0a64f1014c712e19252c67112c47 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Tue, 12 Nov 2013 22:27:19 +0100
Subject: dec_audio: fix behavior on format changes

The decoder overwrites the parameters in sh_audio, but we still have old
audio in the old format to filter. Take the format of the filter input
from the filter chain's input config instead.
---
 audio/decode/dec_audio.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'audio/decode')

diff --git a/audio/decode/dec_audio.c b/audio/decode/dec_audio.c
index ebb54cb55a..6c4af89ef9 100644
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@@ -263,10 +263,8 @@ static int filter_n_bytes(sh_audio_t *sh, struct mp_audio_buffer *outbuf,
     // Filter
     struct mp_audio filter_input = {
         .planes = {sh->a_buffer},
-        .rate = sh->samplerate,
     };
-    mp_audio_set_format(&filter_input, sh->sample_format);
-    mp_audio_set_channels(&filter_input, &sh->channels);
+    mp_audio_copy_config(&filter_input, &sh->afilter->input);
     filter_input.samples = len / filter_input.sstride;
 
     struct mp_audio *filter_output = af_play(sh->afilter, &filter_input);
-- 
cgit v1.2.3


From 5388a0cd4062ba24f5382f025552422fb6430906 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Tue, 12 Nov 2013 22:27:36 +0100
Subject: ad_mpg123: reduce ifdeffery

Drop support for anything before 1.14.0.
---
 audio/decode/ad_mpg123.c | 49 ++----------------------------------------------
 1 file changed, 2 insertions(+), 47 deletions(-)

(limited to 'audio/decode')

diff --git a/audio/decode/ad_mpg123.c b/audio/decode/ad_mpg123.c
index 47cb5d2039..609e68f1c8 100644
--- a/audio/decode/ad_mpg123.c
+++ b/audio/decode/ad_mpg123.c
@@ -27,34 +27,20 @@
 #include "ad.h"
 #include "mpvcore/mp_msg.h"
 
-/* Reducing the ifdeffery to two main variants:
- *   1. most compatible to any libmpg123 version
- *   2. fastest variant with recent libmpg123 (>=1.14)
- * Running variant 2 on older libmpg123 versions may work in
- * principle, but is not supported.
- * So, please leave the check for MPG123_API_VERSION there, m-kay?
- */
 #include <mpg123.h>
 
-/* Enable faster mode of operation with newer libmpg123, avoiding
- * unnecessary memcpy() calls. */
-#if (defined MPG123_API_VERSION) && (MPG123_API_VERSION >= 33)
-#define AD_MPG123_FRAMEWISE
+#if (defined MPG123_API_VERSION) && (MPG123_API_VERSION < 33)
+#error "This should not happen"
 #endif
 
-/* Switch for updating bitrate info of VBR files. Not essential. */
-#define AD_MPG123_MEAN_BITRATE
-
 struct ad_mpg123_context {
     mpg123_handle *handle;
     char new_format;
-#ifdef AD_MPG123_MEAN_BITRATE
     /* Running mean for bit rate, stream length estimation. */
     float mean_rate;
     unsigned int mean_count;
     /* Time delay for updates. */
     short delay;
-#endif
     /* If the stream is actually VBR. */
     char vbr;
 };
@@ -104,17 +90,10 @@ static int preinit(sh_audio_t *sh)
     /* Example for RVA choice (available since libmpg123 1.0.0):
     mpg123_param(con->handle, MPG123_RVA, MPG123_RVA_MIX, 0.0) */
 
-#ifdef AD_MPG123_FRAMEWISE
     /* Prevent funky automatic resampling.
      * This way, we can be sure that one frame will never produce
      * more than 1152 stereo samples. */
     mpg123_param(con->handle, MPG123_REMOVE_FLAGS, MPG123_AUTO_RESAMPLE, 0.);
-#else
-    /* Older mpg123 is vulnerable to concatenated streams when gapless cutting
-     * is enabled (will only play the jingle of a badly constructed radio
-     * stream). The versions using framewise decoding are fine with that. */
-    mpg123_param(con->handle, MPG123_REMOVE_FLAGS, MPG123_GAPLESS, 0.);
-#endif
 
     return 1;
 
@@ -225,12 +204,10 @@ static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
                    "Bad encoding from mpg123: %i.\n", encoding);
             return MPG123_ERR;
         }
-#ifdef AD_MPG123_FRAMEWISE
         /* Going to decode directly to MPlayer's memory. It is important
          * to have MPG123_AUTO_RESAMPLE disabled for the buffer size
          * being an all-time limit. */
         sh->audio_out_minsize = 1152 * 2 * (af_fmt2bits(sh->sample_format) / 8);
-#endif
         con->new_format = 0;
     }
     return ret;
@@ -282,13 +259,8 @@ static int decode_a_bit(sh_audio_t *sh, unsigned char *buf, int count)
                 sh->pts_bytes = 0;
             }
 
-#ifdef AD_MPG123_FRAMEWISE
             /* Have to use mpg123_feed() to avoid decoding here. */
             ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
-#else
-            /* Do not use mpg123_feed(), added in later libmpg123 versions. */
-            ret = mpg123_decode(con->handle, pkt->buffer, pkt->len, NULL, 0, NULL);
-#endif
             talloc_free(pkt);
             if (ret == MPG123_ERR)
                 break;
@@ -309,7 +281,6 @@ static int decode_a_bit(sh_audio_t *sh, unsigned char *buf, int count)
 
         /* Try to decode a bit. This is the return value that counts
          * for the loop condition. */
-#ifdef AD_MPG123_FRAMEWISE
         if (!buf) { /* fake call just for feeding to get format */
             ret = set_format(sh, con);
         } else { /* This is the decoding. One frame at a time. */
@@ -317,10 +288,6 @@ static int decode_a_bit(sh_audio_t *sh, unsigned char *buf, int count)
             if (ret == MPG123_OK)
                 ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
         }
-#else
-        ret = mpg123_decode(con->handle, NULL, 0, buf + got, count - got,
-                            &got_now);
-#endif
 
         got += got_now;
         sh->pts_bytes += got_now;
@@ -334,11 +301,7 @@ static int decode_a_bit(sh_audio_t *sh, unsigned char *buf, int count)
             ret = set_format(sh, con);
         }
 
-#ifdef AD_MPG123_FRAMEWISE
     } while (ret == MPG123_NEED_MORE || (got == 0 && count != 0));
-#else
-    } while (ret == MPG123_NEED_MORE || got < count);
-#endif
 
     if (ret == MPG123_ERR) {
         mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 decoding failed: %s\n",
@@ -399,11 +362,9 @@ static int init(sh_audio_t *sh, const char *decoder)
          * For VBR, the first frame will be a bad estimate. */
         sh->i_bps = (finfo.bitrate ? finfo.bitrate : compute_bitrate(&finfo))
                     * 1000 / 8;
-#ifdef AD_MPG123_MEAN_BITRATE
         con->delay      = 1;
         con->mean_rate  = 0.;
         con->mean_count = 0;
-#endif
         con->vbr = (finfo.vbr != MPG123_CBR);
 
         return 1;
@@ -425,7 +386,6 @@ static void uninit(sh_audio_t *sh)
     mpg123_exit();
 }
 
-#ifdef AD_MPG123_MEAN_BITRATE
 /* Update mean bitrate. This could be dropped if accurate time display
  * on audio file playback is not desired. */
 static void update_info(sh_audio_t *sh)
@@ -446,7 +406,6 @@ static void update_info(sh_audio_t *sh)
         }
     }
 }
-#endif
 
 static int decode_audio(sh_audio_t *sh, unsigned char *buf, int minlen,
                         int maxlen)
@@ -458,9 +417,7 @@ static int decode_audio(sh_audio_t *sh, unsigned char *buf, int minlen,
     if (bytes == 0)
         return -1;              /* EOF */
 
-#ifdef AD_MPG123_MEAN_BITRATE
     update_info(sh);
-#endif
     return bytes;
 }
 
@@ -473,9 +430,7 @@ static int control(sh_audio_t *sh, int cmd, void *arg)
          * Otherwise, we would have funny effects from the gapless code.
          * Oh, and it helps to minimize artifacts from jumping in the stream. */
         if (reopen_stream(sh)) {
-#ifdef AD_MPG123_MEAN_BITRATE
             update_info(sh);
-#endif
             return CONTROL_TRUE;
         } else {
             /* MPlayer ignores this case! It just keeps on decoding.
-- 
cgit v1.2.3


From 22b3f522cacfbdba76d311c86efd6091512eb089 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Tue, 12 Nov 2013 22:27:44 +0100
Subject: audio: add support for using non-interleaved audio from decoders
 directly

Most libavcodec decoders output non-interleaved audio. Add direct
support for this, and remove the hack that repacked non-interleaved
audio back to packed audio.

Remove the minlen argument from the decoder callback. Instead of
forcing every decoder to have its own decode loop to fill the buffer
until minlen is reached, leave this to the caller. So if a decoder
doesn't return enough data, it's simply called again. (In the future, I
even want to change it so that decoders don't read packets directly,
but instead the caller has to pass packets to the decoders. This fits
well with this change, because now the decoder callback typically
decodes at most one packet.)

ad_mpg123.c receives some heavy refactoring. The main problem is that
it wanted to handle format changes when there was no data in the decode
output buffer yet. This sounds reasonable, but actually it would write
data into a buffer prepared for old data, since the caller doesn't know
about the format change yet. (I.e. the best place for a format change
would be _after_ writing the last sample to the output buffer.) It's
possible that this code was not perfectly sane before this commit,
and perhaps lost one frame of data after a format change, but I didn't
confirm this. Trying to fix this, I ended up rewriting the decoding
and also the probing.
---
 audio/decode/ad.h        |   4 +-
 audio/decode/ad_lavc.c   | 161 +++++++------------
 audio/decode/ad_mpg123.c | 393 ++++++++++++++++++-----------------------------
 audio/decode/ad_spdif.c  |  55 ++++---
 audio/decode/dec_audio.c | 149 +++++++++---------
 5 files changed, 316 insertions(+), 446 deletions(-)

(limited to 'audio/decode')

diff --git a/audio/decode/ad.h b/audio/decode/ad.h
index 30e739d135..6c76e8dfd0 100644
--- a/audio/decode/ad.h
+++ b/audio/decode/ad.h
@@ -24,6 +24,7 @@
 #include "demux/demux.h"
 
 #include "audio/format.h"
+#include "audio/audio.h"
 
 struct mp_decoder_list;
 
@@ -35,8 +36,7 @@ struct ad_functions {
     int (*init)(sh_audio_t *sh, const char *decoder);
     void (*uninit)(sh_audio_t *sh);
     int (*control)(sh_audio_t *sh, int cmd, void *arg);
-    int (*decode_audio)(sh_audio_t *sh, unsigned char *buffer, int minlen,
-                        int maxlen);
+    int (*decode_audio)(sh_audio_t *sh, struct mp_audio *buffer, int maxlen);
 };
 
 enum ad_ctrl {
diff --git a/audio/decode/ad_lavc.c b/audio/decode/ad_lavc.c
index 1e63f0c3f2..c42c430850 100644
--- a/audio/decode/ad_lavc.c
+++ b/audio/decode/ad_lavc.c
@@ -36,25 +36,20 @@
 #include "mpvcore/av_opts.h"
 
 #include "ad.h"
-#include "audio/reorder_ch.h"
 #include "audio/fmt-conversion.h"
 
-#include "compat/mpbswap.h"
 #include "compat/libav.h"
 
 struct priv {
     AVCodecContext *avctx;
     AVFrame *avframe;
-    uint8_t *output;
-    uint8_t *output_packed; // used by deplanarize to store packed audio samples
-    int output_left;
-    int unitsize;
+    struct mp_audio frame;
     bool force_channel_map;
     struct demux_packet *packet;
 };
 
 static void uninit(sh_audio_t *sh);
-static int decode_audio(sh_audio_t *sh,unsigned char *buffer,int minlen,int maxlen);
+static int decode_new_packet(struct sh_audio *sh);
 
 #define OPT_BASE_STRUCT struct MPOpts
 
@@ -150,22 +145,21 @@ static int preinit(sh_audio_t *sh)
     return 1;
 }
 
-/* Prefer playing audio with the samplerate given in container data
- * if available, but take number the number of channels and sample format
- * from the codec, since if the codec isn't using the correct values for
- * those everything breaks anyway.
- */
-static int setup_format(sh_audio_t *sh_audio,
-                        const AVCodecContext *lavc_context)
+static int setup_format(sh_audio_t *sh_audio)
 {
     struct priv *priv = sh_audio->context;
-    int sample_format        =
-        af_from_avformat(av_get_packed_sample_fmt(lavc_context->sample_fmt));
-    int samplerate           = lavc_context->sample_rate;
-    // If not set, try container samplerate
+    AVCodecContext *lavc_context = priv->avctx;
+
+    int sample_format = af_from_avformat(lavc_context->sample_fmt);
+    if (!sample_format)
+        return -1;
+
+    int samplerate = lavc_context->sample_rate;
     if (!samplerate && sh_audio->wf) {
+        // If not set, try container samplerate.
+        // (Maybe this can't happen, and it's an artifact from the past.)
         samplerate = sh_audio->wf->nSamplesPerSec;
-        mp_tmsg(MSGT_DECAUDIO, MSGL_V, "ad_lavc: using container rate.\n");
+        mp_tmsg(MSGT_DECAUDIO, MSGL_WARN, "ad_lavc: using container rate.\n");
     }
 
     struct mp_chmap lavc_chmap;
@@ -178,14 +172,9 @@ static int setup_format(sh_audio_t *sh_audio,
             lavc_chmap = sh_audio->channels;
     }
 
-    if (!mp_chmap_equals(&lavc_chmap, &sh_audio->channels) ||
-        samplerate != sh_audio->samplerate ||
-        sample_format != sh_audio->sample_format) {
-        sh_audio->channels = lavc_chmap;
-        sh_audio->samplerate = samplerate;
-        sh_audio->sample_format = sample_format;
-        return 1;
-    }
+    sh_audio->channels = lavc_chmap;
+    sh_audio->samplerate = samplerate;
+    sh_audio->sample_format = sample_format;
     return 0;
 }
 
@@ -285,15 +274,12 @@ static int init(sh_audio_t *sh_audio, const char *decoder)
     mp_msg(MSGT_DECAUDIO, MSGL_V, "INFO: libavcodec \"%s\" init OK!\n",
            lavc_codec->name);
 
-    // Decode at least 1 byte:  (to get header filled)
-    for (int tries = 0;;) {
-        int x = decode_audio(sh_audio, sh_audio->a_buffer, 1,
-                             sh_audio->a_buffer_size);
-        if (x > 0) {
-            sh_audio->a_buffer_len = x;
+    // Decode at least 1 sample:  (to get header filled)
+    for (int tries = 1; ; tries++) {
+        int x = decode_new_packet(sh_audio);
+        if (x >= 0 && ctx->frame.samples > 0)
             break;
-        }
-        if (++tries >= 5) {
+        if (tries >= 5) {
             mp_msg(MSGT_DECAUDIO, MSGL_ERR,
                    "ad_lavc: initial decode failed\n");
             uninit(sh_audio);
@@ -305,12 +291,6 @@ static int init(sh_audio_t *sh_audio, const char *decoder)
     if (sh_audio->wf && sh_audio->wf->nAvgBytesPerSec)
         sh_audio->i_bps = sh_audio->wf->nAvgBytesPerSec;
 
-    int af_sample_fmt =
-        af_from_avformat(av_get_packed_sample_fmt(lavc_context->sample_fmt));
-    if (af_sample_fmt == AF_FORMAT_UNKNOWN) {
-        uninit(sh_audio);
-        return 0;
-    }
     return 1;
 }
 
@@ -338,7 +318,7 @@ static int control(sh_audio_t *sh, int cmd, void *arg)
     switch (cmd) {
     case ADCTRL_RESYNC_STREAM:
         avcodec_flush_buffers(ctx->avctx);
-        ctx->output_left = 0;
+        ctx->frame.samples = 0;
         talloc_free(ctx->packet);
         ctx->packet = NULL;
         return CONTROL_TRUE;
@@ -346,29 +326,13 @@ static int control(sh_audio_t *sh, int cmd, void *arg)
     return CONTROL_UNKNOWN;
 }
 
-static av_always_inline void deplanarize(struct sh_audio *sh)
-{
-    struct priv *priv = sh->context;
-
-    uint8_t **planes  = priv->avframe->extended_data;
-    size_t bps        = av_get_bytes_per_sample(priv->avctx->sample_fmt);
-    size_t nb_samples = priv->avframe->nb_samples;
-    size_t channels   = priv->avctx->channels;
-    size_t size       = bps * nb_samples * channels;
-
-    if (talloc_get_size(priv->output_packed) != size)
-        priv->output_packed =
-            talloc_realloc_size(priv, priv->output_packed, size);
-
-    reorder_to_packed(priv->output_packed, planes, bps, channels, nb_samples);
-
-    priv->output = priv->output_packed;
-}
-
 static int decode_new_packet(struct sh_audio *sh)
 {
     struct priv *priv = sh->context;
     AVCodecContext *avctx = priv->avctx;
+
+    priv->frame.samples = 0;
+
     struct demux_packet *mpkt = priv->packet;
     if (!mpkt)
         mpkt = demux_read_packet(sh->gsh);
@@ -384,7 +348,7 @@ static int decode_new_packet(struct sh_audio *sh)
 
     if (mpkt->pts != MP_NOPTS_VALUE) {
         sh->pts = mpkt->pts;
-        sh->pts_bytes = 0;
+        sh->pts_offset = 0;
     }
     int got_frame = 0;
     int ret = avcodec_decode_audio4(avctx, priv->avframe, &got_frame, &pkt);
@@ -409,58 +373,39 @@ static int decode_new_packet(struct sh_audio *sh)
     }
     if (!got_frame)
         return 0;
-    uint64_t unitsize = (uint64_t)av_get_bytes_per_sample(avctx->sample_fmt) *
-                        avctx->channels;
-    if (unitsize > 100000)
-        abort();
-    priv->unitsize = unitsize;
-    uint64_t output_left = unitsize * priv->avframe->nb_samples;
-    if (output_left > 500000000)
-        abort();
-    priv->output_left = output_left;
-    if (av_sample_fmt_is_planar(avctx->sample_fmt) && avctx->channels > 1) {
-        deplanarize(sh);
-    } else {
-        priv->output = priv->avframe->data[0];
-    }
-    mp_dbg(MSGT_DECAUDIO, MSGL_DBG2, "Decoded %d -> %d  \n", in_len,
-           priv->output_left);
+
+    if (setup_format(sh) < 0)
+        return -1;
+
+    priv->frame.samples = priv->avframe->nb_samples;
+    mp_audio_set_format(&priv->frame, sh->sample_format);
+    mp_audio_set_channels(&priv->frame, &sh->channels);
+    priv->frame.rate = sh->samplerate;
+    for (int n = 0; n < priv->frame.num_planes; n++)
+        priv->frame.planes[n] = priv->avframe->data[n];
+
+    mp_dbg(MSGT_DECAUDIO, MSGL_DBG2, "Decoded %d -> %d samples\n", in_len,
+           priv->frame.samples);
     return 0;
 }
 
-
-static int decode_audio(sh_audio_t *sh_audio, unsigned char *buf, int minlen,
-                        int maxlen)
+static int decode_audio(sh_audio_t *sh, struct mp_audio *buffer, int maxlen)
 {
-    struct priv *priv = sh_audio->context;
-    AVCodecContext *avctx = priv->avctx;
+    struct priv *priv = sh->context;
 
-    int len = -1;
-    while (len < minlen) {
-        if (!priv->output_left) {
-            if (decode_new_packet(sh_audio) < 0)
-                break;
-            continue;
-        }
-        if (setup_format(sh_audio, avctx))
-            return len;
-        int size = (minlen - len + priv->unitsize - 1);
-        size -= size % priv->unitsize;
-        size = FFMIN(size, priv->output_left);
-        if (size > maxlen)
-            abort();
-        memcpy(buf, priv->output, size);
-        priv->output += size;
-        priv->output_left -= size;
-        if (len < 0)
-            len = size;
-        else
-            len += size;
-        buf += size;
-        maxlen -= size;
-        sh_audio->pts_bytes += size;
+    if (!priv->frame.samples) {
+        if (decode_new_packet(sh) < 0)
+            return -1;
     }
-    return len;
+
+    if (!mp_audio_config_equals(buffer, &priv->frame))
+        return 0;
+
+    buffer->samples = MPMIN(priv->frame.samples, maxlen);
+    mp_audio_copy(buffer, 0, &priv->frame, 0, buffer->samples);
+    mp_audio_skip_samples(&priv->frame, buffer->samples);
+    sh->pts_offset += buffer->samples;
+    return 0;
 }
 
 static void add_decoders(struct mp_decoder_list *list)
diff --git a/audio/decode/ad_mpg123.c b/audio/decode/ad_mpg123.c
index 609e68f1c8..322f45826f 100644
--- a/audio/decode/ad_mpg123.c
+++ b/audio/decode/ad_mpg123.c
@@ -35,7 +35,9 @@
 
 struct ad_mpg123_context {
     mpg123_handle *handle;
-    char new_format;
+    bool new_format;
+    int sample_size;
+    bool need_data;
     /* Running mean for bit rate, stream length estimation. */
     float mean_rate;
     unsigned int mean_count;
@@ -45,6 +47,17 @@ struct ad_mpg123_context {
     char vbr;
 };
 
+static void uninit(sh_audio_t *sh)
+{
+    struct ad_mpg123_context *con = (struct ad_mpg123_context*) sh->context;
+
+    mpg123_close(con->handle);
+    mpg123_delete(con->handle);
+    talloc_free(sh->context);
+    sh->context = NULL;
+    mpg123_exit();
+}
+
 /* This initializes libmpg123 and prepares the handle, including funky
  * parameters. */
 static int preinit(sh_audio_t *sh)
@@ -58,7 +71,7 @@ static int preinit(sh_audio_t *sh)
     if (mpg123_init() != MPG123_OK)
         return 0;
 
-    sh->context = malloc(sizeof(struct ad_mpg123_context));
+    sh->context = talloc_zero(NULL, struct ad_mpg123_context);
     con = sh->context;
     /* Auto-choice of optimized decoder (first argument NULL). */
     con->handle = mpg123_new(NULL, &err);
@@ -92,7 +105,12 @@ static int preinit(sh_audio_t *sh)
 
     /* Prevent funky automatic resampling.
      * This way, we can be sure that one frame will never produce
-     * more than 1152 stereo samples. */
+     * more than 1152 stereo samples.
+     * Background:
+     * Going to decode directly to the output buffer. It is important to have
+     * MPG123_AUTO_RESAMPLE disabled for the buffer size being an all-time
+     * limit.
+     * We need at least 1152 samples. dec_audio.c normally guarantees this. */
     mpg123_param(con->handle, MPG123_REMOVE_FLAGS, MPG123_AUTO_RESAMPLE, 0.);
 
     return 1;
@@ -105,77 +123,21 @@ static int preinit(sh_audio_t *sh)
         mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 preinit error: %s\n",
                mpg123_strerror(con->handle));
 
-    if (con->handle)
-        mpg123_delete(con->handle);
-    mpg123_exit();
-    free(sh->context);
-    sh->context = NULL;
+    uninit(sh);
     return 0;
 }
 
-/* Compute bitrate from frame size. */
-static int compute_bitrate(struct mpg123_frameinfo *i)
-{
-    static const int samples_per_frame[4][4] = {
-        {-1, 384, 1152, 1152},  /* MPEG 1 */
-        {-1, 384, 1152,  576},  /* MPEG 2 */
-        {-1, 384, 1152,  576},  /* MPEG 2.5 */
-        {-1,  -1,   -1,   -1},  /* Unknown */
-    };
-    return (int) ((i->framesize + 4) * 8 * i->rate * 0.001 /
-                  samples_per_frame[i->version][i->layer] + 0.5);
-}
-
-/* Opted against the header printout from old mp3lib, too much
- * irrelevant info. This is modelled after the mpg123 app's
- * standard output line.
- * If more verbosity is demanded, one can add more detail and
- * also throw in ID3v2 info which libmpg123 collects anyway. */
-static void print_header_compact(struct mpg123_frameinfo *i)
-{
-    static const char *smodes[5] = {
-        "stereo", "joint-stereo", "dual-channel", "mono", "invalid"
-    };
-    static const char *layers[4] = {
-        "Unknown", "I", "II", "III"
-    };
-    static const char *versions[4] = {
-        "1.0", "2.0", "2.5", "x.x"
-    };
-
-    mp_msg(MSGT_DECAUDIO, MSGL_V, "MPEG %s layer %s, ",
-           versions[i->version], layers[i->layer]);
-    switch (i->vbr) {
-    case MPG123_CBR:
-        if (i->bitrate)
-            mp_msg(MSGT_DECAUDIO, MSGL_V, "%d kbit/s", i->bitrate);
-        else
-            mp_msg(MSGT_DECAUDIO, MSGL_V, "%d kbit/s (free format)",
-                   compute_bitrate(i));
-        break;
-    case MPG123_VBR:
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "VBR");
-        break;
-    case MPG123_ABR:
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "%d kbit/s ABR", i->abr_rate);
-        break;
-    default:
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "???");
-    }
-    mp_msg(MSGT_DECAUDIO, MSGL_V, ", %ld Hz %s\n", i->rate,
-           smodes[i->mode]);
-}
-
 /* libmpg123 has a new format ready; query and store, return return value
    of mpg123_getformat() */
-static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
+static int set_format(sh_audio_t *sh)
 {
+    struct ad_mpg123_context *con = sh->context;
     int ret;
     long rate;
     int channels;
     int encoding;
     ret = mpg123_getformat(con->handle, &rate, &channels, &encoding);
-    if(ret == MPG123_OK) {
+    if (ret == MPG123_OK) {
         mp_chmap_from_channels(&sh->channels, channels);
         sh->samplerate = rate;
         /* Without external force, mpg123 will always choose signed encoding,
@@ -189,13 +151,10 @@ static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
         case MPG123_ENC_SIGNED_16:
             sh->sample_format = AF_FORMAT_S16_NE;
             break;
-        /* To stay compatible with the oldest libmpg123 headers, do not rely
-         * on float and 32 bit encoding symbols being defined.
-         * Those formats came later */
-        case 0x1180: /* MPG123_ENC_SIGNED_32 */
+        case MPG123_ENC_SIGNED_32:
             sh->sample_format = AF_FORMAT_S32_NE;
             break;
-        case 0x200: /* MPG123_ENC_FLOAT_32 */
+        case MPG123_ENC_FLOAT_32:
             sh->sample_format = AF_FORMAT_FLOAT_NE;
             break;
         default:
@@ -204,135 +163,38 @@ static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
                    "Bad encoding from mpg123: %i.\n", encoding);
             return MPG123_ERR;
         }
-        /* Going to decode directly to MPlayer's memory. It is important
-         * to have MPG123_AUTO_RESAMPLE disabled for the buffer size
-         * being an all-time limit. */
-        sh->audio_out_minsize = 1152 * 2 * (af_fmt2bits(sh->sample_format) / 8);
+        con->sample_size = channels * (af_fmt2bits(sh->sample_format) / 8);
         con->new_format = 0;
     }
     return ret;
 }
 
-/* This tries to extract a requested amount of decoded data.
- * Even when you request 0 bytes, it will feed enough input so that
- * the decoder _could_ have delivered something.
- * Returns byte count >= 0, -1 on error.
- *
- * Thoughts on exact pts keeping:
- * We have to assume that MPEG frames are cut in pieces by packet boundaries.
- * Also, it might be possible that the first packet does not contain enough
- * data to ensure initial stream sync... or re-sync on erroneous streams.
- * So we need something robust to relate the decoded byte count to the correct
- * time stamp. This is tricky, though. From the outside, you cannot tell if,
- * after having fed two packets until the first output arrives, one should
- * start counting from the first packet's pts or the second packet's.
- * So, let's just count from the last fed package's pts. If the packets are
- * exactly cut to MPEG frames, this will cause one frame mismatch in the
- * beginning (when mpg123 peeks ahead for the following header), but will
- * be corrected with the third frame already. One might add special code to
- * not increment the base pts past the first packet's after a resync before
- * the first decoded bytes arrived. */
-static int decode_a_bit(sh_audio_t *sh, unsigned char *buf, int count)
+static int feed_new_packet(sh_audio_t *sh)
 {
-    int ret = MPG123_OK;
-    int got = 0;
     struct ad_mpg123_context *con = sh->context;
+    int ret;
 
-    /* There will be one MPG123_NEW_FORMAT message on first open.
-     * This will be handled in init(). */
-    do {
-        size_t got_now = 0;
-        /* Fetch new format now, after old data has been used. */
-        if(con->new_format)
-            ret = set_format(sh, con);
-
-        /* Feed the decoder. This will only fire from the second round on. */
-        if (ret == MPG123_NEED_MORE) {
-            /* Feed more input data. */
-            struct demux_packet *pkt = demux_read_packet(sh->gsh);
-            if (!pkt)
-                break;          /* Apparently that's it. EOF. */
-
-            /* Next bytes from that presentation time. */
-            if (pkt->pts != MP_NOPTS_VALUE) {
-                sh->pts       = pkt->pts;
-                sh->pts_bytes = 0;
-            }
-
-            /* Have to use mpg123_feed() to avoid decoding here. */
-            ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
-            talloc_free(pkt);
-            if (ret == MPG123_ERR)
-                break;
-
-            /* Indication of format change is possible here (from mpg123_decode()). */
-            if(ret == MPG123_NEW_FORMAT) {
-                con->new_format = 1;
-                if(got)
-                    break; /* Do not switch format during a chunk. */
-
-                ret = set_format(sh, con);
-            }
-        }
-        /* Theoretically, mpg123 could return MPG123_DONE, so be prepared.
-         * Should not happen in our usage, but it is a valid return code. */
-        else if (ret == MPG123_ERR || ret == MPG123_DONE)
-            break;
-
-        /* Try to decode a bit. This is the return value that counts
-         * for the loop condition. */
-        if (!buf) { /* fake call just for feeding to get format */
-            ret = set_format(sh, con);
-        } else { /* This is the decoding. One frame at a time. */
-            ret = mpg123_replace_buffer(con->handle, buf, count);
-            if (ret == MPG123_OK)
-                ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
-        }
-
-        got += got_now;
-        sh->pts_bytes += got_now;
-
-        /* Indication of format change should happen here. */
-        if(ret == MPG123_NEW_FORMAT) {
-            con->new_format = 1;
-            if(got)
-                break; /* Do not switch format during a chunk. */
-
-            ret = set_format(sh, con);
-        }
-
-    } while (ret == MPG123_NEED_MORE || (got == 0 && count != 0));
+    struct demux_packet *pkt = demux_read_packet(sh->gsh);
+    if (!pkt)
+        return -1; /* EOF. */
 
-    if (ret == MPG123_ERR) {
-        mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 decoding failed: %s\n",
-               mpg123_strerror(con->handle));
+    /* Next bytes from that presentation time. */
+    if (pkt->pts != MP_NOPTS_VALUE) {
+        sh->pts        = pkt->pts;
+        sh->pts_offset = 0;
     }
 
-    return got;
-}
+    /* Have to use mpg123_feed() to avoid decoding here. */
+    ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
+    talloc_free(pkt);
 
-/* Close, reopen stream. Feed data until we know the format of the stream.
- * 1 on success, 0 on error */
-static int reopen_stream(sh_audio_t *sh)
-{
-    struct ad_mpg123_context *con = (struct ad_mpg123_context*) sh->context;
+    if (ret == MPG123_ERR)
+        return -1;
 
-    mpg123_close(con->handle);
-    /* No resetting of the context:
-     * We do not want to loose the mean bitrate data. */
-
-    /* Open and make sure we have fed enough data to get stream properties. */
-    if (MPG123_OK == mpg123_open_feed(con->handle) &&
-        /* Feed data until mpg123 is ready (has found stream beginning). */
-        !decode_a_bit(sh, NULL, 0) &&
-        set_format(sh, con) == MPG123_OK) { /* format setting again just for return value */
-        return 1;
-    } else {
-        mp_msg(MSGT_DECAUDIO, MSGL_ERR,
-               "mpg123 failed to reopen stream: %s\n",
-               mpg123_strerror(con->handle));
-        return 0;
-    }
+    if (ret == MPG123_NEW_FORMAT)
+        con->new_format = 1;
+
+    return 0;
 }
 
 /* Now we really start accessing some data and determining file format.
@@ -341,49 +203,59 @@ static int reopen_stream(sh_audio_t *sh)
  * erros in other places simply cannot occur. */
 static int init(sh_audio_t *sh, const char *decoder)
 {
-    mpg123_id3v2 *v2;
-    struct mpg123_frameinfo finfo;
     struct ad_mpg123_context *con = sh->context;
+    int ret;
 
-    con->new_format = 0;
-    if (reopen_stream(sh) &&
-        /* Get MPEG header info. */
-        MPG123_OK == mpg123_info(con->handle, &finfo) &&
-        /* Since we queried format, mpg123 should have read past ID3v2 tags.
-         * We need to decide if printing of UTF-8 encoded text info is wanted. */
-        MPG123_OK == mpg123_id3(con->handle, NULL, &v2)) {
-        /* If we are here, we passed all hurdles. Yay! Extract the info. */
-        print_header_compact(&finfo);
-        /* Do we want to print out the UTF-8 Id3v2 info?
-        if (v2)
-            print_id3v2(v2); */
-
-        /* Have kb/s, want B/s
-         * For VBR, the first frame will be a bad estimate. */
-        sh->i_bps = (finfo.bitrate ? finfo.bitrate : compute_bitrate(&finfo))
-                    * 1000 / 8;
-        con->delay      = 1;
-        con->mean_rate  = 0.;
-        con->mean_count = 0;
-        con->vbr = (finfo.vbr != MPG123_CBR);
+    ret = mpg123_open_feed(con->handle);
+    if (ret != MPG123_OK)
+        goto fail;
+
+    for (int n = 0; ; n++) {
+        if (feed_new_packet(sh) < 0) {
+            ret = MPG123_NEED_MORE;
+            goto fail;
+        }
+        size_t got_now = 0;
+        ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
+        if (ret == MPG123_OK || ret == MPG123_NEW_FORMAT) {
+            ret = set_format(sh);
+            if (ret == MPG123_OK)
+                break;
+        }
+        if (ret != MPG123_NEED_MORE)
+            goto fail;
+        // max. 16 retries (randomly chosen number)
+        if (n > 16) {
+            ret = MPG123_NEED_MORE;
+            goto fail;
+        }
+    }
+
+    return 1;
 
-        return 1;
+fail:
+    if (ret == MPG123_NEED_MORE) {
+        mp_msg(MSGT_DECAUDIO, MSGL_ERR, "Could not find mp3 stream.\n");
     } else {
         mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 init error: %s\n",
                mpg123_strerror(con->handle));
-        return 0;
     }
+
+    uninit(sh);
+    return 0;
 }
 
-static void uninit(sh_audio_t *sh)
+/* Compute bitrate from frame size. */
+static int compute_bitrate(struct mpg123_frameinfo *i)
 {
-    struct ad_mpg123_context *con = (struct ad_mpg123_context*) sh->context;
-
-    mpg123_close(con->handle);
-    mpg123_delete(con->handle);
-    free(sh->context);
-    sh->context = NULL;
-    mpg123_exit();
+    static const int samples_per_frame[4][4] = {
+        {-1, 384, 1152, 1152},  /* MPEG 1 */
+        {-1, 384, 1152,  576},  /* MPEG 2 */
+        {-1, 384, 1152,  576},  /* MPEG 2.5 */
+        {-1,  -1,   -1,   -1},  /* Unknown */
+    };
+    return (int) ((i->framesize + 4) * 8 * i->rate * 0.001 /
+                  samples_per_frame[i->version][i->layer] + 0.5);
 }
 
 /* Update mean bitrate. This could be dropped if accurate time display
@@ -391,9 +263,12 @@ static void uninit(sh_audio_t *sh)
 static void update_info(sh_audio_t *sh)
 {
     struct ad_mpg123_context *con = sh->context;
-    if (con->vbr && --con->delay < 1) {
-        struct mpg123_frameinfo finfo;
-        if (MPG123_OK == mpg123_info(con->handle, &finfo)) {
+    struct mpg123_frameinfo finfo;
+    if (mpg123_info(con->handle, &finfo) != MPG123_OK)
+        return;
+
+    if (finfo.vbr != MPG123_CBR) {
+        if (--con->delay < 1) {
             if (++con->mean_count > ((unsigned int) -1) / 2)
                 con->mean_count = ((unsigned int) -1) / 4;
 
@@ -404,42 +279,80 @@ static void update_info(sh_audio_t *sh)
 
             con->delay = 10;
         }
+    } else {
+        sh->i_bps = (finfo.bitrate ? finfo.bitrate : compute_bitrate(&finfo))
+                    * 1000 / 8;
+        con->delay      = 1;
+        con->mean_rate  = 0.;
+        con->mean_count = 0;
     }
 }
 
-static int decode_audio(sh_audio_t *sh, unsigned char *buf, int minlen,
-                        int maxlen)
+static int decode_audio(sh_audio_t *sh, struct mp_audio *buffer, int maxlen)
 {
-    int bytes;
+    struct ad_mpg123_context *con = sh->context;
+    void *buf = buffer->planes[0];
+    int ret;
 
-    bytes = decode_a_bit(sh, buf, maxlen);
-    /* This EOF is ignored, apparently, until input data is exhausted. */
-    if (bytes == 0)
-        return -1;              /* EOF */
+    if (con->new_format) {
+        ret = set_format(sh);
+        if (ret == MPG123_OK) {
+            return 0; // let caller handle format change
+        } else if (ret == MPG123_NEED_MORE) {
+            con->need_data = true;
+        } else {
+            goto mpg123_fail;
+        }
+    }
+
+    if (con->need_data) {
+        if (feed_new_packet(sh) < 0)
+            return -1;
+    }
+
+    size_t got_now = 0;
+    ret = mpg123_replace_buffer(con->handle, buf, maxlen * con->sample_size);
+    if (ret != MPG123_OK)
+        goto mpg123_fail;
+
+    ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
+
+    int got_samples = got_now / con->sample_size;
+    buffer->samples += got_samples;
+    sh->pts_offset += got_samples;
+
+    if (ret == MPG123_NEW_FORMAT) {
+        con->new_format = true;
+    } else if (ret == MPG123_NEED_MORE) {
+        con->need_data = true;
+    } else if (ret != MPG123_OK && ret != MPG123_DONE) {
+        goto mpg123_fail;
+    }
 
     update_info(sh);
-    return bytes;
+    return 0;
+
+mpg123_fail:
+    mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 decoding error: %s\n",
+           mpg123_strerror(con->handle));
+    return -1;
 }
 
 static int control(sh_audio_t *sh, int cmd, void *arg)
 {
+    struct ad_mpg123_context *con = sh->context;
+
     switch (cmd) {
     case ADCTRL_RESYNC_STREAM:
-        /* Close/reopen the stream for mpg123 to make sure it doesn't
-         * think that it still knows the exact stream position.
-         * Otherwise, we would have funny effects from the gapless code.
-         * Oh, and it helps to minimize artifacts from jumping in the stream. */
-        if (reopen_stream(sh)) {
-            update_info(sh);
-            return CONTROL_TRUE;
-        } else {
-            /* MPlayer ignores this case! It just keeps on decoding.
-             * So we have to make sure resync never fails ... */
+        mpg123_close(con->handle);
+
+        if (mpg123_open_feed(con->handle) != MPG123_OK) {
             mp_msg(MSGT_DECAUDIO, MSGL_ERR,
-                   "mpg123 cannot reopen stream for resync.\n");
+                   "mpg123 failed to reopen stream: %s\n",
+                   mpg123_strerror(con->handle));
             return CONTROL_FALSE;
         }
-        break;
+        return CONTROL_TRUE;
     }
     return CONTROL_UNKNOWN;
 }
diff --git a/audio/decode/ad_spdif.c b/audio/decode/ad_spdif.c
index f03041d6a6..a233286c19 100644
--- a/audio/decode/ad_spdif.c
+++ b/audio/decode/ad_spdif.c
@@ -19,6 +19,7 @@
  */
 
 #include <string.h>
+#include <assert.h>
 
 #include <libavformat/avformat.h>
 #include <libavcodec/avcodec.h>
@@ -184,37 +185,43 @@ fail:
     return 0;
 }
 
-static int decode_audio(sh_audio_t *sh, unsigned char *buf,
-                        int minlen, int maxlen)
+static int decode_audio(sh_audio_t *sh, struct mp_audio *buffer, int maxlen)
 {
     struct spdifContext *spdif_ctx = sh->context;
     AVFormatContext     *lavf_ctx  = spdif_ctx->lavf_ctx;
 
+    int sstride = 2 * sh->channels.num;
+    assert(sstride == buffer->sstride);
+
+    if (maxlen * sstride < spdif_ctx->iec61937_packet_size)
+        return 0;
+
     spdif_ctx->out_buffer_len  = 0;
-    spdif_ctx->out_buffer_size = maxlen;
-    spdif_ctx->out_buffer      = buf;
-    while (spdif_ctx->out_buffer_len + spdif_ctx->iec61937_packet_size < maxlen
-           && spdif_ctx->out_buffer_len < minlen) {
-        struct demux_packet *mpkt = demux_read_packet(sh->gsh);
-        if (!mpkt)
-            break;
-        AVPacket pkt;
-        mp_set_av_packet(&pkt, mpkt);
-        pkt.pts = pkt.dts = 0;
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "spdif packet, size=%d\n", pkt.size);
-        if (mpkt->pts != MP_NOPTS_VALUE) {
-            sh->pts       = mpkt->pts;
-            sh->pts_bytes = 0;
-        }
-        int out_len = spdif_ctx->out_buffer_len;
-        int ret = av_write_frame(lavf_ctx, &pkt);
-        avio_flush(lavf_ctx->pb);
-        sh->pts_bytes += spdif_ctx->out_buffer_len - out_len;
-        talloc_free(mpkt);
-        if (ret < 0)
-            break;
+    spdif_ctx->out_buffer_size = maxlen * sstride; // maxlen is in samples
+    spdif_ctx->out_buffer      = buffer->planes[0];
+
+    struct demux_packet *mpkt = demux_read_packet(sh->gsh);
+    if (!mpkt)
+        return -1; // EOF: a 0-sample success would make the caller spin
+
+    AVPacket pkt;
+    mp_set_av_packet(&pkt, mpkt);
+    pkt.pts = pkt.dts = 0;
+    mp_msg(MSGT_DECAUDIO, MSGL_V, "spdif packet, size=%d\n", pkt.size);
+    if (mpkt->pts != MP_NOPTS_VALUE) {
+        sh->pts        = mpkt->pts;
+        sh->pts_offset = 0;
     }
-    return spdif_ctx->out_buffer_len;
+    int out_len = spdif_ctx->out_buffer_len;
+    int ret = av_write_frame(lavf_ctx, &pkt);
+    avio_flush(lavf_ctx->pb);
+    sh->pts_offset += (spdif_ctx->out_buffer_len - out_len) / sstride;
+    talloc_free(mpkt);
+    if (ret < 0)
+        return -1;
+
+    buffer->samples = spdif_ctx->out_buffer_len / sstride;
+    return 0;
 }
 
 static int control(sh_audio_t *sh, int cmd, void *arg)
diff --git a/audio/decode/dec_audio.c b/audio/decode/dec_audio.c
index 6c4af89ef9..19b5d8bdeb 100644
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@@ -38,6 +38,7 @@
 #include "dec_audio.h"
 #include "ad.h"
 #include "audio/format.h"
+#include "audio/audio.h"
 #include "audio/audio_buffer.h"
 
 #include "audio/filter/af.h"
@@ -55,31 +56,29 @@ static const struct ad_functions * const ad_drivers[] = {
     NULL
 };
 
+// At least ad_mpg123 needs to be able to decode this many samples at once
+#define DECODE_MAX_UNIT 1152
+
+// At least 8192 samples, plus hack for ad_mpg123
+#define DECODE_BUFFER_SAMPLES (8192 + DECODE_MAX_UNIT)
+
+// Drop audio buffer and reinit it (after format change)
+static void reinit_audio_buffer(sh_audio_t *sh)
+{
+    mp_audio_buffer_reinit_fmt(sh->decode_buffer, sh->sample_format,
+                               &sh->channels, sh->samplerate);
+    mp_audio_buffer_preallocate_min(sh->decode_buffer, DECODE_BUFFER_SAMPLES);
+}
+
 static int init_audio_codec(sh_audio_t *sh_audio, const char *decoder)
 {
     assert(!sh_audio->initialized);
     resync_audio_stream(sh_audio);
-    sh_audio->sample_format = AF_FORMAT_FLOAT_NE;
-    sh_audio->audio_out_minsize = 8192; // default, preinit() may change it
     if (!sh_audio->ad_driver->preinit(sh_audio)) {
         mp_tmsg(MSGT_DECAUDIO, MSGL_ERR, "Audio decoder preinit failed.\n");
         return 0;
     }
 
-    const int base_size = 65536;
-    // At least 64 KiB plus rounding up to next decodable unit size
-    sh_audio->a_buffer_size = base_size + sh_audio->audio_out_minsize;
-
-    mp_tmsg(MSGT_DECAUDIO, MSGL_V,
-            "dec_audio: Allocating %d + %d = %d bytes for output buffer.\n",
-            sh_audio->audio_out_minsize, base_size,
-            sh_audio->a_buffer_size);
-
-    sh_audio->a_buffer = av_mallocz(sh_audio->a_buffer_size);
-    if (!sh_audio->a_buffer)
-        abort();
-    sh_audio->a_buffer_len = 0;
-
     if (!sh_audio->ad_driver->init(sh_audio, decoder)) {
         mp_tmsg(MSGT_DECAUDIO, MSGL_V, "Audio decoder init failed.\n");
         uninit_audio(sh_audio); // free buffers
@@ -88,13 +87,18 @@ static int init_audio_codec(sh_audio_t *sh_audio, const char *decoder)
 
     sh_audio->initialized = 1;
 
-    if (mp_chmap_is_empty(&sh_audio->channels) || !sh_audio->samplerate) {
+    if (mp_chmap_is_empty(&sh_audio->channels) || !sh_audio->samplerate ||
+        !sh_audio->sample_format)
+    {
         mp_tmsg(MSGT_DECAUDIO, MSGL_ERR, "Audio decoder did not specify "
                 "audio format!\n");
         uninit_audio(sh_audio); // free buffers
         return 0;
     }
 
+    sh_audio->decode_buffer = mp_audio_buffer_create(NULL);
+    reinit_audio_buffer(sh_audio);
+
     return 1;
 }
 
@@ -188,7 +192,8 @@ void uninit_audio(sh_audio_t *sh_audio)
     }
     talloc_free(sh_audio->gsh->decoder_desc);
     sh_audio->gsh->decoder_desc = NULL;
-    av_freep(&sh_audio->a_buffer);
+    talloc_free(sh_audio->decode_buffer);
+    sh_audio->decode_buffer = NULL;
 }
 
 
@@ -235,37 +240,44 @@ int init_audio_filters(sh_audio_t *sh_audio, int in_samplerate,
 static int filter_n_bytes(sh_audio_t *sh, struct mp_audio_buffer *outbuf,
                           int len)
 {
-    assert(len - 1 + sh->audio_out_minsize <= sh->a_buffer_size);
-
     int error = 0;
 
-    // Decode more bytes if needed
-    int old_samplerate = sh->samplerate;
-    struct mp_chmap old_channels = sh->channels;
-    int old_sample_format = sh->sample_format;
-    while (sh->a_buffer_len < len) {
-        unsigned char *buf = sh->a_buffer + sh->a_buffer_len;
-        int minlen = len - sh->a_buffer_len;
-        int maxlen = sh->a_buffer_size - sh->a_buffer_len;
-        int ret = sh->ad_driver->decode_audio(sh, buf, minlen, maxlen);
-        int format_change = sh->samplerate != old_samplerate
-                            || !mp_chmap_equals(&sh->channels, &old_channels)
-                            || sh->sample_format != old_sample_format;
-        if (ret <= 0 || format_change) {
-            error = format_change ? -2 : -1;
-            // samples from format-changing call get discarded too
-            len = sh->a_buffer_len;
+    struct mp_audio config;
+    mp_audio_buffer_get_format(sh->decode_buffer, &config);
+
+    while (mp_audio_buffer_samples(sh->decode_buffer) < len) {
+        int maxlen = mp_audio_buffer_get_write_available(sh->decode_buffer);
+        if (maxlen < DECODE_MAX_UNIT)
+            break;
+        struct mp_audio buffer;
+        mp_audio_buffer_get_write_buffer(sh->decode_buffer, maxlen, &buffer);
+        buffer.samples = 0;
+        error = sh->ad_driver->decode_audio(sh, &buffer, maxlen);
+        if (error < 0)
+            break;
+        // Commit the data just read as valid data
+        mp_audio_buffer_finish_write(sh->decode_buffer, buffer.samples);
+        // Format change
+        if (sh->samplerate != config.rate ||
+            !mp_chmap_equals(&sh->channels, &config.channels) ||
+            sh->sample_format != config.format)
+        {
+            // If there are still samples left in the buffer, let them drain
+            // first, and don't signal a format change to the caller yet.
+            if (mp_audio_buffer_samples(sh->decode_buffer) > 0)
+                break;
+            reinit_audio_buffer(sh);
+            error = -2;
             break;
         }
-        sh->a_buffer_len += ret;
     }
 
     // Filter
-    struct mp_audio filter_input = {
-        .planes = {sh->a_buffer},
-    };
-    mp_audio_copy_config(&filter_input, &sh->afilter->input);
-    filter_input.samples = len / filter_input.sstride;
+    struct mp_audio filter_input;
+    mp_audio_buffer_peek(sh->decode_buffer, &filter_input);
+    filter_input.rate = sh->afilter->input.rate; // due to playback speed change
+    len = MPMIN(filter_input.samples, len);
+    filter_input.samples = len;
 
     struct mp_audio *filter_output = af_play(sh->afilter, &filter_input);
     if (!filter_output)
@@ -273,8 +285,7 @@ static int filter_n_bytes(sh_audio_t *sh, struct mp_audio_buffer *outbuf,
     mp_audio_buffer_append(outbuf, filter_output);
 
     // remove processed data from decoder buffer:
-    sh->a_buffer_len -= len;
-    memmove(sh->a_buffer, sh->a_buffer + len, sh->a_buffer_len);
+    mp_audio_buffer_skip(sh->decode_buffer, len);
 
     return error;
 }
@@ -289,33 +300,27 @@ int decode_audio(sh_audio_t *sh_audio, struct mp_audio_buffer *outbuf,
 {
     // Indicates that a filter seems to be buffering large amounts of data
     int huge_filter_buffer = 0;
-    int sstride =
-        af_fmt2bits(sh_audio->sample_format) / 8 * sh_audio->channels.num;
-    // Decoded audio must be cut at boundaries of this many bytes
-    int unitsize = sstride * 16;
+    // Decoded audio must be cut at boundaries of this many samples
+    // (Note: the reason for this is unknown, possibly a refactoring artifact)
+    int unitsize = 16;
 
     /* Filter output size will be about filter_multiplier times input size.
      * If some filter buffers audio in big blocks this might only hold
      * as average over time. */
     double filter_multiplier = af_calc_filter_multiplier(sh_audio->afilter);
 
-    /* If the decoder set audio_out_minsize then it can do the equivalent of
-     * "while (output_len < target_len) output_len += audio_out_minsize;",
-     * so we must guarantee there is at least audio_out_minsize-1 bytes
-     * more space in the output buffer than the minimum length we try to
-     * decode. */
-    int max_decode_len = sh_audio->a_buffer_size - sh_audio->audio_out_minsize;
-    if (!unitsize)
-        return -1;
-    max_decode_len -= max_decode_len % unitsize;
+    int prev_buffered = -1;
+    while (minsamples >= 0) {
+        int buffered = mp_audio_buffer_samples(outbuf);
+        if (minsamples < buffered || buffered == prev_buffered)
+            break;
+        prev_buffered = buffered;
 
-    while (minsamples >= 0 && mp_audio_buffer_samples(outbuf) < minsamples) {
-        int decsamples = (minsamples - mp_audio_buffer_samples(outbuf))
-                         / filter_multiplier;
-        int declen = decsamples * sstride;
+        int decsamples = (minsamples - buffered) / filter_multiplier;
         // + some extra for possible filter buffering
-        declen += unitsize << 5;
-        if (huge_filter_buffer)
+        decsamples += unitsize << 5; // 32 units of slack, not 2^unitsize
+
+        if (huge_filter_buffer) {
             /* Some filter must be doing significant buffering if the estimated
              * input length didn't produce enough output from filters.
              * Feed the filters 2k bytes at a time until we have enough output.
@@ -324,15 +329,14 @@ int decode_audio(sh_audio_t *sh_audio, struct mp_audio_buffer *outbuf,
              * to get audio data and buffer video frames in memory while doing
              * so. However the performance impact of either is probably not too
              * significant as long as the value is not completely insane. */
-            declen = 2000;
-        declen -= declen % unitsize;
-        if (declen > max_decode_len)
-            declen = max_decode_len;
-        else
-            /* if this iteration does not fill buffer, we must have lots
-             * of buffering in filters */
-            huge_filter_buffer = 1;
-        int res = filter_n_bytes(sh_audio, outbuf, declen);
+            decsamples = 2000;
+        }
+
+        /* if this iteration does not fill buffer, we must have lots
+         * of buffering in filters */
+        huge_filter_buffer = 1;
+
+        int res = filter_n_bytes(sh_audio, outbuf, decsamples);
         if (res < 0)
             return res;
     }
@@ -342,6 +346,7 @@ int decode_audio(sh_audio_t *sh_audio, struct mp_audio_buffer *outbuf,
 void resync_audio_stream(sh_audio_t *sh_audio)
 {
     sh_audio->pts = MP_NOPTS_VALUE;
+    sh_audio->pts_offset = 0;
     if (!sh_audio->initialized)
         return;
     sh_audio->ad_driver->control(sh_audio, ADCTRL_RESYNC_STREAM, NULL);
-- 
cgit v1.2.3