audio: add support for using non-interleaved audio from decoders directly

Most libavcodec decoders output non-interleaved audio. Add direct support for this, and remove the hack that repacked non-interleaved audio back to packed audio. Remove the minlen argument from the decoder callback. Instead of forcing every decoder to have its own decode loop to fill the buffer until minlen is reached, leave this to the caller. So if a decoder doesn't return enough data, it's simply called again. (In the future, I even want to change it so that decoders don't read packets directly, but instead the caller has to pass packets to the decoders. This fits well with this change, because now the decoder callback typically decodes at most one packet.) ad_mpg123.c receives some heavy refactoring. The main problem is that it wanted to handle format changes when there was no data in the decode output buffer yet. This sounds reasonable, but actually it would write data into a buffer prepared for old data, since the caller doesn't know about the format change yet. (I.e., the best place for a format change would be _after_ writing the last sample to the output buffer.) It's possible that this code was not perfectly sane before this commit, and perhaps lost one frame of data after a format change, but I didn't confirm this. Trying to fix this, I ended up rewriting the decoding and also the probing.
author: wm4 <wm4@nowhere> 2013-11-12 22:27:44 +0100
committer: wm4 <wm4@nowhere> 2013-11-12 23:39:09 +0100
commit: 22b3f522cacfbdba76d311c86efd6091512eb089 (patch)
tree: 1105af44a9403bde554cd4b6041d05ceea4fb39a /audio/decode/ad_mpg123.c
parent: 5388a0cd4062ba24f5382f025552422fb6430906 (diff)
download: mpv-22b3f522cacfbdba76d311c86efd6091512eb089.tar.bz2
mpv-22b3f522cacfbdba76d311c86efd6091512eb089.tar.xz
1 files changed, 153 insertions, 240 deletions
diff --git a/audio/decode/ad_mpg123.c b/audio/decode/ad_mpg123.c
index 609e68f1c8..322f45826f 100644
--- a/audio/decode/ad_mpg123.c
+++ b/audio/decode/ad_mpg123.c
@@ -35,7 +35,9 @@
 
 struct ad_mpg123_context {
     mpg123_handle *handle;
-    char new_format;
+    bool new_format;
+    int sample_size;
+    bool need_data;
     /* Running mean for bit rate, stream length estimation. */
     float mean_rate;
     unsigned int mean_count;
@@ -45,6 +47,17 @@ struct ad_mpg123_context {
     char vbr;
 };
 
+static void uninit(sh_audio_t *sh)
+{
+    struct ad_mpg123_context *con = (struct ad_mpg123_context*) sh->context;
+
+    mpg123_close(con->handle);
+    mpg123_delete(con->handle);
+    talloc_free(sh->context);
+    sh->context = NULL;
+    mpg123_exit();
+}
+
 /* This initializes libmpg123 and prepares the handle, including funky
  * parameters. */
 static int preinit(sh_audio_t *sh)
@@ -58,7 +71,7 @@ static int preinit(sh_audio_t *sh)
     if (mpg123_init() != MPG123_OK)
         return 0;
 
-    sh->context = malloc(sizeof(struct ad_mpg123_context));
+    sh->context = talloc_zero(NULL, struct ad_mpg123_context);
     con = sh->context;
     /* Auto-choice of optimized decoder (first argument NULL). */
     con->handle = mpg123_new(NULL, &err);
@@ -92,7 +105,12 @@ static int preinit(sh_audio_t *sh)
 
     /* Prevent funky automatic resampling.
      * This way, we can be sure that one frame will never produce
-     * more than 1152 stereo samples. */
+     * more than 1152 stereo samples.
+     * Background:
+     * Going to decode directly to the output buffer. It is important to have
+     * MPG123_AUTO_RESAMPLE disabled for the buffer size being an all-time
+     * limit.
+     * We need at least 1152 samples. dec_audio.c normally guarantees this. */
     mpg123_param(con->handle, MPG123_REMOVE_FLAGS, MPG123_AUTO_RESAMPLE, 0.);
 
     return 1;
@@ -105,77 +123,21 @@ static int preinit(sh_audio_t *sh)
         mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 preinit error: %s\n",
                mpg123_strerror(con->handle));
 
-    if (con->handle)
-        mpg123_delete(con->handle);
-    mpg123_exit();
-    free(sh->context);
-    sh->context = NULL;
+    uninit(sh);
     return 0;
 }
 
-/* Compute bitrate from frame size. */
-static int compute_bitrate(struct mpg123_frameinfo *i)
-{
-    static const int samples_per_frame[4][4] = {
-        {-1, 384, 1152, 1152},  /* MPEG 1 */
-        {-1, 384, 1152,  576},  /* MPEG 2 */
-        {-1, 384, 1152,  576},  /* MPEG 2.5 */
-        {-1,  -1,   -1,   -1},  /* Unknown */
-    };
-    return (int) ((i->framesize + 4) * 8 * i->rate * 0.001 /
-                  samples_per_frame[i->version][i->layer] + 0.5);
-}
-
-/* Opted against the header printout from old mp3lib, too much
- * irrelevant info. This is modelled after the mpg123 app's
- * standard output line.
- * If more verbosity is demanded, one can add more detail and
- * also throw in ID3v2 info which libmpg123 collects anyway. */
-static void print_header_compact(struct mpg123_frameinfo *i)
-{
-    static const char *smodes[5] = {
-        "stereo", "joint-stereo", "dual-channel", "mono", "invalid"
-    };
-    static const char *layers[4] = {
-        "Unknown", "I", "II", "III"
-    };
-    static const char *versions[4] = {
-        "1.0", "2.0", "2.5", "x.x"
-    };
-
-    mp_msg(MSGT_DECAUDIO, MSGL_V, "MPEG %s layer %s, ",
-           versions[i->version], layers[i->layer]);
-    switch (i->vbr) {
-    case MPG123_CBR:
-        if (i->bitrate)
-            mp_msg(MSGT_DECAUDIO, MSGL_V, "%d kbit/s", i->bitrate);
-        else
-            mp_msg(MSGT_DECAUDIO, MSGL_V, "%d kbit/s (free format)",
-                   compute_bitrate(i));
-        break;
-    case MPG123_VBR:
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "VBR");
-        break;
-    case MPG123_ABR:
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "%d kbit/s ABR", i->abr_rate);
-        break;
-    default:
-        mp_msg(MSGT_DECAUDIO, MSGL_V, "???");
-    }
-    mp_msg(MSGT_DECAUDIO, MSGL_V, ", %ld Hz %s\n", i->rate,
-           smodes[i->mode]);
-}
-
 /* libmpg123 has a new format ready; query and store, return return value
    of mpg123_getformat() */
-static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
+static int set_format(sh_audio_t *sh)
 {
+    struct ad_mpg123_context *con = sh->context;
     int ret;
     long rate;
     int channels;
     int encoding;
     ret = mpg123_getformat(con->handle, &rate, &channels, &encoding);
-    if(ret == MPG123_OK) {
+    if (ret == MPG123_OK) {
         mp_chmap_from_channels(&sh->channels, channels);
         sh->samplerate = rate;
         /* Without external force, mpg123 will always choose signed encoding,
@@ -189,13 +151,10 @@ static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
         case MPG123_ENC_SIGNED_16:
             sh->sample_format = AF_FORMAT_S16_NE;
             break;
-        /* To stay compatible with the oldest libmpg123 headers, do not rely
-         * on float and 32 bit encoding symbols being defined.
-         * Those formats came later */
-        case 0x1180: /* MPG123_ENC_SIGNED_32 */
+        case MPG123_ENC_SIGNED_32:
             sh->sample_format = AF_FORMAT_S32_NE;
             break;
-        case 0x200: /* MPG123_ENC_FLOAT_32 */
+        case MPG123_ENC_FLOAT_32:
             sh->sample_format = AF_FORMAT_FLOAT_NE;
             break;
         default:
@@ -204,135 +163,38 @@ static int set_format(sh_audio_t *sh, struct ad_mpg123_context *con)
                    "Bad encoding from mpg123: %i.\n", encoding);
             return MPG123_ERR;
         }
-        /* Going to decode directly to MPlayer's memory. It is important
-         * to have MPG123_AUTO_RESAMPLE disabled for the buffer size
-         * being an all-time limit. */
-        sh->audio_out_minsize = 1152 * 2 * (af_fmt2bits(sh->sample_format) / 8);
+        con->sample_size = channels * (af_fmt2bits(sh->sample_format) / 8);
         con->new_format = 0;
     }
     return ret;
 }
 
-/* This tries to extract a requested amount of decoded data.
- * Even when you request 0 bytes, it will feed enough input so that
- * the decoder _could_ have delivered something.
- * Returns byte count >= 0, -1 on error.
- *
- * Thoughts on exact pts keeping:
- * We have to assume that MPEG frames are cut in pieces by packet boundaries.
- * Also, it might be possible that the first packet does not contain enough
- * data to ensure initial stream sync... or re-sync on erroneous streams.
- * So we need something robust to relate the decoded byte count to the correct
- * time stamp. This is tricky, though. From the outside, you cannot tell if,
- * after having fed two packets until the first output arrives, one should
- * start counting from the first packet's pts or the second packet's.
- * So, let's just count from the last fed package's pts. If the packets are
- * exactly cut to MPEG frames, this will cause one frame mismatch in the
- * beginning (when mpg123 peeks ahead for the following header), but will
- * be corrected with the third frame already. One might add special code to
- * not increment the base pts past the first packet's after a resync before
- * the first decoded bytes arrived. */
-static int decode_a_bit(sh_audio_t *sh, unsigned char *buf, int count)
+static int feed_new_packet(sh_audio_t *sh)
 {
-    int ret = MPG123_OK;
-    int got = 0;
     struct ad_mpg123_context *con = sh->context;
+    int ret;
 
-    /* There will be one MPG123_NEW_FORMAT message on first open.
-     * This will be handled in init(). */
-    do {
-        size_t got_now = 0;
-        /* Fetch new format now, after old data has been used. */
-        if(con->new_format)
-            ret = set_format(sh, con);
-
-        /* Feed the decoder. This will only fire from the second round on. */
-        if (ret == MPG123_NEED_MORE) {
-            /* Feed more input data. */
-            struct demux_packet *pkt = demux_read_packet(sh->gsh);
-            if (!pkt)
-                break;          /* Apparently that's it. EOF. */
-
-            /* Next bytes from that presentation time. */
-            if (pkt->pts != MP_NOPTS_VALUE) {
-                sh->pts       = pkt->pts;
-                sh->pts_bytes = 0;
-            }
-
-            /* Have to use mpg123_feed() to avoid decoding here. */
-            ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
-            talloc_free(pkt);
-            if (ret == MPG123_ERR)
-                break;
-
-            /* Indication of format change is possible here (from mpg123_decode()). */
-            if(ret == MPG123_NEW_FORMAT) {
-                con->new_format = 1;
-                if(got)
-                    break; /* Do not switch format during a chunk. */
-
-                ret = set_format(sh, con);
-            }
-        }
-        /* Theoretically, mpg123 could return MPG123_DONE, so be prepared.
-         * Should not happen in our usage, but it is a valid return code. */
-        else if (ret == MPG123_ERR || ret == MPG123_DONE)
-            break;
-
-        /* Try to decode a bit. This is the return value that counts
-         * for the loop condition. */
-        if (!buf) { /* fake call just for feeding to get format */
-            ret = set_format(sh, con);
-        } else { /* This is the decoding. One frame at a time. */
-            ret = mpg123_replace_buffer(con->handle, buf, count);
-            if (ret == MPG123_OK)
-                ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
-        }
-
-        got += got_now;
-        sh->pts_bytes += got_now;
-
-        /* Indication of format change should happen here. */
-        if(ret == MPG123_NEW_FORMAT) {
-            con->new_format = 1;
-            if(got)
-                break; /* Do not switch format during a chunk. */
-
-            ret = set_format(sh, con);
-        }
-
-    } while (ret == MPG123_NEED_MORE || (got == 0 && count != 0));
+    struct demux_packet *pkt = demux_read_packet(sh->gsh);
+    if (!pkt)
+        return -1; /* EOF. */
 
-    if (ret == MPG123_ERR) {
-        mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 decoding failed: %s\n",
-               mpg123_strerror(con->handle));
+    /* Next bytes from that presentation time. */
+    if (pkt->pts != MP_NOPTS_VALUE) {
+        sh->pts        = pkt->pts;
+        sh->pts_offset = 0;
     }
 
-    return got;
-}
+    /* Have to use mpg123_feed() to avoid decoding here. */
+    ret = mpg123_feed(con->handle, pkt->buffer, pkt->len);
+    talloc_free(pkt);
 
-/* Close, reopen stream. Feed data until we know the format of the stream.
- * 1 on success, 0 on error */
-static int reopen_stream(sh_audio_t *sh)
-{
-    struct ad_mpg123_context *con = (struct ad_mpg123_context*) sh->context;
+    if (ret == MPG123_ERR)
+        return -1;
 
-    mpg123_close(con->handle);
-    /* No resetting of the context:
-     * We do not want to loose the mean bitrate data. */
-
-    /* Open and make sure we have fed enough data to get stream properties. */
-    if (MPG123_OK == mpg123_open_feed(con->handle) &&
-        /* Feed data until mpg123 is ready (has found stream beginning). */
-        !decode_a_bit(sh, NULL, 0) &&
-        set_format(sh, con) == MPG123_OK) { /* format setting again just for return value */
-        return 1;
-    } else {
-        mp_msg(MSGT_DECAUDIO, MSGL_ERR,
-               "mpg123 failed to reopen stream: %s\n",
-               mpg123_strerror(con->handle));
-        return 0;
-    }
+    if (ret == MPG123_NEW_FORMAT)
+        con->new_format = 1;
+
+    return 0;
 }
 
 /* Now we really start accessing some data and determining file format.
@@ -341,49 +203,59 @@ static int reopen_stream(sh_audio_t *sh)
  * erros in other places simply cannot occur. */
 static int init(sh_audio_t *sh, const char *decoder)
 {
-    mpg123_id3v2 *v2;
-    struct mpg123_frameinfo finfo;
     struct ad_mpg123_context *con = sh->context;
+    int ret;
 
-    con->new_format = 0;
-    if (reopen_stream(sh) &&
-        /* Get MPEG header info. */
-        MPG123_OK == mpg123_info(con->handle, &finfo) &&
-        /* Since we queried format, mpg123 should have read past ID3v2 tags.
-         * We need to decide if printing of UTF-8 encoded text info is wanted. */
-        MPG123_OK == mpg123_id3(con->handle, NULL, &v2)) {
-        /* If we are here, we passed all hurdles. Yay! Extract the info. */
-        print_header_compact(&finfo);
-        /* Do we want to print out the UTF-8 Id3v2 info?
-        if (v2)
-            print_id3v2(v2); */
-
-        /* Have kb/s, want B/s
-         * For VBR, the first frame will be a bad estimate. */
-        sh->i_bps = (finfo.bitrate ? finfo.bitrate : compute_bitrate(&finfo))
-                    * 1000 / 8;
-        con->delay      = 1;
-        con->mean_rate  = 0.;
-        con->mean_count = 0;
-        con->vbr = (finfo.vbr != MPG123_CBR);
+    ret = mpg123_open_feed(con->handle);
+    if (ret != MPG123_OK)
+        goto fail;
+
+    for (int n = 0; ; n++) {
+        if (feed_new_packet(sh) < 0) {
+            ret = MPG123_NEED_MORE;
+            goto fail;
+        }
+        size_t got_now = 0;
+        ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
+        if (ret == MPG123_OK || ret == MPG123_NEW_FORMAT) {
+            ret = set_format(sh);
+            if (ret == MPG123_OK)
+                break;
+        }
+        if (ret != MPG123_NEED_MORE)
+            goto fail;
+        // max. 16 retries (randomly chosen number)
+        if (n > 16) {
+            ret = MPG123_NEED_MORE;
+            goto fail;
+        }
+    }
+
+    return 1;
 
-        return 1;
+fail:
+    if (ret == MPG123_NEED_MORE) {
+        mp_msg(MSGT_DECAUDIO, MSGL_ERR, "Could not find mp3 stream.\n");
     } else {
         mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 init error: %s\n",
                mpg123_strerror(con->handle));
-        return 0;
     }
+
+    uninit(sh);
+    return 0;
 }
 
-static void uninit(sh_audio_t *sh)
+/* Compute bitrate from frame size. */
+static int compute_bitrate(struct mpg123_frameinfo *i)
 {
-    struct ad_mpg123_context *con = (struct ad_mpg123_context*) sh->context;
-
-    mpg123_close(con->handle);
-    mpg123_delete(con->handle);
-    free(sh->context);
-    sh->context = NULL;
-    mpg123_exit();
+    static const int samples_per_frame[4][4] = {
+        {-1, 384, 1152, 1152},  /* MPEG 1 */
+        {-1, 384, 1152,  576},  /* MPEG 2 */
+        {-1, 384, 1152,  576},  /* MPEG 2.5 */
+        {-1,  -1,   -1,   -1},  /* Unknown */
+    };
+    return (int) ((i->framesize + 4) * 8 * i->rate * 0.001 /
+                  samples_per_frame[i->version][i->layer] + 0.5);
 }
 
 /* Update mean bitrate. This could be dropped if accurate time display
@@ -391,9 +263,12 @@ static void uninit(sh_audio_t *sh)
 static void update_info(sh_audio_t *sh)
 {
     struct ad_mpg123_context *con = sh->context;
-    if (con->vbr && --con->delay < 1) {
-        struct mpg123_frameinfo finfo;
-        if (MPG123_OK == mpg123_info(con->handle, &finfo)) {
+    struct mpg123_frameinfo finfo;
+    if (mpg123_info(con->handle, &finfo) != MPG123_OK)
+        return;
+
+    if (finfo.vbr != MPG123_CBR) {
+        if (--con->delay < 1) {
             if (++con->mean_count > ((unsigned int) -1) / 2)
                 con->mean_count = ((unsigned int) -1) / 4;
 
@@ -404,42 +279,80 @@ static void update_info(sh_audio_t *sh)
 
             con->delay = 10;
         }
+    } else {
+        sh->i_bps = (finfo.bitrate ? finfo.bitrate : compute_bitrate(&finfo))
+                    * 1000 / 8;
+        con->delay      = 1;
+        con->mean_rate  = 0.;
+        con->mean_count = 0;
     }
 }
 
-static int decode_audio(sh_audio_t *sh, unsigned char *buf, int minlen,
-                        int maxlen)
+static int decode_audio(sh_audio_t *sh, struct mp_audio *buffer, int maxlen)
 {
-    int bytes;
+    struct ad_mpg123_context *con = sh->context;
+    void *buf = buffer->planes[0];
+    int ret;
 
-    bytes = decode_a_bit(sh, buf, maxlen);
-    /* This EOF is ignored, apparently, until input data is exhausted. */
-    if (bytes == 0)
-        return -1;              /* EOF */
+    if (con->new_format) {
+        ret = set_format(sh);
+        if (ret == MPG123_OK) {
+            return 0; // let caller handle format change
+        } else if (ret == MPG123_NEED_MORE) {
+            con->need_data = true;
+        } else {
+            goto mpg123_fail;
+        }
+    }
+
+    if (con->need_data) {
+        if (feed_new_packet(sh) < 0)
+            return -1;
+    }
+
+    size_t got_now = 0;
+    ret = mpg123_replace_buffer(con->handle, buf, maxlen * con->sample_size);
+    if (ret != MPG123_OK)
+        goto mpg123_fail;
+
+    ret = mpg123_decode_frame(con->handle, NULL, NULL, &got_now);
+
+    int got_samples = got_now / con->sample_size;
+    buffer->samples += got_samples;
+    sh->pts_offset += got_samples;
+
+    if (ret == MPG123_NEW_FORMAT) {
+        con->new_format = true;
+    } else if (ret == MPG123_NEED_MORE) {
+        con->need_data = true;
+    } else if (ret != MPG123_OK && ret != MPG123_DONE) {
+        goto mpg123_fail;
+    }
 
     update_info(sh);
-    return bytes;
+    return 0;
+
+mpg123_fail:
+    mp_msg(MSGT_DECAUDIO, MSGL_ERR, "mpg123 decoding error: %s\n",
+           mpg123_strerror(con->handle));
+    return -1;
 }
 
 static int control(sh_audio_t *sh, int cmd, void *arg)
 {
+    struct ad_mpg123_context *con = sh->context;
+
     switch (cmd) {
     case ADCTRL_RESYNC_STREAM:
-        /* Close/reopen the stream for mpg123 to make sure it doesn't
-         * think that it still knows the exact stream position.
-         * Otherwise, we would have funny effects from the gapless code.
-         * Oh, and it helps to minimize artifacts from jumping in the stream. */
-        if (reopen_stream(sh)) {
-            update_info(sh);
-            return CONTROL_TRUE;
-        } else {
-            /* MPlayer ignores this case! It just keeps on decoding.
-             * So we have to make sure resync never fails ... */
+        mpg123_close(con->handle);
+
+        if (mpg123_open_feed(con->handle) != MPG123_OK) {
             mp_msg(MSGT_DECAUDIO, MSGL_ERR,
-                   "mpg123 cannot reopen stream for resync.\n");
+                   "mpg123 failed to reopen stream: %s\n",
+                   mpg123_strerror(con->handle));
             return CONTROL_FALSE;
         }
-        break;
+        return CONTROL_TRUE;
     }
     return CONTROL_UNKNOWN;
 }
author	wm4 <wm4@nowhere>	2013-11-12 22:27:44 +0100
committer	wm4 <wm4@nowhere>	2013-11-12 23:39:09 +0100
commit	22b3f522cacfbdba76d311c86efd6091512eb089 (patch)
tree	1105af44a9403bde554cd4b6041d05ceea4fb39a /audio/decode/ad_mpg123.c
parent	5388a0cd4062ba24f5382f025552422fb6430906 (diff)
download	mpv-22b3f522cacfbdba76d311c86efd6091512eb089.tar.bz2 mpv-22b3f522cacfbdba76d311c86efd6091512eb089.tar.xz