img_format: fight ffmpeg pixdesc some more

This change attempts to fix detection of how endian swapping is to be performed. Details can be found in the code comments. It should not change anything, other than fixing handling of the X2RGB10BE ffmpeg pixel format. This format was detected incorrectly, and the component location metadata was discarded due to an internal consistency check. With this commit, it is handled correctly. At first I thought the X2RGB10BE ffmpeg pixdesc metadata was wrong, but it appears to be correct. The problem with this format is that it's the first packed RGB format that requires bit shift to access, and where the endian word size is larger than the (rounded up) component size, all while pixdesc would "require" you to perform 3 memory accesses (instead of 1), and the code tries to reverse this. It appears that trying to use the pixdesc metadata is much, much more work than just duplicating it in a saner form. To be fair, most problems are with big endian, and the mpv internal format does not care much about endian _hosts_.
author: wm4 <wm4@nowhere> 2020-06-17 16:15:15 +0200
committer: wm4 <wm4@nowhere> 2020-06-17 19:43:09 +0200
commit: 7d755020f3656a6b7d899a768743db33e700743f (patch)
tree: d6eaaef1586eca7d3bbd2c5c2a9904c07c4e374a
parent: 5e323333cf128bc159c3bac9e412ff8c6de41693 (diff)
download: mpv-7d755020f3656a6b7d899a768743db33e700743f.tar.bz2
mpv-7d755020f3656a6b7d899a768743db33e700743f.tar.xz
1 files changed, 49 insertions, 28 deletions
diff --git a/video/img_format.c b/video/img_format.c
index 7fef60457f..8f1642a5ee 100644
--- a/video/img_format.c
+++ b/video/img_format.c
@@ -209,6 +209,22 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
     if (is_packed_ss_yuv)
         desc->bpp[0] = pd->comp[1].step * 8;
 
+    // Determine if there are any byte overlaps => relevant for determining
+    // access unit for endian, since pixdesc does not expose this, and assumes
+    // a weird model where you do separate memory fetches for each component.
+    bool any_shared_bytes = !!(pd->flags & AV_PIX_FMT_FLAG_BITSTREAM);
+    for (int c = 0; c < pd->nb_components; c++) {
+        for (int i = 0; i < c; i++) {
+            const AVComponentDescriptor *d1 = &pd->comp[c];
+            const AVComponentDescriptor *d2 = &pd->comp[i];
+            if (d1->plane == d2->plane) {
+                if (d1->offset + (d1->depth + 7) / 8u > d2->offset &&
+                    d2->offset + (d2->depth + 7) / 8u > d1->offset)
+                    any_shared_bytes = true;
+            }
+        }
+    }
+
     int el_bits = (pd->flags & AV_PIX_FMT_FLAG_BITSTREAM) ? 1 : 8;
     for (int c = 0; c < pd->nb_components; c++) {
         const AVComponentDescriptor *d = &pd->comp[c];
@@ -246,37 +262,46 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
         //   component at offset 0 is read as 8 bit; BG is read as 16 bits)
         // - if BE flag is set, swap the word before proceeding
         // - extract via shift and mask derived by depth
-        int word = mp_round_next_power_of_2(MPMAX(d->depth + shift, 8)) / 8;
+        int word = mp_round_next_power_of_2(MPMAX(d->depth + shift, 8));
         // The purpose of this is unknown. It's an absurdity fished out of
         // av_read_image_line2()'s implementation. It seems technically
         // unnecessary, and provides no information. On the other hand, it
         // compensates for seemingly bogus packed integer pixdescs; this
         // is "why" some formats use d->offset = -1.
-        if (is_be && el_bits == 8 && word == 1)
+        if (is_be && el_bits == 8 && word == 8)
             offset += 8;
-        // Pixdesc's model requires accesses with varying word-sizes. This
-        // is complete bullshit, so we transform it into word swaps before
-        // further processing.
-        if (is_be && word == 1) {
-            // Probably packed RGB formats with varying word sizes. Assume
-            // the word access size is the entire pixel.
-            int logend = mp_log2(plane_bits) - 3;
-            if (!MP_IS_POWER_OF_2(plane_bits) || logend < 0 || logend > 3)
-                goto fail;
-            if (!desc->endian_shift)
-                desc->endian_shift = logend;
-            if (desc->endian_shift != logend)
-                goto fail;
-            offset = (1 << desc->endian_shift) * 8 - 8 - offset;
-        }
-        if (is_be && word > 1) {
-            int logend = mp_log2(word);
-            if (desc->endian_shift && desc->endian_shift != logend)
-                goto fail; // fortunately not needed/never happens
-            if (logend > 3)
-                goto fail;
-            desc->endian_shift = logend;
+        // Pixdesc's model sometimes requires accesses with varying word-sizes,
+        // as seen in bgr565 and other formats. Also, it makes you read some
+        // formats with multiple endian-dependent accesses, where accessing a
+        // larger unit would make more sense. (Consider X2RGB10BE, for which
+        // pixdesc wants you to perform 3 * 2 byte accesses, and swap each of
+        // the read 16 bit words. What you really want is to swap the entire 4
+        // byte thing, and then extract the components with bit shifts).
+        // This is complete bullshit, so we transform it into word swaps before
+        // further processing. Care needs to be taken to not change formats like
+        // P010 or YA16 (prefer component accesses for them; P010 isn't even
+        // representable, because endian_shift is for all planes).
+        // As a heuristic, assume that if any components share a byte, the whole
+        // pixel is read as a single memory access and endian swapped at once.
+        int endian_size = 8;
+        if (is_be && plane_bits > 8) {
+            if (any_shared_bytes) {
+                endian_size = plane_bits;
+                if (word != endian_size) {
+                    // Before: offset = 8*byte_offset (with word bits of data)
+                    // After: offset = bit_offset into swapped endian_size word
+                    offset = endian_size - word - offset;
+                }
+            } else {
+                endian_size = word;
+            }
         }
+        int endian_shift = mp_log2(endian_size) - 3;
+        if (!MP_IS_POWER_OF_2(endian_size) || endian_shift < 0 || endian_shift > 3)
+            goto fail;
+        if (desc->endian_shift && desc->endian_shift != endian_shift)
+            goto fail;
+        desc->endian_shift = endian_shift;
 
         // We always use bit offsets; this doesn't lose any information,
         // and pixdesc is merely more redundant.
@@ -310,7 +335,6 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
     // can represent (or it's something like Bayer: components in the same bits,
     // but different alternating lines).
     bool any_shared_bits = false;
-    bool any_shared_bytes = false;
     for (int c = 0; c < pd->nb_components; c++) {
         for (int i = 0; i < c; i++) {
             struct mp_imgfmt_comp_desc *c1 = &desc->comps[c];
@@ -319,9 +343,6 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
                 if (c1->offset + c1->size > c2->offset &&
                     c2->offset + c2->size > c1->offset)
                     any_shared_bits = true;
-                if ((c1->offset + c1->size + 7) / 8u > c2->offset / 8u &&
-                    (c2->offset + c2->size + 7) / 8u > c1->offset / 8u)
-                    any_shared_bytes = true;
             }
         }
     }
author	wm4 <wm4@nowhere>	2020-06-17 16:15:15 +0200
committer	wm4 <wm4@nowhere>	2020-06-17 19:43:09 +0200
commit	7d755020f3656a6b7d899a768743db33e700743f (patch)
tree	d6eaaef1586eca7d3bbd2c5c2a9904c07c4e374a
parent	5e323333cf128bc159c3bac9e412ff8c6de41693 (diff)
download	mpv-7d755020f3656a6b7d899a768743db33e700743f.tar.bz2 mpv-7d755020f3656a6b7d899a768743db33e700743f.tar.xz