video: separate repacking code from zimg and make it independent

For whatever purpose. If anything, this makes the zimg wrapper cleaner. The added tests are not particularly exhaustive, but nice to have. This also makes the scale_zimg.c test pretty useless, because it only tests repacking (going through the zimg wrapper). In theory, the repack_tests things could also be used on scalers, but I guess it doesn't matter. Some things are added over the previous zimg wrapper code. For example, some fringe formats can now be expanded to 8 bits per component for convenience.
author: wm4 <wm4@nowhere> 2020-05-09 17:56:44 +0200
committer: wm4 <wm4@nowhere> 2020-05-09 18:02:57 +0200
commit: d8002f1dde94771952b301f4ebe331c52bc71871 (patch)
tree: 446e52500afe9548d4a88b128b32c415c1909972
parent: d61ced37ae39b4a2dcd49e783f3c292a8d97b14a (diff)
download: mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.bz2
mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.xz
9 files changed, 1688 insertions, 895 deletions
diff --git a/test/ref/repack.txt b/test/ref/repack.txt
new file mode 100644
index 0000000000..3946a6eb47
--- /dev/null
+++ b/test/ref/repack.txt
@@ -0,0 +1,163 @@
+0bgr            => [pa] [un] gbrp            | a=1:1 [tu] [tp]
+0rgb            => [pa] [un] gbrp            | a=1:1 [tu] [tp]
+abgr            => [pa] [un] gbrap           | a=1:1 [tu] [tp]
+argb            => [pa] [un] gbrap           | a=1:1 [tu] [tp]
+ayuv64          => [pa] [un] yuva444p16      | a=1:1 [tu] [tp]
+ayuv64be        => [pa] [un] yuva444p16      | a=1:1 [tu] [tp]
+bayer_bggr16    => no
+bayer_bggr16be  => no
+bayer_bggr8     => no
+bayer_gbrg16    => no
+bayer_gbrg16be  => no
+bayer_gbrg8     => no
+bayer_grbg16    => no
+bayer_grbg16be  => no
+bayer_grbg8     => no
+bayer_rggb16    => no
+bayer_rggb16be  => no
+bayer_rggb8     => no
+bgr0            => [pa] [un] gbrp            | a=1:1 [tu] [tp]
+bgr24           => [pa] [un] gbrp            | a=1:1
+bgr4            => no
+bgr444          => [pa] [un] gbrp4           | a=1:1
+bgr444          => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr444be        => [pa] [un] gbrp4           | a=1:1
+bgr444be        => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr48           => [pa] [un] gbrp16          | a=1:1
+bgr48be         => [pa] [un] gbrp16          | a=1:1
+bgr4_byte       => [pa] [un] gbrp2           | a=1:1
+bgr4_byte       => [pa] [un] gbrp1           | a=1:1 [round-down]
+bgr4_byte       => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr555          => [pa] [un] gbrp5           | a=1:1
+bgr555          => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr555be        => [pa] [un] gbrp5           | a=1:1
+bgr555be        => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr565          => [pa] [un] gbrp6           | a=1:1
+bgr565          => [pa] [un] gbrp5           | a=1:1 [round-down]
+bgr565          => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr565be        => [pa] [un] gbrp6           | a=1:1
+bgr565be        => [pa] [un] gbrp5           | a=1:1 [round-down]
+bgr565be        => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgr8            => [pa] [un] gbrp3           | a=1:1
+bgr8            => [pa] [un] gbrp2           | a=1:1 [round-down]
+bgr8            => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+bgra            => [pa] [un] gbrap           | a=1:1 [tu] [tp]
+bgra64          => [pa] [un] gbrap16         | a=1:1
+bgra64be        => [pa] [un] gbrap16         | a=1:1
+cuda            => no
+d3d11           => no
+d3d11va_vld     => no
+drm_prime       => no
+dxva2_vld       => no
+gbrap10be       => [pa] [un] gbrap10         | a=1:1
+gbrap12be       => [pa] [un] gbrap12         | a=1:1
+gbrap16be       => [pa] [un] gbrap16         | a=1:1
+gbrapf32be      => [pa] [un] gbrapf32        | a=1:1
+gbrp10be        => [pa] [un] gbrp10          | a=1:1
+gbrp12be        => [pa] [un] gbrp12          | a=1:1
+gbrp14be        => [pa] [un] gbrp14          | a=1:1
+gbrp16be        => [pa] [un] gbrp16          | a=1:1
+gbrp9be         => [pa] [un] gbrp9           | a=1:1
+gbrpf32be       => [pa] [un] gbrpf32         | a=1:1
+gray10be        => [pa] [un] gray10          | a=1:1
+gray12be        => [pa] [un] gray12          | a=1:1
+gray14be        => [pa] [un] gray14          | a=1:1
+gray16be        => [pa] [un] gray16          | a=1:1
+gray9be         => [pa] [un] gray9           | a=1:1
+grayf32be       => [pa] [un] grayf32         | a=1:1
+mediacodec      => no
+mmal            => no
+monob           => [pa] [un] y1              | a=8:1 [tu] [tp]
+monob           => [pa] [un] gray            | a=8:1 [expand-8bit]
+monow           => [pa] [un] y1              | a=8:1 [tu] [tp]
+monow           => [pa] [un] gray            | a=8:1 [expand-8bit]
+nv12            => [pa] [un] yuv420p         | a=2:2 [tu] [tp]
+nv16            => [pa] [un] yuv422p         | a=2:1
+nv20            => [pa] [un] yuv422p10       | a=2:1
+nv20be          => [pa] [un] yuv422p10       | a=2:1
+nv21            => [pa] [un] yuv420p         | a=2:2 [tu] [tp]
+nv24            => [pa] [un] yuv444p         | a=1:1
+nv42            => [pa] [un] yuv444p         | a=1:1
+opencl          => no
+p010            => [pa] [un] yuv420p16       | a=2:2
+p010be          => [pa] [un] yuv420p16       | a=2:2
+p016            => [pa] [un] yuv420p16       | a=2:2
+p016be          => [pa] [un] yuv420p16       | a=2:2
+pal8            =>      [un] gbrap           | a=1:1
+qsv             => no
+rgb0            => [pa] [un] gbrp            | a=1:1 [tu] [tp]
+rgb24           => [pa] [un] gbrp            | a=1:1
+rgb30           => [pa] [un] gbrp10          | a=1:1
+rgb4            => no
+rgb444          => [pa] [un] gbrp4           | a=1:1
+rgb444          => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb444be        => [pa] [un] gbrp4           | a=1:1
+rgb444be        => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb48           => [pa] [un] gbrp16          | a=1:1
+rgb48be         => [pa] [un] gbrp16          | a=1:1 [tu] [tp]
+rgb4_byte       => [pa] [un] gbrp2           | a=1:1
+rgb4_byte       => [pa] [un] gbrp1           | a=1:1 [round-down]
+rgb4_byte       => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb555          => [pa] [un] gbrp5           | a=1:1
+rgb555          => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb555be        => [pa] [un] gbrp5           | a=1:1
+rgb555be        => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb565          => [pa] [un] gbrp6           | a=1:1
+rgb565          => [pa] [un] gbrp5           | a=1:1 [round-down]
+rgb565          => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb565be        => [pa] [un] gbrp6           | a=1:1
+rgb565be        => [pa] [un] gbrp5           | a=1:1 [round-down]
+rgb565be        => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgb8            => [pa] [un] gbrp3           | a=1:1
+rgb8            => [pa] [un] gbrp2           | a=1:1 [round-down]
+rgb8            => [pa] [un] gbrp            | a=1:1 [expand-8bit]
+rgba            => [pa] [un] gbrap           | a=1:1 [tu] [tp]
+rgba64          => [pa] [un] gbrap16         | a=1:1 [tu] [tp]
+rgba64be        => [pa] [un] gbrap16         | a=1:1
+uyvy422         => [pa] [un] yuv422p         | a=2:1
+uyyvyy411       => no
+vaapi           => no
+vaapi_idct      => no
+vaapi_moco      => no
+vdpau           => no
+vdpau_output    => no
+videotoolbox    => no
+vulkan          => no
+xvmc            => no
+xyz12           => [pa] [un] gbrp16          | a=1:1
+xyz12be         => [pa] [un] gbrp16          | a=1:1
+y210            => [pa] [un] yuv422p16       | a=2:1
+y210be          => [pa] [un] yuv422p16       | a=2:1
+ya16            => [pa] [un] yap16           | a=1:1 [tu] [tp]
+ya16be          => [pa] [un] yap16           | a=1:1
+ya8             => [pa] [un] yap8            | a=1:1
+yuv420p10be     => [pa] [un] yuv420p10       | a=2:2
+yuv420p12be     => [pa] [un] yuv420p12       | a=2:2
+yuv420p14be     => [pa] [un] yuv420p14       | a=2:2
+yuv420p16be     => [pa] [un] yuv420p16       | a=2:2
+yuv420p9be      => [pa] [un] yuv420p9        | a=2:2
+yuv422p10be     => [pa] [un] yuv422p10       | a=2:1
+yuv422p12be     => [pa] [un] yuv422p12       | a=2:1
+yuv422p14be     => [pa] [un] yuv422p14       | a=2:1
+yuv422p16be     => [pa] [un] yuv422p16       | a=2:1 [tu] [tp]
+yuv422p9be      => [pa] [un] yuv422p9        | a=2:1
+yuv440p10be     => [pa] [un] yuv440p10       | a=1:2
+yuv440p12be     => [pa] [un] yuv440p12       | a=1:2
+yuv444p10be     => [pa] [un] yuv444p10       | a=1:1
+yuv444p12be     => [pa] [un] yuv444p12       | a=1:1
+yuv444p14be     => [pa] [un] yuv444p14       | a=1:1
+yuv444p16be     => [pa] [un] yuv444p16       | a=1:1
+yuv444p9be      => [pa] [un] yuv444p9        | a=1:1
+yuva420p10be    => [pa] [un] yuva420p10      | a=2:2
+yuva420p16be    => [pa] [un] yuva420p16      | a=2:2
+yuva420p9be     => [pa] [un] yuva420p9       | a=2:2
+yuva422p10be    => [pa] [un] yuva422p10      | a=2:1
+yuva422p12be    => [pa] [un] yuva422p12      | a=2:1
+yuva422p16be    => [pa] [un] yuva422p16      | a=2:1
+yuva422p9be     => [pa] [un] yuva422p9       | a=2:1
+yuva444p10be    => [pa] [un] yuva444p10      | a=1:1
+yuva444p12be    => [pa] [un] yuva444p12      | a=1:1
+yuva444p16be    => [pa] [un] yuva444p16      | a=1:1
+yuva444p9be     => [pa] [un] yuva444p9       | a=1:1
+yuyv422         => [pa] [un] yuv422p         | a=2:1
+yvyu422         => [pa] [un] yuv422p         | a=2:1 [tu] [tp]
diff --git a/test/repack.c b/test/repack.c
new file mode 100644
index 0000000000..ede6046350
--- /dev/null
+++ b/test/repack.c
@@ -0,0 +1,249 @@
+#include <libavutil/pixfmt.h>
+
+#include "common/common.h"
+#include "tests.h"
+#include "video/fmt-conversion.h"
+#include "video/img_format.h"
+#include "video/repack.h"
+#include "video/zimg.h"
+
+// Excuse the utter stupidity.
+// Format fields in the test table hold either a positive value, which is used
+// as-is, or a negated libavutil AV_PIX_FMT_* value, which UNFUCK() converts
+// with pixfmt2imgfmt() when the table is evaluated.
+#define UNFUCK(v) ((v) > 0 ? (v) : pixfmt2imgfmt(-(v)))
+// NOTE(review): presumably guarantees that real IMGFMT_* values are positive,
+// so the sign can be used to tell both kinds apart - confirm.
+static_assert(IMGFMT_START > 0, "");
+// Shorthands that follow the same negated-pixfmt convention.
+#define IMGFMT_GBRP (-AV_PIX_FMT_GBRP)
+#define IMGFMT_GBRAP (-AV_PIX_FMT_GBRAP)
+
+// One hand-written conversion sample: the same w x h pixels given once in
+// format fmt_a (plane data in a[]) and once in format fmt_b (plane data in
+// b[]). try_repack() unpacks a -> b, packs b -> a, and compares the results.
+struct entry {
+    int w, h;               // size of the sample in pixels
+    int fmt_a;              // format of a[] (see UNFUCK() for the encoding)
+    const void *const a[4]; // per-plane data in fmt_a, rows stored back to back
+    int fmt_b;              // format of b[] (see UNFUCK() for the encoding)
+    const void *const b[4]; // per-plane data in fmt_b, rows stored back to back
+    int flags;              // only used if equal to the flags under test
+};
+
+// Compound literals to spell plane data inline as uint8_t/uint16_t arrays.
+#define P8(...) (const uint8_t[]){__VA_ARGS__}
+#define P16(...) (const uint16_t[]){__VA_ARGS__}
+
+// Warning: only entries that match existing conversions are tested.
+// Each entry is {w, h, fmt_a, planes of a, fmt_b, planes of b}; the flags
+// field is left out everywhere and is therefore 0.
+// NOTE(review): P16() data is copied into the images byte-wise, and the *BE
+// entries spell byte-swapped values, which looks like it assumes a
+// little-endian host - confirm.
+static const struct entry repack_tests[] = {
+    // Note: the '0' tests rely on 0 being written, although by definition the
+    //       contents of this padding is undefined. The repacker always writes
+    //       it this way, though.
+    {1, 1, IMGFMT_RGB0,             {P8(1, 2, 3, 0)},
+           IMGFMT_GBRP,             {P8(2), P8(3), P8(1)}},
+    {1, 1, IMGFMT_BGR0,             {P8(1, 2, 3, 0)},
+           IMGFMT_GBRP,             {P8(2), P8(1), P8(3)}},
+    {1, 1, IMGFMT_0RGB,             {P8(0, 1, 2, 3)},
+           IMGFMT_GBRP,             {P8(2), P8(3), P8(1)}},
+    {1, 1, IMGFMT_0BGR,             {P8(0, 1, 2, 3)},
+           IMGFMT_GBRP,             {P8(2), P8(1), P8(3)}},
+    {1, 1, IMGFMT_RGBA,             {P8(1, 2, 3, 4)},
+           IMGFMT_GBRAP,            {P8(2), P8(3), P8(1), P8(4)}},
+    {1, 1, IMGFMT_BGRA,             {P8(1, 2, 3, 4)},
+           IMGFMT_GBRAP,            {P8(2), P8(1), P8(3), P8(4)}},
+    {1, 1, IMGFMT_ARGB,             {P8(4, 1, 2, 3)},
+           IMGFMT_GBRAP,            {P8(2), P8(3), P8(1), P8(4)}},
+    {1, 1, IMGFMT_ABGR,             {P8(4, 1, 2, 3)},
+           IMGFMT_GBRAP,            {P8(2), P8(1), P8(3), P8(4)}},
+    {1, 1, IMGFMT_RGBA64,           {P16(0x1a1b, 0x2a2b, 0x3a3b, 0x4a4b)},
+           -AV_PIX_FMT_GBRAP16,     {P16(0x2a2b), P16(0x3a3b),
+                                     P16(0x1a1b), P16(0x4a4b)}},
+    {1, 1, -AV_PIX_FMT_RGB48BE,     {P16(0x1a1b, 0x2a2b, 0x3a3b)},
+           -AV_PIX_FMT_GBRP16,      {P16(0x2b2a), P16(0x3b3a),
+                                     P16(0x1b1a)}},
+    {8, 1, -AV_PIX_FMT_MONOWHITE,   {P8(0xAA)},
+           IMGFMT_Y1,               {P8(0, 1, 0, 1, 0, 1, 0, 1)}},
+    {8, 1, -AV_PIX_FMT_MONOBLACK,   {P8(0xAA)},
+           IMGFMT_Y1,               {P8(1, 0, 1, 0, 1, 0, 1, 0)}},
+    {2, 2, IMGFMT_NV12,             {P8(1, 2, 3, 4), P8(5, 6)},
+           IMGFMT_420P,             {P8(1, 2, 3, 4), P8(5), P8(6)}},
+    {2, 2, -AV_PIX_FMT_NV21,        {P8(1, 2, 3, 4), P8(5, 6)},
+           IMGFMT_420P,             {P8(1, 2, 3, 4), P8(6), P8(5)}},
+    {1, 1, -AV_PIX_FMT_AYUV64,      {P16(1, 2, 3, 4)},
+           -AV_PIX_FMT_YUVA444P16,  {P16(2), P16(3), P16(4), P16(1)}},
+    {1, 1, -AV_PIX_FMT_AYUV64BE,    {P16(0x0100, 0x0200, 0x0300, 0x0400)},
+           -AV_PIX_FMT_YUVA444P16,  {P16(2), P16(3), P16(4), P16(1)}},
+    {2, 1, -AV_PIX_FMT_YVYU422,     {P8(1, 2, 3, 4)},
+           -AV_PIX_FMT_YUV422P,     {P8(1, 3), P8(4), P8(2)}},
+    {1, 1, -AV_PIX_FMT_YA16,        {P16(1, 2)},
+           IMGFMT_YAP16,            {P16(1), P16(2)}},
+    {2, 1, -AV_PIX_FMT_YUV422P16BE, {P16(0x1a1b, 0x2a2b), P16(0x3a3b),
+                                     P16(0x4a4b)},
+           -AV_PIX_FMT_YUV422P16,   {P16(0x1b1a, 0x2b2a), P16(0x3b3a),
+                                     P16(0x4b4a)}},
+};
+
+// Returns true if imgfmt can be described as a "regular" format and each of
+// its planes carries exactly one component; false otherwise.
+static bool is_true_planar(int imgfmt)
+{
+    struct mp_regular_imgfmt desc;
+    if (!mp_get_regular_imgfmt(&desc, imgfmt))
+        return false;
+
+    // Advance over all leading single-component planes.
+    int single = 0;
+    while (single < desc.num_planes &&
+           desc.planes[single].num_components == 1)
+        single++;
+
+    // Truly planar only if no plane stopped the scan early.
+    return single >= desc.num_planes;
+}
+
+// Probe the repacker for one image format and one set of creation flags.
+//
+// Creates both the unpacker and the packer for imgfmt, writes one summary
+// line to f, and runs every repack_tests[] entry whose formats and flags
+// match the created conversion, in each direction that exists.
+//
+//  ctx:        test context (not used by the code in this function)
+//  f:          output text file that is later compared against the reference
+//  imgfmt:     format to probe
+//  flags:      REPACK_CREATE_* flags passed to mp_repack_create_planar()
+//  not_if_fmt: if non-zero and equal to the resulting planar format, the
+//              result is dropped silently (used to suppress flag variants
+//              that yield the same format as the flag-less call)
+// Returns the planar format the conversion maps to, or 0 if nothing was
+// reported (identity conversion, no conversion, or suppressed duplicate).
+static int try_repack(struct test_ctx *ctx, FILE *f, int imgfmt, int flags,
+                      int not_if_fmt)
+{
+    char *head = mp_tprintf(80, "%-15s =>", mp_imgfmt_to_name(imgfmt));
+    struct mp_repack *un = mp_repack_create_planar(imgfmt, false, flags);
+    struct mp_repack *pa = mp_repack_create_planar(imgfmt, true, flags);
+
+    // If both exist, they must always be symmetric.
+    if (un && pa) {
+        assert(mp_repack_get_format_src(pa) == mp_repack_get_format_dst(un));
+        assert(mp_repack_get_format_src(un) == mp_repack_get_format_dst(pa));
+        assert(mp_repack_get_align_x(pa) == mp_repack_get_align_x(un));
+        assert(mp_repack_get_align_y(pa) == mp_repack_get_align_y(un));
+    }
+
+    // a = format on the packed side, b = format on the planar side; both stay
+    // 0 if neither repacker could be created.
+    int a = 0;
+    int b = 0;
+    if (un) {
+        a = mp_repack_get_format_src(un);
+        b = mp_repack_get_format_dst(un);
+    } else if (pa) {
+        a = mp_repack_get_format_dst(pa);
+        b = mp_repack_get_format_src(pa);
+    }
+
+    // Skip the identity ones because they're uninteresting, and add too much
+    // noise. But still make sure they behave as expected.
+    if (is_true_planar(imgfmt)) {
+        // (note that we require alpha-enabled zimg)
+        assert(mp_zimg_supports_in_format(imgfmt));
+        assert(un && pa);
+        assert(a == imgfmt && b == imgfmt);
+        talloc_free(pa);
+        talloc_free(un);
+        return 0;
+    }
+
+    // Neither direction exists: report "no" once, for the flag-less call only.
+    struct mp_repack *rp = pa ? pa : un;
+    if (!rp) {
+        if (!flags)
+            fprintf(f, "%s no\n", head);
+        return 0;
+    }
+
+    assert(a == imgfmt);
+    // Same planar format as an earlier call for this imgfmt: nothing new.
+    if (b && b == not_if_fmt) {
+        talloc_free(pa);
+        talloc_free(un);
+        return 0;
+    }
+
+    // Summary line: available directions, planar format, alignment, flags.
+    fprintf(f, "%s %4s %4s %-15s |", head, pa ? "[pa]" : "", un ? "[un]" : "",
+            mp_imgfmt_to_name(b));
+
+    fprintf(f, " a=%d:%d", mp_repack_get_align_x(rp), mp_repack_get_align_y(rp));
+
+    if (flags & REPACK_CREATE_ROUND_DOWN)
+        fprintf(f, " [round-down]");
+    if (flags & REPACK_CREATE_EXPAND_8BIT)
+        fprintf(f, " [expand-8bit]");
+
+    // LCM of alignment of all packers.
+    // NOTE(review): the maximum is only the LCM if the alignments are powers
+    // of 2 - confirm that mp_repack_get_align_*() guarantees this.
+    int ax = mp_repack_get_align_x(rp);
+    int ay = mp_repack_get_align_y(rp);
+    if (pa && un) {
+        ax = MPMAX(mp_repack_get_align_x(pa), mp_repack_get_align_x(un));
+        ay = MPMAX(mp_repack_get_align_y(pa), mp_repack_get_align_y(un));
+    }
+
+    for (int n = 0; n < MP_ARRAY_SIZE(repack_tests); n++) {
+        const struct entry *e = &repack_tests[n];
+        int fmt_a = UNFUCK(e->fmt_a);
+        int fmt_b = UNFUCK(e->fmt_b);
+        if (!(fmt_a == a && fmt_b == b && e->flags == flags))
+            continue;
+
+        // We convert a "random" macro pixel to catch potential addressing bugs
+        // that might be ignored with (0, 0) origins.
+        struct mp_image *ia = mp_image_alloc(fmt_a, e->w * 5 * ax, e->h * 5 * ay);
+        struct mp_image *ib = mp_image_alloc(fmt_b, e->w * 7 * ax, e->h * 6 * ay);
+        int sx = 4 * ax, sy = 3 * ay, dx = 3 * ax, dy = 2 * ay;
+
+        assert(ia && ib);
+
+        // pack == 0: unpack a -> b; pack == 1: pack b -> a.
+        for (int pack = 0; pack < 2; pack++) {
+            struct mp_repack *repacker = pack ? pa : un;
+            if (!repacker)
+                continue;
+
+            mp_image_clear(ia, 0, 0, ia->w, ia->h);
+            mp_image_clear(ib, 0, 0, ib->w, ib->h);
+
+            const void *const *dstd = pack ? e->a : e->b;
+            const void *const *srcd = pack ? e->b : e->a;
+            struct mp_image *dsti = pack ? ia : ib;
+            struct mp_image *srci = pack ? ib : ia;
+
+            bool r = repack_config_buffers(repacker, 0, dsti, 0, srci, NULL);
+            assert(r);
+
+            // Place the sample rows into the source image at (sx, sy).
+            for (int p = 0; p < srci->num_planes; p++) {
+                uint8_t *ptr = mp_image_pixel_ptr(srci, p, sx, sy);
+                for (int y = 0; y < e->h >> srci->fmt.ys[p]; y++) {
+                    int w = e->w >> srci->fmt.xs[p];
+                    // Bytes per sample row on this plane, rounded up.
+                    int wb = (w * srci->fmt.bpp[p] + 7) / 8;
+                    const void *cptr = (uint8_t *)srcd[p] + wb * y;
+                    memcpy(ptr + srci->stride[p] * y, cptr, wb);
+                }
+            }
+
+            repack_line(repacker, dx, dy, sx, sy, e->w);
+
+            // Compare what arrived at (dx, dy) with the expected sample rows.
+            for (int p = 0; p < dsti->num_planes; p++) {
+                uint8_t *ptr = mp_image_pixel_ptr(dsti, p, dx, dy);
+                for (int y = 0; y < e->h >> dsti->fmt.ys[p]; y++) {
+                    int w = e->w >> dsti->fmt.xs[p];
+                    int wb = (w * dsti->fmt.bpp[p] + 7) / 8;
+                    const void *cptr = (uint8_t *)dstd[p] + wb * y;
+                    assert_memcmp(ptr + dsti->stride[p] * y, cptr, wb);
+                }
+            }
+
+            // Record that this direction was tested: [tu] unpack, [tp] pack.
+            fprintf(f, " [t%s]", pack ? "p" : "u");
+        }
+
+        talloc_free(ia);
+        talloc_free(ib);
+    }
+
+    fprintf(f, "\n");
+
+    talloc_free(pa);
+    talloc_free(un);
+    return b;
+}
+
+// Test entry point: probes every known image format without flags and with
+// each extra creation flag, writes the report to "repack.txt" in the output
+// directory, and compares it against the reference file of the same name.
+static void run(struct test_ctx *ctx)
+{
+    const int variant_flags[2] = {
+        REPACK_CREATE_ROUND_DOWN,
+        REPACK_CREATE_EXPAND_8BIT,
+    };
+
+    FILE *out = test_open_out(ctx, "repack.txt");
+
+    init_imgfmts_list();
+    for (int i = 0; i < num_imgfmts; i++) {
+        const int fmt = imgfmts[i];
+
+        // The flag-less result is used to suppress identical flag variants.
+        const int plain = try_repack(ctx, out, fmt, 0, 0);
+        for (int v = 0; v < 2; v++)
+            try_repack(ctx, out, fmt, variant_flags[v], plain);
+    }
+
+    fclose(out);
+
+    assert_text_files_equal(ctx, "repack.txt", "repack.txt",
+                "This can fail if FFmpeg/libswscale adds or removes pixfmts.");
+}
+
+// Unit test descriptor for the repack test; referenced from the unittests[]
+// list in test/tests.c.
+const struct unittest test_repack = {
+    .name = "repack",
+    .run = run,
+};
diff --git a/test/tests.c b/test/tests.c
index 9ef88f4a8d..d8df43f319 100644
--- a/test/tests.c
+++ b/test/tests.c
@@ -12,6 +12,7 @@ static const struct unittest *unittests[] = {
     &test_paths,
     &test_repack_sws,
 #if HAVE_ZIMG
+    &test_repack, // zimg only due to cross-checking with zimg.c
     &test_repack_zimg,
 #endif
     NULL
@@ -128,3 +129,25 @@ void assert_text_files_equal_impl(const char *file, int line,
         abort();
     }
 }
+
+// Print size bytes starting at d to stdout as one line of two-digit hex
+// values, framed by '|' characters.
+static void hexdump(const uint8_t *d, size_t size)
+{
+    printf("|");
+    for (size_t i = 0; i < size; i++)
+        printf(" %02x", d[i]);
+    printf(" |\n");
+}
+
+// Backend of assert_memcmp(): returns silently if the first size bytes of a
+// and b are equal; otherwise prints the call site and a hex dump of both
+// buffers, then aborts.
+void assert_memcmp_impl(const char *file, int line,
+                        const void *a, const void *b, size_t size)
+{
+    if (memcmp(a, b, size) != 0) {
+        printf("%s:%d: mismatching data:\n", file, line);
+        hexdump(a, size);
+        hexdump(b, size);
+        abort();
+    }
+}
diff --git a/test/tests.h b/test/tests.h
index f4065f596f..8b2eb98174 100644
--- a/test/tests.h
+++ b/test/tests.h
@@ -43,6 +43,7 @@ extern const struct unittest test_json;
 extern const struct unittest test_linked_list;
 extern const struct unittest test_repack_sws;
 extern const struct unittest test_repack_zimg;
+extern const struct unittest test_repack;
 extern const struct unittest test_paths;
 
 #define assert_true(x) assert(x)
@@ -54,6 +55,10 @@ extern const struct unittest test_paths;
 #define assert_float_equal(a, b, tolerance) \
     assert_float_equal_impl(__FILE__, __LINE__, (a), (b), (tolerance))
 
+// Assert that memcmp(a,b,s)==0, or hexdump output on failure.
+#define assert_memcmp(a, b, s) \
+    assert_memcmp_impl(__FILE__, __LINE__, (a), (b), (s))
+
 // Require that the files "ref" and "new" are the same. The paths can be
 // relative to ref_path and out_path respectively. If they're not the same,
 // the output of "diff" is shown, the err message (if not NULL), and the test
@@ -69,6 +74,8 @@ void assert_float_equal_impl(const char *file, int line,
 void assert_text_files_equal_impl(const char *file, int line,
                                   struct test_ctx *ctx, const char *ref,
                                   const char *new, const char *err);
+void assert_memcmp_impl(const char *file, int line,
+                        const void *a, const void *b, size_t size);
 
 // Open a new file in the out_path. Always succeeds.
 FILE *test_open_out(struct test_ctx *ctx, const char *name);
diff --git a/video/img_format.h b/video/img_format.h
index 8e55cc9493..b0fdef8a50 100644
--- a/video/img_format.h
+++ b/video/img_format.h
@@ -69,8 +69,9 @@ struct mp_imgfmt_desc {
     int flags;              // MP_IMGFLAG_* bitfield
     int8_t num_planes;
     int8_t chroma_xs, chroma_ys; // chroma shift (i.e. log2 of chroma pixel size)
-    int8_t align_x, align_y;     // pixel size to get byte alignment and to get
+    int8_t align_x, align_y;     // pixel count to get byte alignment and to get
                                  // to a pixel pos where luma & chroma aligns
+                                 // always power of 2
     int8_t bytes[MP_MAX_PLANES]; // bytes per pixel (MP_IMGFLAG_BYTE_ALIGNED)
     int8_t bpp[MP_MAX_PLANES];   // bits per pixel
     int8_t plane_bits;           // number of bits in use for plane 0
diff --git a/video/repack.c b/video/repack.c
new file mode 100644
index 0000000000..359e32996d
--- /dev/null
+++ b/video/repack.c
@@ -0,0 +1,1110 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+
+#include <libavutil/bswap.h>
+#include <libavutil/pixfmt.h>
+
+#include "common/common.h"
+#include "repack.h"
+#include "video/fmt-conversion.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+
+// Kind of operation performed by one struct repack_step.
+// NOTE(review): the code consuming these values is outside this chunk; the
+// names suggest "pack/unpack" vs. "byte swap" - confirm.
+enum repack_step_type {
+    REPACK_STEP_REPACK,
+    REPACK_STEP_ENDIAN,
+};
+
+// One stage of a repack operation, with its input and output image.
+// NOTE(review): how steps are chained is not visible in this chunk.
+struct repack_step {
+    enum repack_step_type type;
+    // 0=input, 1=output
+    struct mp_image *buf[2];
+    bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer
+    struct mp_imgfmt_desc fmt[2]; // presumably indexed like buf[] - confirm
+    struct mp_image *tmp; // output buffer, if needed
+};
+
+// State of one repacker instance, converting between a format "a" (possibly
+// packed and/or byte-swapped) and its planar equivalent "b". The direction is
+// selected by the pack field.
+struct mp_repack {
+    bool pack;                  // if false, this is for unpacking
+    int flags;                  // presumably the REPACK_CREATE_* flags - confirm
+    int imgfmt_user;            // original mp format (unchanged endian)
+    int imgfmt_a;               // original mp format (possibly packed format,
+                                // swapped endian)
+    int imgfmt_b;               // equivalent unpacked/planar format
+    struct mp_imgfmt_desc fmt_a;// ==imgfmt_a
+    struct mp_imgfmt_desc fmt_b;// ==imgfmt_b
+
+    // Per-line conversion callback; a/b refer to the images in format a/b.
+    void (*repack)(struct mp_repack *rp,
+                   struct mp_image *a, int a_x, int a_y,
+                   struct mp_image *b, int b_x, int b_y, int w);
+
+    bool passthrough_y;         // possible luma plane optimization for e.g. nv12
+    int endian_size;            // endian swap; 0=none, 2/4=swap word size
+
+    // For packed_repack.
+    int components[4];          // b[n] = mp_image.planes[components[n]]
+    //  pack:   a is dst, b is src
+    //  unpack: a is src, b is dst
+    void (*packed_repack_scanline)(void *a, void *b[], int w);
+
+    // Fringe RGB/YUV.
+    // NOTE(review): the users of these fields are outside this chunk.
+    uint8_t comp_size;
+    uint8_t *comp_map;
+    uint8_t comp_shifts[3];
+    uint8_t *comp_lut;
+
+    // REPACK_STEP_REPACK: if true, need to copy this plane
+    bool copy_buf[4];
+
+    struct repack_step steps[4];
+    int num_steps;              // presumably the number of used steps[] entries
+
+    bool configured;
+};
+
+// Look up the planar RGB format with num_planes planes (3, or 4 with alpha)
+// and unsigned integer components of which the low "depth" bits are in use.
+// Returns whatever mp_find_regular_imgfmt() reports for that description, or
+// 0 if num_planes is neither 3 nor 4.
+static int find_gbrp_format(int depth, int num_planes)
+{
+    const bool has_valid_planes = num_planes == 3 || num_planes == 4;
+    if (!has_valid_planes)
+        return 0;
+
+    // Components up to 8 bits use 1 byte, everything above uses 2 bytes.
+    const bool wide = depth > 8;
+    struct mp_regular_imgfmt desc = {
+        .component_type = MP_COMPONENT_TYPE_UINT,
+        .forced_csp = MP_CSP_RGB,
+        .component_size = wide ? 2 : 1,
+        // depth minus storage bits, i.e. <= 0 for depths that fit.
+        .component_pad = depth - (wide ? 16 : 8),
+        .num_planes = num_planes,
+        .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
+    };
+    return mp_find_regular_imgfmt(&desc);
+}
+
+// Look up the planar format with num_planes planes (1 to 4), one unsigned
+// integer component per plane, of which the low "depth" bits are in use.
+// Returns whatever mp_find_regular_imgfmt() reports for that description, or
+// 0 if num_planes is out of range.
+static int find_yuv_format(int depth, int num_planes)
+{
+    if (!(num_planes >= 1 && num_planes <= 4))
+        return 0;
+
+    // Components up to 8 bits use 1 byte, everything above uses 2 bytes.
+    const bool wide = depth > 8;
+    struct mp_regular_imgfmt desc = {
+        .component_type = MP_COMPONENT_TYPE_UINT,
+        .component_size = wide ? 2 : 1,
+        // depth minus storage bits, i.e. <= 0 for depths that fit.
+        .component_pad = depth - (wide ? 16 : 8),
+        .num_planes = num_planes,
+        .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} },
+    };
+    // With exactly two planes the second plane carries component 4, not 2.
+    if (num_planes == 2)
+        desc.planes[1].components[0] = 4;
+    return mp_find_regular_imgfmt(&desc);
+}
+
+// Copy one line of plane p from src to dst, starting at the given pixel
+// positions and covering w pixels. For a plane that is not vertically
+// subsampled in a subsampled format, "one line" spans several plane rows.
+static void copy_plane(struct mp_image *dst, int dst_x, int dst_y,
+                       struct mp_image *src, int src_x, int src_y,
+                       int w, int p)
+{
+    // Plane rows to copy: 1 if chroma_ys == ys[p], more otherwise.
+    const int rows = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
+    const size_t row_bytes = mp_image_plane_bytes(dst, p, dst_x, w);
+
+    assert(dst->fmt.bpp[p] == src->fmt.bpp[p]);
+
+    int row = 0;
+    while (row < rows) {
+        void *to = mp_image_pixel_ptr(dst, p, dst_x, dst_y + row);
+        void *from = mp_image_pixel_ptr(src, p, src_x, src_y + row);
+        memcpy(to, from, row_bytes);
+        row++;
+    }
+}
+
+// Byte-swap one line of w pixels on every plane while copying it from src to
+// dst. endian_size is the word size in bytes that gets swapped (2 or 4); any
+// other value trips an assertion.
+static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
+                        struct mp_image *src, int src_x, int src_y,
+                        int w, int endian_size)
+{
+    assert(src->fmt.num_planes == dst->fmt.num_planes);
+
+    for (int plane = 0; plane < dst->fmt.num_planes; plane++) {
+        const int shift_x = dst->fmt.xs[plane];
+        const int pixel_bytes = dst->fmt.bytes[plane];
+        const int words_per_pixel = pixel_bytes / endian_size;
+        // Width on this plane, rounded up for subsampled planes.
+        const int plane_w = (w + (1 << shift_x) - 1) >> shift_x;
+        const int num_words = plane_w * words_per_pixel;
+        // Number of lines on this plane.
+        const int rows =
+            (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[plane]) + 1;
+
+        assert(src->fmt.bytes[plane] == pixel_bytes);
+
+        for (int row = 0; row < rows; row++) {
+            void *in = mp_image_pixel_ptr(src, plane, src_x, src_y + row);
+            void *out = mp_image_pixel_ptr(dst, plane, dst_x, dst_y + row);
+            if (endian_size == 2) {
+                uint16_t *in16 = in;
+                uint16_t *out16 = out;
+                for (int i = 0; i < num_words; i++)
+                    out16[i] = av_bswap16(in16[i]);
+            } else if (endian_size == 4) {
+                uint32_t *in32 = in;
+                uint32_t *out32 = out;
+                for (int i = 0; i < num_words; i++)
+                    out32[i] = av_bswap32(in32[i]);
+            } else {
+                assert(0);
+            }
+        }
+    }
+}
+
+// PA = PAck, copy planar input to single packed array
+// UN = UNpack, copy packed input to planar output
+// Naming convention:
+//  pa_/un_ prefix to identify conversion direction.
+//  Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
+//      (This is unusual; MSB to LSB is more commonly used to describe formats,
+//       but our convention makes more sense for byte access in little endian.)
+//  "c" identifies a color component.
+//  "z" identifies known zero padding.
+//  "x" identifies uninitialized padding.
+//  A component is followed by its size in bits.
+//  Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
+// Unpackers will often use "x" for padding, because they ignore it, while
+// packers will use "z" because they write zero.
+
+// Generators for packers/unpackers that access one packed pixel as a single
+// word of type packed_t in host byte order. Every component comes from (or
+// goes to) its own array of plane_t and occupies the bits starting at its
+// sh_cN shift. The packer and the unpacker of one layout must use the same
+// shifts, otherwise packing is not the inverse of unpacking.
+
+// Pack 4 component arrays into one word per pixel.
+#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
+    static void name(void *dst, void *src[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            ((packed_t *)dst)[x] =                                          \
+                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
+                ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) |             \
+                ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) |             \
+                ((packed_t)((plane_t *)src[3])[x] << (sh_c3));              \
+        }                                                                   \
+    }
+
+// Unpack one word per pixel into 4 component arrays; mask selects the bits
+// of a single component after shifting.
+#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
+    static void name(void *src, void *dst[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            packed_t c = ((packed_t *)src)[x];                              \
+            ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask);               \
+            ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask);               \
+            ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask);               \
+            ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask);               \
+        }                                                                   \
+    }
+
+
+// Pack 3 component arrays into one word per pixel; pad is OR-ed into every
+// word and provides the value of the unused bits.
+#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
+    static void name(void *dst, void *src[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            ((packed_t *)dst)[x] = (pad) |                                  \
+                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
+                ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) |             \
+                ((packed_t)((plane_t *)src[2])[x] << (sh_c2));              \
+        }                                                                   \
+    }
+
+UN_WORD_4(un_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24, 0xFFu)
+PA_WORD_4(pa_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24)
+// Not sure if this is a good idea; there may be no alignment guarantee.
+UN_WORD_4(un_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48, 0xFFFFu)
+PA_WORD_4(pa_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48)
+
+// Unpack one word per pixel into 3 component arrays; remaining bits of the
+// word are ignored.
+#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask)       \
+    static void name(void *src, void *dst[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            packed_t c = ((packed_t *)src)[x];                              \
+            ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask);               \
+            ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask);               \
+            ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask);               \
+        }                                                                   \
+    }
+
+UN_WORD_3(un_ccc8x8,  uint32_t, uint8_t,  0, 8,  16, 0xFFu)
+PA_WORD_3(pa_ccc8z8,  uint32_t, uint8_t,  0, 8,  16, 0)
+UN_WORD_3(un_x8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0xFFu)
+PA_WORD_3(pa_z8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0)
+UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
+// Must mirror un_ccc10x2: component n sits at bit 10*n. (The shifts used to
+// be 20, 10, 0, which reversed the component order when packing.)
+PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0)
+
+// Pack 2 component arrays into one word per pixel: component n is shifted
+// left by sh_cN, and pad is OR-ed into every word.
+#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad)               \
+    static void name(void *dst, void *src[], int w) {                       \
+        packed_t *out = dst;                                                \
+        for (int i = 0; i < w; i++) {                                       \
+            packed_t v0 = ((plane_t *)src[0])[i];                           \
+            packed_t v1 = ((plane_t *)src[1])[i];                           \
+            out[i] = (pad) | (v0 << (sh_c0)) | (v1 << (sh_c1));             \
+        }                                                                   \
+    }
+
+// Unpack one word per pixel into 2 component arrays: component n is taken
+// from the bits at sh_cN, limited by mask.
+#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask)              \
+    static void name(void *src, void *dst[], int w) {                       \
+        packed_t *in = src;                                                 \
+        for (int i = 0; i < w; i++) {                                       \
+            packed_t word = in[i];                                          \
+            ((plane_t *)dst[0])[i] = (word >> (sh_c0)) & (mask);            \
+            ((plane_t *)dst[1])[i] = (word >> (sh_c1)) & (mask);            \
+        }                                                                   \
+    }
+
+UN_WORD_2(un_cc8,  uint16_t, uint8_t,  0, 8,  0xFFu)
+PA_WORD_2(pa_cc8,  uint16_t, uint8_t,  0, 8,  0)
+UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
+PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
+
+// Pack 3 component arrays into a sequence of interleaved comp_t values:
+// c0 c1 c2 c0 c1 c2 ...
+#define PA_SEQ_3(name, comp_t)                                              \
+    static void name(void *dst, void *src[], int w) {                       \
+        comp_t *out = dst;                                                  \
+        for (int i = 0; i < w; i++) {                                       \
+            out[3 * i + 0] = ((comp_t *)src[0])[i];                         \
+            out[3 * i + 1] = ((comp_t *)src[1])[i];                         \
+            out[3 * i + 2] = ((comp_t *)src[2])[i];                         \
+        }                                                                   \
+    }
+
+// Split a sequence of interleaved comp_t values (c0 c1 c2 c0 c1 c2 ...) into
+// 3 component arrays.
+#define UN_SEQ_3(name, comp_t)                                              \
+    static void name(void *src, void *dst[], int w) {                       \
+        comp_t *in = src;                                                   \
+        for (int i = 0; i < w; i++) {                                       \
+            ((comp_t *)dst[0])[i] = in[3 * i + 0];                          \
+            ((comp_t *)dst[1])[i] = in[3 * i + 1];                          \
+            ((comp_t *)dst[2])[i] = in[3 * i + 2];                          \
+        }                                                                   \
+    }
+
+UN_SEQ_3(un_ccc8,  uint8_t)
+PA_SEQ_3(pa_ccc8,  uint8_t)
+UN_SEQ_3(un_ccc16, uint16_t)
+PA_SEQ_3(pa_ccc16, uint16_t)
+
+// "regular": single packed plane, all components have same width (except padding)
+struct regular_repacker {
+    int packed_width;       // num
author	wm4 <wm4@nowhere>	2020-05-09 17:56:44 +0200
committer	wm4 <wm4@nowhere>	2020-05-09 18:02:57 +0200
commit	d8002f1dde94771952b301f4ebe331c52bc71871 (patch)
tree	446e52500afe9548d4a88b128b32c415c1909972
parent	d61ced37ae39b4a2dcd49e783f3c292a8d97b14a (diff)
download	mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.bz2 mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.xz