video: separate repacking code from zimg and make it independent

For whatever purpose. If anything, this makes the zimg wrapper cleaner. The added tests are not particular exhaustive, but nice to have. This also makes the scale_zimg.c test pretty useless, because it only tests repacking (going through the zimg wrapper). In theory, the repack_tests things could also be used on scalers, but I guess it doesn't matter. Some things are added over the previous zimg wrapper code. For example, some fringe formats can now be expanded to 8 bit per component for convenience.
author: wm4 <wm4@nowhere> 2020-05-09 17:56:44 +0200
committer: wm4 <wm4@nowhere> 2020-05-09 18:02:57 +0200
commit: d8002f1dde94771952b301f4ebe331c52bc71871 (patch)
tree: 446e52500afe9548d4a88b128b32c415c1909972 /video/zimg.c
parent: d61ced37ae39b4a2dcd49e783f3c292a8d97b14a (diff)
download: mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.bz2
mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.xz
1 files changed, 60 insertions, 894 deletions
diff --git a/video/zimg.c b/video/zimg.c
index ae3602d297..4e7711f61a 100644
--- a/video/zimg.c
+++ b/video/zimg.c
@@ -25,6 +25,7 @@
 #include "csputils.h"
 #include "options/m_config.h"
 #include "options/m_option.h"
+#include "repack.h"
 #include "video/fmt-conversion.h"
 #include "video/img_format.h"
 #include "zimg.h"
@@ -83,50 +84,14 @@ struct mp_zimg_repack {
     int num_planes;             // number of planes involved
     unsigned zmask[4];          // zmask[mp_index] = zimg mask (using mp index!)
     int z_planes[4];            // z_planes[zimg_index] = mp_index (or -1)
-    bool pass_through_y;        // luma plane optimization for e.g. nv12
 
-    // If set, the pack/unpack callback to pass to zimg.
-    // Called with user==mp_zimg_repack.
-    zimg_filter_graph_callback repack;
-
-    // Endian-swap (done before/after actual repacker).
-    int endian_size;            // 0=no swapping, 2/4=word byte size to swap
-    int endian_items[4];        // number of words per pixel/plane
-
-    // For packed_repack.
-    int components[4];          // p2[n] = mp_image.planes[components[n]]
-    //  pack:   p1 is dst, p2 is src
-    //  unpack: p1 is src, p2 is dst
-    void (*packed_repack_scanline)(void *p1, void *p2[], int x0, int x1);
-
-    // Fringe RGB/YUV.
-    uint8_t comp_size;
-    uint8_t *comp_map;
-    uint8_t comp_shifts[3];
-    uint8_t *comp_lut; // 256 * 3
+    struct mp_repack *repack;   // converting to/from planar
 
     // Temporary memory for slice-wise repacking. This may be set even if repack
     // is not set (then it may be used to avoid alignment issues). This has
     // about one slice worth of data.
     struct mp_image *tmp;
 
-    // Temporary memory for endian swapping. This has about one slice worth
-    // of data; set and used only if endian swapping is used (endian_size>0).
-    // It's also used only for pack==false; packers do this in-place.
-    struct mp_image *tmp_endian;
-
-    // Temporary, per-call source/target frame.
-    struct mp_image *mpi;
-    // Y coordinate of first line in mpi; usually 0 if mpi==user_mpi, or the
-    // start of the current slice (in the current repack cb).
-    // repackers should use: mpi->data[p] + mpi->stride[p] * (i - mpi_y0)
-    int mpi_y0;
-
-    struct mp_image *user_mpi;
-
-    // Also temporary, per-call. use_buf[n] == plane n uses tmp (and not mpi).
-    bool use_buf[4];
-
     int real_w, real_h;         // aligned size
 };
 
@@ -243,532 +208,44 @@ void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx,
     mp_zimg_update_from_cmdline(ctx); // first update
 }
 
-static int repack_align(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    for (int p = 0; p < r->mpi->fmt.num_planes; p++) {
-        if (!r->use_buf[p])
-            continue;
-
-        int bpp = r->mpi->fmt.bytes[p];
-        int xs = r->mpi->fmt.xs[p];
-        int ys = r->mpi->fmt.ys[p];
-        // Number of lines on this plane.
-        int h = (1 << r->mpi->fmt.chroma_ys) - (1 << ys) + 1;
-
-        for (int y = i; y < i + h; y++) {
-            void *a = r->mpi->planes[p] +
-                      r->mpi->stride[p] * (ptrdiff_t)((y - r->mpi_y0) >> ys) +
-                      bpp * (x0 >> xs);
-            void *b = r->tmp->planes[p] +
-                      r->tmp->stride[p] * (ptrdiff_t)((y >> ys) & r->zmask[p]) +
-                      bpp * (x0 >> xs);
-            size_t size = ((x1 - x0) >> xs) * bpp;
-            if (r->pack) {
-                memcpy(a, b, size);
-            } else {
-                memcpy(b, a, size);
-            }
-        }
-    }
-
-    return 0;
-}
-
-// Swap endian for one line.
-static void swap_endian(struct mp_zimg_repack *r, struct mp_image *dst, int dst_y,
-                        struct mp_image *src, int src_y, int x0, int x1)
-{
-    for (int p = 0; p < dst->fmt.num_planes; p++) {
-        int xs = dst->fmt.xs[p];
-        int ys = dst->fmt.ys[p];
-        int words_per_pixel = r->endian_items[p];
-        int bpp = words_per_pixel * r->endian_size;
-        // Number of lines on this plane.
-        int h = (1 << dst->fmt.chroma_ys) - (1 << ys) + 1;
-        int num_words = ((x1 - x0) >> xs) * words_per_pixel;
-
-        for (int y = 0; y < h; y++) {
-            void *s = src->planes[p] +
-                      src->stride[p] * (ptrdiff_t)((y + src_y) >> ys) +
-                      bpp * (x0 >> xs);
-            void *d = dst->planes[p] +
-                      dst->stride[p] * (ptrdiff_t)((y + dst_y) >> ys) +
-                      bpp * (x0 >> xs);
-            switch (r->endian_size) {
-            case 2:
-                for (int w = 0; w < num_words; w++)
-                    ((uint16_t *)d)[w] = av_bswap16(((uint16_t *)s)[w]);
-                break;
-            case 4:
-                for (int w = 0; w < num_words; w++)
-                    ((uint32_t *)d)[w] = av_bswap32(((uint32_t *)s)[w]);
-                break;
-            default:
-                assert(0);
-            }
-        }
-    }
-}
-
-// PA = PAck, copy planar input to single packed array
-// UN = UNpack, copy packed input to planar output
-// Naming convention:
-//  pa_/un_ prefix to identify conversion direction.
-//  Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
-//      (This is unusual; MSB to LSB is more commonly used to describe formats,
-//       but our convention makes more sense for byte access in little endian.)
-//  "c" identifies a color component.
-//  "z" identifies known zero padding.
-//  "x" identifies uninitialized padding.
-//  A component is followed by its size in bits.
-//  Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
-// Unpackers will often use "x" for padding, because they ignore it, while
-// packers will use "z" because they write zero.
-
-#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
-    static void name(void *dst, void *src[], int x0, int x1) {              \
-        for (int x = x0; x < x1; x++) {                                     \
-            ((packed_t *)dst)[x] =                                          \
-                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
-                ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) |             \
-                ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) |             \
-                ((packed_t)((plane_t *)src[3])[x] << (sh_c3));              \
-        }                                                                   \
-    }
-
-#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
-    static void name(void *src, void *dst[], int x0, int x1) {              \
-        for (int x = x0; x < x1; x++) {                                     \
-            packed_t c = ((packed_t *)src)[x];                              \
-            ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask);               \
-            ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask);               \
-            ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask);               \
-            ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask);               \
-        }                                                                   \
-    }
-
-
-#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
-    static void name(void *dst, void *src[], int x0, int x1) {              \
-        for (int x = x0; x < x1; x++) {                                     \
-            ((packed_t *)dst)[x] = (pad) |                                  \
-                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
-                ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) |             \
-                ((packed_t)((plane_t *)src[2])[x] << (sh_c2));              \
-        }                                                                   \
-    }
-
-UN_WORD_4(un_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24, 0xFFu)
-PA_WORD_4(pa_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24)
-// Not sure if this is a good idea; there may be no alignment guarantee.
-UN_WORD_4(un_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48, 0xFFFFu)
-PA_WORD_4(pa_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48)
-
-#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask)       \
-    static void name(void *src, void *dst[], int x0, int x1) {              \
-        for (int x = x0; x < x1; x++) {                                     \
-            packed_t c = ((packed_t *)src)[x];                              \
-            ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask);               \
-            ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask);               \
-            ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask);               \
-        }                                                                   \
-    }
-
-UN_WORD_3(un_ccc8x8,  uint32_t, uint8_t,  0, 8,  16, 0xFFu)
-PA_WORD_3(pa_ccc8z8,  uint32_t, uint8_t,  0, 8,  16, 0)
-UN_WORD_3(un_x8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0xFFu)
-PA_WORD_3(pa_z8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0)
-UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
-PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 20, 10, 0, 0)
-
-#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad)               \
-    static void name(void *dst, void *src[], int x0, int x1) {              \
-        for (int x = x0; x < x1; x++) {                                     \
-            ((packed_t *)dst)[x] = (pad) |                                  \
-                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
-                ((packed_t)((plane_t *)src[1])[x] << (sh_c1));              \
-        }                                                                   \
-    }
-
-#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask)              \
-    static void name(void *src, void *dst[], int x0, int x1) {              \
-        for (int x = x0; x < x1; x++) {                                     \
-            packed_t c = ((packed_t *)src)[x];                              \
-            ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask);               \
-            ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask);               \
-        }                                                                   \
-    }
-
-UN_WORD_2(un_cc8,  uint16_t, uint8_t,  0, 8,  0xFFu)
-PA_WORD_2(pa_cc8,  uint16_t, uint8_t,  0, 8,  0)
-UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
-PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
-
-#define PA_SEQ_3(name, comp_t)                                              \
-    static void name(void *dst, void *src[], int x0, int x1) {              \
-        comp_t *r = dst;                                                    \
-        for (int x = x0; x < x1; x++) {                                     \
-            *r++ = ((comp_t *)src[0])[x];                                   \
-            *r++ = ((comp_t *)src[1])[x];                                   \
-            *r++ = ((comp_t *)src[2])[x];                                   \
-        }                                                                   \
-    }
-
-#define UN_SEQ_3(name, comp_t)                                              \
-    static void name(void *src, void *dst[], int x0, int x1) {              \
-        comp_t *r = src;                                                    \
-        for (int x = x0; x < x1; x++) {                                     \
-            ((comp_t *)dst[0])[x] = *r++;                                   \
-            ((comp_t *)dst[1])[x] = *r++;                                   \
-            ((comp_t *)dst[2])[x] = *r++;                                   \
-        }                                                                   \
-    }
-
-UN_SEQ_3(un_ccc8,  uint8_t)
-PA_SEQ_3(pa_ccc8,  uint8_t)
-UN_SEQ_3(un_ccc16, uint16_t)
-PA_SEQ_3(pa_ccc16, uint16_t)
-
-// "regular": single packed plane, all components have same width (except padding)
-struct regular_repacker {
-    int packed_width;       // number of bits of the packed pixel
-    int component_width;    // number of bits for a single component
-    int prepadding;         // number of bits of LSB padding
-    int num_components;     // number of components that can be accessed
-    void (*pa_scanline)(void *p1, void *p2[], int x0, int x1);
-    void (*un_scanline)(void *p1, void *p2[], int x0, int x1);
-};
-
-static const struct regular_repacker regular_repackers[] = {
-    {32, 8,  0, 3, pa_ccc8z8,  un_ccc8x8},
-    {32, 8,  8, 3, pa_z8ccc8,  un_x8ccc8},
-    {32, 8,  0, 4, pa_cccc8,   un_cccc8},
-    {64, 16, 0, 4, pa_cccc16,  un_cccc16},
-    {24, 8,  0, 3, pa_ccc8,    un_ccc8},
-    {48, 16, 0, 3, pa_ccc16,   un_ccc16},
-    {16, 8,  0, 2, pa_cc8,     un_cc8},
-    {32, 16, 0, 2, pa_cc16,    un_cc16},
-    {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2},
-};
-
-static int packed_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    uint32_t *p1 = (void *)(r->mpi->planes[0] +
-                            r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0));
-
-    void *p2[4] = {0};
-    for (int p = 0; p < r->num_planes; p++) {
-        int s = r->components[p];
-        p2[p] = r->tmp->planes[s] +
-                r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]);
-    }
-
-    r->packed_repack_scanline(p1, p2, x0, x1);
-
-    return 0;
-}
-
-struct fringe_rgb_repacker {
-    // To avoid making a mess of IMGFMT_*, we use av formats directly.
-    enum AVPixelFormat avfmt;
-    // If true, use BGR instead of RGB.
-    //  False:  LSB - R - G - B - pad - MSB
-    //  True:   LSB - B - G - R - pad - MSB
-    bool rev_order;
-    // Size in bit for each component, strictly from LSB to MSB.
-    int bits[3];
-    bool be;
-};
-
-static const struct fringe_rgb_repacker fringe_rgb_repackers[] = {
-    {AV_PIX_FMT_BGR4_BYTE,  false,  {1, 2, 1}},
-    {AV_PIX_FMT_RGB4_BYTE,  true,   {1, 2, 1}},
-    {AV_PIX_FMT_BGR8,       false,  {3, 3, 2}},
-    {AV_PIX_FMT_RGB8,       true,   {2, 3, 3}}, // pixdesc desc. and doc. bug?
-    {AV_PIX_FMT_RGB444LE,   true,   {4, 4, 4}},
-    {AV_PIX_FMT_RGB444BE,   true,   {4, 4, 4}, .be = true},
-    {AV_PIX_FMT_BGR444LE,   false,  {4, 4, 4}},
-    {AV_PIX_FMT_BGR444BE,   false,  {4, 4, 4}, .be = true},
-    {AV_PIX_FMT_BGR565LE,   false,  {5, 6, 5}},
-    {AV_PIX_FMT_BGR565BE,   false,  {5, 6, 5}, .be = true},
-    {AV_PIX_FMT_RGB565LE,   true,   {5, 6, 5}},
-    {AV_PIX_FMT_RGB565BE,   true,   {5, 6, 5}, .be = true},
-    {AV_PIX_FMT_BGR555LE,   false,  {5, 5, 5}},
-    {AV_PIX_FMT_BGR555BE,   false,  {5, 5, 5}, .be = true},
-    {AV_PIX_FMT_RGB555LE,   true,   {5, 5, 5}},
-    {AV_PIX_FMT_RGB555BE,   true,   {5, 5, 5}, .be = true},
-};
-
-#define PA_SHIFT_LUT8(name, packed_t)                                       \
-    static void name(void *dst, void *src[], int x0, int x1, uint8_t *lut,  \
-                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
-        for (int x = x0; x < x1; x++) {                                     \
-            ((packed_t *)dst)[x] =                                          \
-                (lut[((uint8_t *)src[0])[x] + 256 * 0] << s0) |             \
-                (lut[((uint8_t *)src[1])[x] + 256 * 1] << s1) |             \
-                (lut[((uint8_t *)src[2])[x] + 256 * 2] << s2);              \
-        }                                                                   \
-    }
-
-
-#define UN_SHIFT_LUT8(name, packed_t)                                       \
-    static void name(void *src, void *dst[], int x0, int x1, uint8_t *lut,  \
-                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
-        for (int x = x0; x < x1; x++) {                                     \
-            packed_t c = ((packed_t *)src)[x];                              \
-            ((uint8_t *)dst[0])[x] = lut[((c >> s0) & 0xFF) + 256 * 0];     \
-            ((uint8_t *)dst[1])[x] = lut[((c >> s1) & 0xFF) + 256 * 1];     \
-            ((uint8_t *)dst[2])[x] = lut[((c >> s2) & 0xFF) + 256 * 2];     \
-        }                                                                   \
-    }
-
-PA_SHIFT_LUT8(pa_shift_lut8_8,  uint8_t)
-PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t)
-UN_SHIFT_LUT8(un_shift_lut8_8,  uint8_t)
-UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t)
-
-static int fringe_rgb_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    void *p1 = r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0);
-
-    void *p2[4] = {0};
-    for (int p = 0; p < r->num_planes; p++) {
-        int s = r->components[p];
-        p2[p] = r->tmp->planes[s] +
-                r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]);
-    }
-
-    assert(r->comp_size == 1 || r->comp_size == 2);
-
-    void (*repack)(void *p1, void *p2[], int x0, int x1, uint8_t *lut,
-                   uint8_t s0, uint8_t s1, uint8_t s2) = NULL;
-    if (r->pack) {
-        repack = r->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16;
-    } else {
-        repack = r->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16;
-    }
-    repack(p1, p2, x0, x1, r->comp_lut,
-           r->comp_shifts[0], r->comp_shifts[1], r->comp_shifts[2]);
-
-    return 0;
-}
-
-static int bitmap_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    uint8_t *p1 =
-        r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0);
-    uint8_t *p2 =
-        r->tmp->planes[0] + r->tmp->stride[0] * (ptrdiff_t)(i & r->zmask[0]);
-
-    uint8_t swap = r->comp_size ? 0xFF : 0;
-    if (r->pack) {
-        // Supposedly zimg aligns this at least on 64 byte boundaries. Simplifies a
-        // lot for us.
-        assert(!(x0 & 7));
-
-        for (int x = x0; x < x1; x += 8) {
-            uint8_t d = 0;
-            int max_b = MPMIN(8, x1 - x);
-            for (int b = 0; b < max_b; b++)
-                d |= (!!p2[x + b]) << (7 - b);
-            p1[x / 8] = d ^ swap;
-        }
-    } else {
-        x0 &= ~0x7;
-
-        for (int x = x0; x < x1; x += 8) {
-            uint8_t d = p1[x / 8] ^ swap;
-            int max_b = MPMIN(8, x1 - x);
-            for (int b = 0; b < max_b; b++)
-                p2[x + b] = !!(d & (1 << (7 - b)));
-        }
-    }
-
-    return 0;
-}
-
-static int unpack_pal(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    uint8_t *src = (void *)(r->mpi->planes[0] +
-                            r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0));
-    uint32_t *pal = (void *)r->mpi->planes[1];
-
-    uint8_t *dst[4] = {0};
-    for (int p = 0; p < r->num_planes; p++) {
-        dst[p] = r->tmp->planes[p] +
-                 r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]);
-    }
-
-    for (int x = x0; x < x1; x++) {
-        uint32_t c = pal[src[x]];
-        dst[0][x] = (c >>  8) & 0xFF; // G
-        dst[1][x] = (c >>  0) & 0xFF; // B
-        dst[2][x] = (c >> 16) & 0xFF; // R
-        dst[3][x] = (c >> 24) & 0xFF; // A
-    }
-
-    return 0;
-}
-
-struct fringe_yuv422_repacker {
-    // To avoid making a mess of IMGFMT_*, we use av formats directly.
-    enum AVPixelFormat avfmt;
-    // In bits (depth/8 rounded up gives byte size)
-    int8_t depth;
-    // Word index of each sample: {y0, y1, cb, cr}
-    uint8_t comp[4];
-    bool be;
-};
-
-static const struct fringe_yuv422_repacker fringe_yuv422_repackers[] = {
-    {AV_PIX_FMT_YUYV422,  8, {0, 2, 1, 3}},
-    {AV_PIX_FMT_UYVY422,  8, {1, 3, 0, 2}},
-    {AV_PIX_FMT_YVYU422,  8, {0, 2, 3, 1}},
-#ifdef AV_PIX_FMT_Y210
-    {AV_PIX_FMT_Y210LE,  10, {0, 2, 1, 3}},
-    {AV_PIX_FMT_Y210BE,  10, {0, 2, 1, 3}, .be = true},
-#endif
-};
-
-#define PA_P422(name, comp_t)                                               \
-    static void name(void *dst, void *src[], int x0, int x1, uint8_t *c) {  \
-        for (int x = x0; x < x1; x += 2) {                                  \
-            ((comp_t *)dst)[x * 2 + c[0]] = ((comp_t *)src[0])[x + 0];      \
-            ((comp_t *)dst)[x * 2 + c[1]] = ((comp_t *)src[0])[x + 1];      \
-            ((comp_t *)dst)[x * 2 + c[2]] = ((comp_t *)src[1])[x >> 1];     \
-            ((comp_t *)dst)[x * 2 + c[3]] = ((comp_t *)src[2])[x >> 1];     \
-        }                                                                   \
-    }
-
-
-#define UN_P422(name, comp_t)                                               \
-    static void name(void *src, void *dst[], int x0, int x1, uint8_t *c) {  \
-        for (int x = x0; x < x1; x += 2) {                                  \
-            ((comp_t *)dst[0])[x + 0]  = ((comp_t *)src)[x * 2 + c[0]];     \
-            ((comp_t *)dst[0])[x + 1]  = ((comp_t *)src)[x * 2 + c[1]];     \
-            ((comp_t *)dst[1])[x >> 1] = ((comp_t *)src)[x * 2 + c[2]];     \
-            ((comp_t *)dst[2])[x >> 1] = ((comp_t *)src)[x * 2 + c[3]];     \
-        }                                                                   \
-    }
-
-PA_P422(pa_p422_8,  uint8_t)
-PA_P422(pa_p422_16, uint16_t)
-UN_P422(un_p422_8,  uint8_t)
-UN_P422(un_p422_16, uint16_t)
-
-static int fringe_yuv422_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    void *p1 = r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0);
-
-    void *p2[4] = {0};
-    for (int p = 0; p < r->num_planes; p++) {
-        p2[p] = r->tmp->planes[p] +
-                r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]);
-    }
-
-    assert(r->comp_size == 1 || r->comp_size == 2);
-
-    void (*repack)(void *p1, void *p2[], int x0, int x1, uint8_t *c) = NULL;
-    if (r->pack) {
-        repack = r->comp_size == 1 ? pa_p422_8 : pa_p422_16;
-    } else {
-        repack = r->comp_size == 1 ? un_p422_8 : un_p422_16;
-    }
-    repack(p1, p2, x0, x1, r->comp_map);
-
-    return 0;
-}
-
-static int repack_nv(void *user, unsigned i, unsigned x0, unsigned x1)
-{
-    struct mp_zimg_repack *r = user;
-
-    int xs = r->mpi->fmt.chroma_xs;
-    int ys = r->mpi->fmt.chroma_ys;
-
-    if (r->use_buf[0]) {
-        // Copy Y.
-        int l_h = 1 << ys;
-        for (int y = i; y < i + l_h; y++) {
-            ptrdiff_t bpp = r->mpi->fmt.bytes[0];
-            void *a = r->mpi->planes[0] +
-                    r->mpi->stride[0] * (ptrdiff_t)(y - r->mpi_y0) + bpp * x0;
-            void *b = r->tmp->planes[0] +
-                    r->tmp->stride[0] * (ptrdiff_t)(y & r->zmask[0]) + bpp * x0;
-            size_t size = (x1 - x0) * bpp;
-            if (r->pack) {
-                memcpy(a, b, size);
-            } else {
-                memcpy(b, a, size);
-            }
-        }
-    }
-
-    uint32_t *p1 = (void *)(r->mpi->planes[1] +
-                            r->mpi->stride[1] * (ptrdiff_t)((i - r->mpi_y0) >> ys));
-
-    void *p2[2];
-    for (int p = 0; p < 2; p++) {
-        int s = r->components[p];
-        p2[p] = r->tmp->planes[s] +
-                r->tmp->stride[s] * (ptrdiff_t)((i >> ys) & r->zmask[s]);
-    }
-
-    r->packed_repack_scanline(p1, p2, x0 >> xs, x1 >> xs);
-
-    return 0;
-}
-
 static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1)
 {
     struct mp_zimg_repack *r = user;
 
-    if (r->endian_size && !r->pack) {
-        r->mpi = r->tmp_endian;
-        r->mpi_y0 = i;
-        swap_endian(r, r->mpi, 0, r->user_mpi, i, x0, x1);
-    } else {
-        r->mpi = r->user_mpi;
-        r->mpi_y0 = 0;
-    }
+    // If reading is not aligned, just read slightly more data.
+    if (!r->pack)
+        x0 &= ~(unsigned)(mp_repack_get_align_x(r->repack) - 1);
 
-    if (r->repack) {
-        r->repack(r, i, x0, x1);
-    } else {
-        repack_align(r, i, x0, x1);
-    }
+    // mp_repack requirements and zimg guarantees.
+    assert(!(i & (mp_repack_get_align_y(r->repack) - 1)));
+    assert(!(x0 & (mp_repack_get_align_x(r->repack) - 1)));
+
+    unsigned i_src = i & (r->pack ? r->zmask[0] : ZIMG_BUFFER_MAX);
+    unsigned i_dst = i & (r->pack ? ZIMG_BUFFER_MAX : r->zmask[0]);
 
-    if (r->endian_size && r->pack)
-        swap_endian(r, r->user_mpi, i, r->mpi, i - r->mpi_y0, x0, x1);
+    repack_line(r->repack, x0, i_dst, x0, i_src, x1 - x0);
 
-    r->mpi = NULL;
     return 0;
 }
 
-static void wrap_buffer(struct mp_zimg_repack *r,
+static bool wrap_buffer(struct mp_zimg_repack *r,
                         zimg_image_buffer *buf,
                         struct mp_image *mpi)
 {
     *buf = (zimg_image_buffer){ZIMG_API_VERSION};
 
-    bool plane_aligned[4] = {0};
-    for (int n = 0; n < r->num_planes; n++) {
-        plane_aligned[n] = !((uintptr_t)mpi->planes[n] % ZIMG_ALIGN) &&
-                           !(mpi->stride[n] % ZIMG_ALIGN);
+    bool direct[MP_MAX_PLANES] = {0};
+
+    for (int p = 0; p < mpi->num_planes; p++) {
+        // If alignment is good, try to avoid copy.
+        direct[p] = !((uintptr_t)mpi->planes[p] % ZIMG_ALIGN) &&
+                    !(mpi->stride[p] % ZIMG_ALIGN);
     }
 
+    if (!repack_config_buffers(r->repack, 0, r->pack ? mpi : r->tmp,
+                                          0, r->pack ? r->tmp : mpi, direct))
+        return false;
+
     for (int n = 0; n < MP_ARRAY_SIZE(buf->plane); n++) {
         // Note: this is really the only place we have to care about plane
         // permutation (zimg_image_buffer may have a different plane order
@@ -778,355 +255,67 @@ static void wrap_buffer(struct mp_zimg_repack *r,
         if (mplane < 0)
             continue;
 
-        r->use_buf[mplane] = !plane_aligned[mplane] || r->endian_size;
-        if (!(r->pass_through_y && mplane == 0))
-            r->use_buf[mplane] |= !!r->repack;
-
-        struct mp_image *tmpi = r->use_buf[mplane] ? r->tmp : mpi;
+        struct mp_image *tmpi = direct[mplane] ? mpi : r->tmp;
         buf->plane[n].data = tmpi->planes[mplane];
         buf->plane[n].stride = tmpi->stride[mplane];
-        buf->plane[n].mask = r->use_buf[mplane] ? r->zmask[mplane]
-                                                : ZIMG_BUFFER_MAX;
+        buf->plane[n].mask = direct[mplane] ? ZIMG_BUFFER_MAX : r->zmask[mplane];
     }
 
-    r->user_mpi = mpi;
-}
-
-// depth = number of LSB in use
-static int find_gbrp_format(int depth, int num_planes)
-{
-    if (num_planes != 3 && num_planes != 4)
-        return 0;
-    struct mp_regular_imgfmt desc = {
-        .component_type = MP_COMPONENT_TYPE_UINT,
-        .forced_csp = MP_CSP_RGB,
-        .component_size = depth > 8 ? 2 : 1,
-        .component_pad = depth - (depth > 8 ? 16 : 8),
-        .num_planes = num_planes,
-        .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
-    };
-    return mp_find_regular_imgfmt(&desc);
-}
-
-// depth = number of LSB in use
-static int find_gray_format(int depth, int num_planes)
-{
-    if (num_planes != 1 && num_planes != 2)
-        return 0;
-    struct mp_regular_imgfmt desc = {
-        .component_type = MP_COMPONENT_TYPE_UINT,
-        .component_size = depth > 8 ? 2 : 1,
-        .component_pad = depth - (depth > 8 ? 16 : 8),
-        .num_planes = num_planes,
-        .planes = { {1, {1}}, {1, {4}} },
-    };
-    return mp_find_regular_imgfmt(&desc);
+    return true;
 }
 
-static void setup_fringe_rgb_packer(struct mp_zimg_repack *r,
-                                    struct mp_zimg_context *ctx)
+// (ctx can be NULL for probing.)
+static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r,
+                         bool pack, struct mp_image_params *user_fmt,
+                         struct mp_zimg_context *ctx)
 {
-    enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt);
+    r->fmt = *user_fmt;
+    r->pack = pack;
 
-    const struct fringe_rgb_repacker *fmt = NULL;
-    for (int n = 0; n < MP_ARRAY_SIZE(fringe_rgb_repackers); n++) {
-        if (fringe_rgb_repackers[n].avfmt == avfmt) {
-            fmt = &fringe_rgb_repackers[n];
-            break;
-        }
-    }
+    zimg_image_format_default(zfmt, ZIMG_API_VERSION);
 
-    if (!fmt)
-        return;
+    int rp_flags = 0;
 
-    int depth = 8;
+    // For e.g. RGB565, go to lowest depth on pack for less weird dithering.
     if (r->pack) {
-        // Dither to lowest depth - loses some precision, but result is saner.
-        depth = fmt->bits[0];
-        for (int n = 0; n < 3; n++)
-            depth = MPMIN(depth, fmt->bits[n]);
-    }
-
-    r->zimgfmt = find_gbrp_format(depth, 3);
-    if (!r->zimgfmt)
-        return;
-    if (ctx)
-        r->comp_lut = talloc_array(ctx, uint8_t, 256 * 3);
-    r->repack = fringe_rgb_repack;
-    static const int c_order_rgb[] = {3, 1, 2};
-    static const int c_order_bgr[] = {2, 1, 3};
-    for (int n = 0; n < 3; n++)
-        r->components[n] = (fmt->rev_order ? c_order_bgr : c_order_rgb)[n] - 1;
-
-    int bitpos = 0;
-    for (int n = 0; n < 3; n++) {
-        int bits = fmt->bits[n];
-        r->comp_shifts[n] = bitpos;
-        if (r->comp_lut) {
-            uint8_t *lut = r->comp_lut + 256 * n;
-            uint8_t zmax = (1 << depth) - 1;
-            uint8_t cmax = (1 << bits) - 1;
-            for (int v = 0; v < 256; v++) {
-                if (r->pack) {
-                    lut[v] = (v * cmax + zmax / 2) / zmax;
-                } else {
-                    lut[v] = (v & cmax) * zmax / cmax;
-                }
-            }
-        }
-        bitpos += bits;
-    }
-
-    r->comp_size = (bitpos + 7) / 8;
-    assert(r->comp_size == 1 || r->comp_size == 2);
-
-    if (fmt->be) {
-        assert(r->comp_size == 2);
-        r->endian_size = 2;
-        r->endian_items[0] = 1;
-    }
-}
-
-static void setup_fringe_yuv422_packer(struct mp_zimg_repack *r)
-{
-    enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt);
-
-    const struct fringe_yuv422_repacker *fmt = NULL;
-    for (int n = 0; n < MP_ARRAY_SIZE(fringe_yuv422_repackers); n++) {
-        if (fringe_yuv422_repackers[n].avfmt == avfmt) {
-            fmt = &fringe_yuv422_repackers[n];
-            break;
-        }
-    }
-
-    if (!fmt)
-        return;
-
-    r->comp_size = (fmt->depth + 7) / 8;
-    assert(r->comp_size == 1 || r->comp_size == 2);
-
-    struct mp_regular_imgfmt yuvfmt = {
-        .component_type = MP_COMPONENT_TYPE_UINT,
-        // NB: same problem with P010 and not clearing padding.
-        .component_size = r->comp_size,
-        .num_planes = 3,
-        .planes = { {1, {1}}, {1, {2}}, {1, {3}} },
-        .chroma_xs = 1,
-        .chroma_ys = 0,
-    };
-    r->zimgfmt = mp_find_regular_imgfmt(&yuvfmt);
-    r->repack = fringe_yuv422_repack;
-    r->comp_map = (uint8_t *)fmt->comp;
-
-    if (fmt->be) {
-        assert(r->comp_size == 2);
-        r->endian_size = 2;
-        r->endian_items[0] = 4;
-    }
-}
-
-static void setup_nv_packer(struct mp_zimg_repack *r)
-{
-    struct mp_regular_imgfmt desc;
-    if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
-        return;
-
-    // Check for NV.
-    if (desc.num_planes != 2)
-        return;
-    if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
-        return;
-    if (desc.planes[1].num_components != 2)
-        return;
-    int cr0 = desc.planes[1].components[0];
-    int cr1 = desc.planes[1].components[1];
-    if (cr0 > cr1)
-        MPSWAP(int, cr0, cr1);
-    if (cr0 != 2 || cr1 != 3)
-        return;
-
-    // Construct equivalent planar format.
-    struct mp_regular_imgfmt desc2 = desc;
-    desc2.num_planes = 3;
-    desc2.planes[1].num_components = 1;
-    desc2.planes[1].components[0] = 2;
-    desc2.planes[2].num_components = 1;
-    desc2.planes[2].components[0] = 3;
-    // For P010. Strangely this concept exists only for the NV format.
-    if (desc2.component_pad > 0)
-        desc2.component_pad = 0;
-
-    int planar_fmt = mp_find_regular_imgfmt(&desc2);
-    if (!planar_fmt)
-        return;
-
-    for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
-        const struct regular_repacker *pa = &regular_repackers[i];
-
-        void (*repack_cb)(void *p1, void *p2[], int x0, int x1) =
-            r->pack ? pa->pa_scanline : pa->un_scanline;
-
-        if (pa->packed_width != desc.component_size * 2 * 8 ||
-            pa->component_width != desc.component_size * 8 ||
-            pa->num_components != 2 ||
-            pa->prepadding != 0 ||
-            !repack_cb)
-            continue;
-
-        r->repack = repack_nv;
-        r->pass_through_y = true;
-        r->packed_repack_scanline = repack_cb;
-        r->zimgfmt = planar_fmt;
-        r->components[0] = desc.planes[1].components[0] - 1;
-        r->components[1] = desc.planes[1].components[1] - 1;
-        return;
-    }
-}
-
-static void setup_misc_packer(struct mp_zimg_repack *r)
-{
-    // Although it's in regular_repackers[], the generic mpv imgfmt metadata
-    // can't handle it yet.
-    if (r->zimgfmt == IMGFMT_RGB30) {
-        int planar_fmt = find_gbrp_format(10, 3);
-        if (!planar_fmt)
-            return;
-        r->zimgfmt = planar_fmt;
-        r->repack = packed_repack;
-        r->packed_repack_scanline = r->pack ? pa_ccc10z2 : un_ccc10x2;
-        static int c_order[] = {3, 2, 1};
-        for (int n = 0; n < 3; n++)
-            r->components[n] = c_order[n] - 1;
-    } else if (r->zimgfmt == IMGFMT_PAL8 && !r->pack) {
-        int grap_fmt = find_gbrp_format(8, 4);
-        if (!grap_fmt)
-            return;
-        r->zimgfmt = grap_fmt;
-        r->repack = unpack_pal;
+        rp_flags |= REPACK_CREATE_ROUND_DOWN;
     } else {
-        enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt);
-        if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) {
-            r->zimgfmt = IMGFMT_Y1;
-            r->repack = bitmap_repack;
-            r->comp_size = avfmt == AV_PIX_FMT_MONOWHITE; // abuse to pass a flag
-            return;
-        }
+        rp_flags |= REPACK_CREATE_EXPAND_8BIT;
     }
-}
-
-// Tries to set a packer/unpacker for component-wise byte aligned RGB formats.
-static void setup_regular_rgb_packer(struct mp_zimg_repack *r)
-{
-    struct mp_regular_imgfmt desc;
-    if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
-        return;
 
-    if (desc.num_planes != 1 || desc.planes[0].num_components < 2)
-        return;
-    struct mp_regular_imgfmt_plane *p = &desc.planes[0];
-
-    int num_real_components = 0;
-    bool has_alpha = false;
-    for (int n = 0; n < p->num_components; n++) {
-        if (p->components[n]) {
-            has_alpha |= p->components[n] == 4;
-            num_real_components += 1;
-        } else {
-            // padding must be in MSB or LSB
-            if (n != 0 && n != p->num_components - 1)
-                return;
-        }
-    }
+    r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags);
+    if (!r->repack)
+        return false;
 
-    int depth = desc.component_size * 8 + MPMIN(0, desc.component_pad);
+    int align_x = mp_repack_get_align_x(r->repack);
 
-    int planar_fmt = num_real_componen
author	wm4 <wm4@nowhere>	2020-05-09 17:56:44 +0200
committer	wm4 <wm4@nowhere>	2020-05-09 18:02:57 +0200
commit	d8002f1dde94771952b301f4ebe331c52bc71871 (patch)
tree	446e52500afe9548d4a88b128b32c415c1909972 /video/zimg.c
parent	d61ced37ae39b4a2dcd49e783f3c292a8d97b14a (diff)
download	mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.bz2 mpv-d8002f1dde94771952b301f4ebe331c52bc71871.tar.xz