summaryrefslogtreecommitdiffstats
path: root/video/zimg.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/zimg.c')
-rw-r--r--video/zimg.c954
1 files changed, 60 insertions, 894 deletions
diff --git a/video/zimg.c b/video/zimg.c
index ae3602d297..4e7711f61a 100644
--- a/video/zimg.c
+++ b/video/zimg.c
@@ -25,6 +25,7 @@
#include "csputils.h"
#include "options/m_config.h"
#include "options/m_option.h"
+#include "repack.h"
#include "video/fmt-conversion.h"
#include "video/img_format.h"
#include "zimg.h"
@@ -83,50 +84,14 @@ struct mp_zimg_repack {
int num_planes; // number of planes involved
unsigned zmask[4]; // zmask[mp_index] = zimg mask (using mp index!)
int z_planes[4]; // z_planes[zimg_index] = mp_index (or -1)
- bool pass_through_y; // luma plane optimization for e.g. nv12
- // If set, the pack/unpack callback to pass to zimg.
- // Called with user==mp_zimg_repack.
- zimg_filter_graph_callback repack;
-
- // Endian-swap (done before/after actual repacker).
- int endian_size; // 0=no swapping, 2/4=word byte size to swap
- int endian_items[4]; // number of words per pixel/plane
-
- // For packed_repack.
- int components[4]; // p2[n] = mp_image.planes[components[n]]
- // pack: p1 is dst, p2 is src
- // unpack: p1 is src, p2 is dst
- void (*packed_repack_scanline)(void *p1, void *p2[], int x0, int x1);
-
- // Fringe RGB/YUV.
- uint8_t comp_size;
- uint8_t *comp_map;
- uint8_t comp_shifts[3];
- uint8_t *comp_lut; // 256 * 3
+ struct mp_repack *repack; // converting to/from planar
// Temporary memory for slice-wise repacking. This may be set even if repack
// is not set (then it may be used to avoid alignment issues). This has
// about one slice worth of data.
struct mp_image *tmp;
- // Temporary memory for endian swapping. This has about one slice worth
- // of data; set and used only if endian swapping is used (endian_size>0).
- // It's also used only for pack==false; packers do this in-place.
- struct mp_image *tmp_endian;
-
- // Temporary, per-call source/target frame.
- struct mp_image *mpi;
- // Y coordinate of first line in mpi; usually 0 if mpi==user_mpi, or the
- // start of the current slice (in the current repack cb).
- // repackers should use: mpi->data[p] + mpi->stride[p] * (i - mpi_y0)
- int mpi_y0;
-
- struct mp_image *user_mpi;
-
- // Also temporary, per-call. use_buf[n] == plane n uses tmp (and not mpi).
- bool use_buf[4];
-
int real_w, real_h; // aligned size
};
@@ -243,532 +208,44 @@ void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx,
mp_zimg_update_from_cmdline(ctx); // first update
}
-static int repack_align(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- for (int p = 0; p < r->mpi->fmt.num_planes; p++) {
- if (!r->use_buf[p])
- continue;
-
- int bpp = r->mpi->fmt.bytes[p];
- int xs = r->mpi->fmt.xs[p];
- int ys = r->mpi->fmt.ys[p];
- // Number of lines on this plane.
- int h = (1 << r->mpi->fmt.chroma_ys) - (1 << ys) + 1;
-
- for (int y = i; y < i + h; y++) {
- void *a = r->mpi->planes[p] +
- r->mpi->stride[p] * (ptrdiff_t)((y - r->mpi_y0) >> ys) +
- bpp * (x0 >> xs);
- void *b = r->tmp->planes[p] +
- r->tmp->stride[p] * (ptrdiff_t)((y >> ys) & r->zmask[p]) +
- bpp * (x0 >> xs);
- size_t size = ((x1 - x0) >> xs) * bpp;
- if (r->pack) {
- memcpy(a, b, size);
- } else {
- memcpy(b, a, size);
- }
- }
- }
-
- return 0;
-}
-
-// Swap endian for one line.
-static void swap_endian(struct mp_zimg_repack *r, struct mp_image *dst, int dst_y,
- struct mp_image *src, int src_y, int x0, int x1)
-{
- for (int p = 0; p < dst->fmt.num_planes; p++) {
- int xs = dst->fmt.xs[p];
- int ys = dst->fmt.ys[p];
- int words_per_pixel = r->endian_items[p];
- int bpp = words_per_pixel * r->endian_size;
- // Number of lines on this plane.
- int h = (1 << dst->fmt.chroma_ys) - (1 << ys) + 1;
- int num_words = ((x1 - x0) >> xs) * words_per_pixel;
-
- for (int y = 0; y < h; y++) {
- void *s = src->planes[p] +
- src->stride[p] * (ptrdiff_t)((y + src_y) >> ys) +
- bpp * (x0 >> xs);
- void *d = dst->planes[p] +
- dst->stride[p] * (ptrdiff_t)((y + dst_y) >> ys) +
- bpp * (x0 >> xs);
- switch (r->endian_size) {
- case 2:
- for (int w = 0; w < num_words; w++)
- ((uint16_t *)d)[w] = av_bswap16(((uint16_t *)s)[w]);
- break;
- case 4:
- for (int w = 0; w < num_words; w++)
- ((uint32_t *)d)[w] = av_bswap32(((uint32_t *)s)[w]);
- break;
- default:
- assert(0);
- }
- }
- }
-}
-
-// PA = PAck, copy planar input to single packed array
-// UN = UNpack, copy packed input to planar output
-// Naming convention:
-// pa_/un_ prefix to identify conversion direction.
-// Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
-// (This is unusual; MSB to LSB is more commonly used to describe formats,
-// but our convention makes more sense for byte access in little endian.)
-// "c" identifies a color component.
-// "z" identifies known zero padding.
-// "x" identifies uninitialized padding.
-// A component is followed by its size in bits.
-// Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
-// Unpackers will often use "x" for padding, because they ignore it, while
-// packers will use "z" because they write zero.
-
-#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3) \
- static void name(void *dst, void *src[], int x0, int x1) { \
- for (int x = x0; x < x1; x++) { \
- ((packed_t *)dst)[x] = \
- ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
- ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \
- ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) | \
- ((packed_t)((plane_t *)src[3])[x] << (sh_c3)); \
- } \
- }
-
-#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
- static void name(void *src, void *dst[], int x0, int x1) { \
- for (int x = x0; x < x1; x++) { \
- packed_t c = ((packed_t *)src)[x]; \
- ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
- ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \
- ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \
- ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask); \
- } \
- }
-
-
-#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad) \
- static void name(void *dst, void *src[], int x0, int x1) { \
- for (int x = x0; x < x1; x++) { \
- ((packed_t *)dst)[x] = (pad) | \
- ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
- ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \
- ((packed_t)((plane_t *)src[2])[x] << (sh_c2)); \
- } \
- }
-
-UN_WORD_4(un_cccc8, uint32_t, uint8_t, 0, 8, 16, 24, 0xFFu)
-PA_WORD_4(pa_cccc8, uint32_t, uint8_t, 0, 8, 16, 24)
-// Not sure if this is a good idea; there may be no alignment guarantee.
-UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu)
-PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48)
-
-#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \
- static void name(void *src, void *dst[], int x0, int x1) { \
- for (int x = x0; x < x1; x++) { \
- packed_t c = ((packed_t *)src)[x]; \
- ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
- ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \
- ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \
- } \
- }
-
-UN_WORD_3(un_ccc8x8, uint32_t, uint8_t, 0, 8, 16, 0xFFu)
-PA_WORD_3(pa_ccc8z8, uint32_t, uint8_t, 0, 8, 16, 0)
-UN_WORD_3(un_x8ccc8, uint32_t, uint8_t, 8, 16, 24, 0xFFu)
-PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0)
-UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
-PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 20, 10, 0, 0)
-
-#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \
- static void name(void *dst, void *src[], int x0, int x1) { \
- for (int x = x0; x < x1; x++) { \
- ((packed_t *)dst)[x] = (pad) | \
- ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
- ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \
- } \
- }
-
-#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \
- static void name(void *src, void *dst[], int x0, int x1) { \
- for (int x = x0; x < x1; x++) { \
- packed_t c = ((packed_t *)src)[x]; \
- ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
- ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \
- } \
- }
-
-UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu)
-PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0)
-UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
-PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
-
-#define PA_SEQ_3(name, comp_t) \
- static void name(void *dst, void *src[], int x0, int x1) { \
- comp_t *r = dst; \
- for (int x = x0; x < x1; x++) { \
- *r++ = ((comp_t *)src[0])[x]; \
- *r++ = ((comp_t *)src[1])[x]; \
- *r++ = ((comp_t *)src[2])[x]; \
- } \
- }
-
-#define UN_SEQ_3(name, comp_t) \
- static void name(void *src, void *dst[], int x0, int x1) { \
- comp_t *r = src; \
- for (int x = x0; x < x1; x++) { \
- ((comp_t *)dst[0])[x] = *r++; \
- ((comp_t *)dst[1])[x] = *r++; \
- ((comp_t *)dst[2])[x] = *r++; \
- } \
- }
-
-UN_SEQ_3(un_ccc8, uint8_t)
-PA_SEQ_3(pa_ccc8, uint8_t)
-UN_SEQ_3(un_ccc16, uint16_t)
-PA_SEQ_3(pa_ccc16, uint16_t)
-
-// "regular": single packed plane, all components have same width (except padding)
-struct regular_repacker {
- int packed_width; // number of bits of the packed pixel
- int component_width; // number of bits for a single component
- int prepadding; // number of bits of LSB padding
- int num_components; // number of components that can be accessed
- void (*pa_scanline)(void *p1, void *p2[], int x0, int x1);
- void (*un_scanline)(void *p1, void *p2[], int x0, int x1);
-};
-
-static const struct regular_repacker regular_repackers[] = {
- {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8},
- {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8},
- {32, 8, 0, 4, pa_cccc8, un_cccc8},
- {64, 16, 0, 4, pa_cccc16, un_cccc16},
- {24, 8, 0, 3, pa_ccc8, un_ccc8},
- {48, 16, 0, 3, pa_ccc16, un_ccc16},
- {16, 8, 0, 2, pa_cc8, un_cc8},
- {32, 16, 0, 2, pa_cc16, un_cc16},
- {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2},
-};
-
-static int packed_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- uint32_t *p1 = (void *)(r->mpi->planes[0] +
- r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0));
-
- void *p2[4] = {0};
- for (int p = 0; p < r->num_planes; p++) {
- int s = r->components[p];
- p2[p] = r->tmp->planes[s] +
- r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]);
- }
-
- r->packed_repack_scanline(p1, p2, x0, x1);
-
- return 0;
-}
-
-struct fringe_rgb_repacker {
- // To avoid making a mess of IMGFMT_*, we use av formats directly.
- enum AVPixelFormat avfmt;
- // If true, use BGR instead of RGB.
- // False: LSB - R - G - B - pad - MSB
- // True: LSB - B - G - R - pad - MSB
- bool rev_order;
- // Size in bit for each component, strictly from LSB to MSB.
- int bits[3];
- bool be;
-};
-
-static const struct fringe_rgb_repacker fringe_rgb_repackers[] = {
- {AV_PIX_FMT_BGR4_BYTE, false, {1, 2, 1}},
- {AV_PIX_FMT_RGB4_BYTE, true, {1, 2, 1}},
- {AV_PIX_FMT_BGR8, false, {3, 3, 2}},
- {AV_PIX_FMT_RGB8, true, {2, 3, 3}}, // pixdesc desc. and doc. bug?
- {AV_PIX_FMT_RGB444LE, true, {4, 4, 4}},
- {AV_PIX_FMT_RGB444BE, true, {4, 4, 4}, .be = true},
- {AV_PIX_FMT_BGR444LE, false, {4, 4, 4}},
- {AV_PIX_FMT_BGR444BE, false, {4, 4, 4}, .be = true},
- {AV_PIX_FMT_BGR565LE, false, {5, 6, 5}},
- {AV_PIX_FMT_BGR565BE, false, {5, 6, 5}, .be = true},
- {AV_PIX_FMT_RGB565LE, true, {5, 6, 5}},
- {AV_PIX_FMT_RGB565BE, true, {5, 6, 5}, .be = true},
- {AV_PIX_FMT_BGR555LE, false, {5, 5, 5}},
- {AV_PIX_FMT_BGR555BE, false, {5, 5, 5}, .be = true},
- {AV_PIX_FMT_RGB555LE, true, {5, 5, 5}},
- {AV_PIX_FMT_RGB555BE, true, {5, 5, 5}, .be = true},
-};
-
-#define PA_SHIFT_LUT8(name, packed_t) \
- static void name(void *dst, void *src[], int x0, int x1, uint8_t *lut, \
- uint8_t s0, uint8_t s1, uint8_t s2) { \
- for (int x = x0; x < x1; x++) { \
- ((packed_t *)dst)[x] = \
- (lut[((uint8_t *)src[0])[x] + 256 * 0] << s0) | \
- (lut[((uint8_t *)src[1])[x] + 256 * 1] << s1) | \
- (lut[((uint8_t *)src[2])[x] + 256 * 2] << s2); \
- } \
- }
-
-
-#define UN_SHIFT_LUT8(name, packed_t) \
- static void name(void *src, void *dst[], int x0, int x1, uint8_t *lut, \
- uint8_t s0, uint8_t s1, uint8_t s2) { \
- for (int x = x0; x < x1; x++) { \
- packed_t c = ((packed_t *)src)[x]; \
- ((uint8_t *)dst[0])[x] = lut[((c >> s0) & 0xFF) + 256 * 0]; \
- ((uint8_t *)dst[1])[x] = lut[((c >> s1) & 0xFF) + 256 * 1]; \
- ((uint8_t *)dst[2])[x] = lut[((c >> s2) & 0xFF) + 256 * 2]; \
- } \
- }
-
-PA_SHIFT_LUT8(pa_shift_lut8_8, uint8_t)
-PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t)
-UN_SHIFT_LUT8(un_shift_lut8_8, uint8_t)
-UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t)
-
-static int fringe_rgb_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- void *p1 = r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0);
-
- void *p2[4] = {0};
- for (int p = 0; p < r->num_planes; p++) {
- int s = r->components[p];
- p2[p] = r->tmp->planes[s] +
- r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]);
- }
-
- assert(r->comp_size == 1 || r->comp_size == 2);
-
- void (*repack)(void *p1, void *p2[], int x0, int x1, uint8_t *lut,
- uint8_t s0, uint8_t s1, uint8_t s2) = NULL;
- if (r->pack) {
- repack = r->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16;
- } else {
- repack = r->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16;
- }
- repack(p1, p2, x0, x1, r->comp_lut,
- r->comp_shifts[0], r->comp_shifts[1], r->comp_shifts[2]);
-
- return 0;
-}
-
-static int bitmap_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- uint8_t *p1 =
- r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0);
- uint8_t *p2 =
- r->tmp->planes[0] + r->tmp->stride[0] * (ptrdiff_t)(i & r->zmask[0]);
-
- uint8_t swap = r->comp_size ? 0xFF : 0;
- if (r->pack) {
- // Supposedly zimg aligns this at least on 64 byte boundaries. Simplifies a
- // lot for us.
- assert(!(x0 & 7));
-
- for (int x = x0; x < x1; x += 8) {
- uint8_t d = 0;
- int max_b = MPMIN(8, x1 - x);
- for (int b = 0; b < max_b; b++)
- d |= (!!p2[x + b]) << (7 - b);
- p1[x / 8] = d ^ swap;
- }
- } else {
- x0 &= ~0x7;
-
- for (int x = x0; x < x1; x += 8) {
- uint8_t d = p1[x / 8] ^ swap;
- int max_b = MPMIN(8, x1 - x);
- for (int b = 0; b < max_b; b++)
- p2[x + b] = !!(d & (1 << (7 - b)));
- }
- }
-
- return 0;
-}
-
-static int unpack_pal(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- uint8_t *src = (void *)(r->mpi->planes[0] +
- r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0));
- uint32_t *pal = (void *)r->mpi->planes[1];
-
- uint8_t *dst[4] = {0};
- for (int p = 0; p < r->num_planes; p++) {
- dst[p] = r->tmp->planes[p] +
- r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]);
- }
-
- for (int x = x0; x < x1; x++) {
- uint32_t c = pal[src[x]];
- dst[0][x] = (c >> 8) & 0xFF; // G
- dst[1][x] = (c >> 0) & 0xFF; // B
- dst[2][x] = (c >> 16) & 0xFF; // R
- dst[3][x] = (c >> 24) & 0xFF; // A
- }
-
- return 0;
-}
-
-struct fringe_yuv422_repacker {
- // To avoid making a mess of IMGFMT_*, we use av formats directly.
- enum AVPixelFormat avfmt;
- // In bits (depth/8 rounded up gives byte size)
- int8_t depth;
- // Word index of each sample: {y0, y1, cb, cr}
- uint8_t comp[4];
- bool be;
-};
-
-static const struct fringe_yuv422_repacker fringe_yuv422_repackers[] = {
- {AV_PIX_FMT_YUYV422, 8, {0, 2, 1, 3}},
- {AV_PIX_FMT_UYVY422, 8, {1, 3, 0, 2}},
- {AV_PIX_FMT_YVYU422, 8, {0, 2, 3, 1}},
-#ifdef AV_PIX_FMT_Y210
- {AV_PIX_FMT_Y210LE, 10, {0, 2, 1, 3}},
- {AV_PIX_FMT_Y210BE, 10, {0, 2, 1, 3}, .be = true},
-#endif
-};
-
-#define PA_P422(name, comp_t) \
- static void name(void *dst, void *src[], int x0, int x1, uint8_t *c) { \
- for (int x = x0; x < x1; x += 2) { \
- ((comp_t *)dst)[x * 2 + c[0]] = ((comp_t *)src[0])[x + 0]; \
- ((comp_t *)dst)[x * 2 + c[1]] = ((comp_t *)src[0])[x + 1]; \
- ((comp_t *)dst)[x * 2 + c[2]] = ((comp_t *)src[1])[x >> 1]; \
- ((comp_t *)dst)[x * 2 + c[3]] = ((comp_t *)src[2])[x >> 1]; \
- } \
- }
-
-
-#define UN_P422(name, comp_t) \
- static void name(void *src, void *dst[], int x0, int x1, uint8_t *c) { \
- for (int x = x0; x < x1; x += 2) { \
- ((comp_t *)dst[0])[x + 0] = ((comp_t *)src)[x * 2 + c[0]]; \
- ((comp_t *)dst[0])[x + 1] = ((comp_t *)src)[x * 2 + c[1]]; \
- ((comp_t *)dst[1])[x >> 1] = ((comp_t *)src)[x * 2 + c[2]]; \
- ((comp_t *)dst[2])[x >> 1] = ((comp_t *)src)[x * 2 + c[3]]; \
- } \
- }
-
-PA_P422(pa_p422_8, uint8_t)
-PA_P422(pa_p422_16, uint16_t)
-UN_P422(un_p422_8, uint8_t)
-UN_P422(un_p422_16, uint16_t)
-
-static int fringe_yuv422_repack(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- void *p1 = r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0);
-
- void *p2[4] = {0};
- for (int p = 0; p < r->num_planes; p++) {
- p2[p] = r->tmp->planes[p] +
- r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]);
- }
-
- assert(r->comp_size == 1 || r->comp_size == 2);
-
- void (*repack)(void *p1, void *p2[], int x0, int x1, uint8_t *c) = NULL;
- if (r->pack) {
- repack = r->comp_size == 1 ? pa_p422_8 : pa_p422_16;
- } else {
- repack = r->comp_size == 1 ? un_p422_8 : un_p422_16;
- }
- repack(p1, p2, x0, x1, r->comp_map);
-
- return 0;
-}
-
-static int repack_nv(void *user, unsigned i, unsigned x0, unsigned x1)
-{
- struct mp_zimg_repack *r = user;
-
- int xs = r->mpi->fmt.chroma_xs;
- int ys = r->mpi->fmt.chroma_ys;
-
- if (r->use_buf[0]) {
- // Copy Y.
- int l_h = 1 << ys;
- for (int y = i; y < i + l_h; y++) {
- ptrdiff_t bpp = r->mpi->fmt.bytes[0];
- void *a = r->mpi->planes[0] +
- r->mpi->stride[0] * (ptrdiff_t)(y - r->mpi_y0) + bpp * x0;
- void *b = r->tmp->planes[0] +
- r->tmp->stride[0] * (ptrdiff_t)(y & r->zmask[0]) + bpp * x0;
- size_t size = (x1 - x0) * bpp;
- if (r->pack) {
- memcpy(a, b, size);
- } else {
- memcpy(b, a, size);
- }
- }
- }
-
- uint32_t *p1 = (void *)(r->mpi->planes[1] +
- r->mpi->stride[1] * (ptrdiff_t)((i - r->mpi_y0) >> ys));
-
- void *p2[2];
- for (int p = 0; p < 2; p++) {
- int s = r->components[p];
- p2[p] = r->tmp->planes[s] +
- r->tmp->stride[s] * (ptrdiff_t)((i >> ys) & r->zmask[s]);
- }
-
- r->packed_repack_scanline(p1, p2, x0 >> xs, x1 >> xs);
-
- return 0;
-}
-
static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1)
{
struct mp_zimg_repack *r = user;
- if (r->endian_size && !r->pack) {
- r->mpi = r->tmp_endian;
- r->mpi_y0 = i;
- swap_endian(r, r->mpi, 0, r->user_mpi, i, x0, x1);
- } else {
- r->mpi = r->user_mpi;
- r->mpi_y0 = 0;
- }
+ // If reading is not aligned, just read slightly more data.
+ if (!r->pack)
+ x0 &= ~(unsigned)(mp_repack_get_align_x(r->repack) - 1);
- if (r->repack) {
- r->repack(r, i, x0, x1);
- } else {
- repack_align(r, i, x0, x1);
- }
+ // mp_repack requirements and zimg guarantees.
+ assert(!(i & (mp_repack_get_align_y(r->repack) - 1)));
+ assert(!(x0 & (mp_repack_get_align_x(r->repack) - 1)));
+
+ unsigned i_src = i & (r->pack ? r->zmask[0] : ZIMG_BUFFER_MAX);
+ unsigned i_dst = i & (r->pack ? ZIMG_BUFFER_MAX : r->zmask[0]);
- if (r->endian_size && r->pack)
- swap_endian(r, r->user_mpi, i, r->mpi, i - r->mpi_y0, x0, x1);
+ repack_line(r->repack, x0, i_dst, x0, i_src, x1 - x0);
- r->mpi = NULL;
return 0;
}
-static void wrap_buffer(struct mp_zimg_repack *r,
+static bool wrap_buffer(struct mp_zimg_repack *r,
zimg_image_buffer *buf,
struct mp_image *mpi)
{
*buf = (zimg_image_buffer){ZIMG_API_VERSION};
- bool plane_aligned[4] = {0};
- for (int n = 0; n < r->num_planes; n++) {
- plane_aligned[n] = !((uintptr_t)mpi->planes[n] % ZIMG_ALIGN) &&
- !(mpi->stride[n] % ZIMG_ALIGN);
+ bool direct[MP_MAX_PLANES] = {0};
+
+ for (int p = 0; p < mpi->num_planes; p++) {
+ // If alignment is good, try to avoid copy.
+ direct[p] = !((uintptr_t)mpi->planes[p] % ZIMG_ALIGN) &&
+ !(mpi->stride[p] % ZIMG_ALIGN);
}
+ if (!repack_config_buffers(r->repack, 0, r->pack ? mpi : r->tmp,
+ 0, r->pack ? r->tmp : mpi, direct))
+ return false;
+
for (int n = 0; n < MP_ARRAY_SIZE(buf->plane); n++) {
// Note: this is really the only place we have to care about plane
// permutation (zimg_image_buffer may have a different plane order
@@ -778,355 +255,67 @@ static void wrap_buffer(struct mp_zimg_repack *r,
if (mplane < 0)
continue;
- r->use_buf[mplane] = !plane_aligned[mplane] || r->endian_size;
- if (!(r->pass_through_y && mplane == 0))
- r->use_buf[mplane] |= !!r->repack;
-
- struct mp_image *tmpi = r->use_buf[mplane] ? r->tmp : mpi;
+ struct mp_image *tmpi = direct[mplane] ? mpi : r->tmp;
buf->plane[n].data = tmpi->planes[mplane];
buf->plane[n].stride = tmpi->stride[mplane];
- buf->plane[n].mask = r->use_buf[mplane] ? r->zmask[mplane]
- : ZIMG_BUFFER_MAX;
+ buf->plane[n].mask = direct[mplane] ? ZIMG_BUFFER_MAX : r->zmask[mplane];
}
- r->user_mpi = mpi;
-}
-
-// depth = number of LSB in use
-static int find_gbrp_format(int depth, int num_planes)
-{
- if (num_planes != 3 && num_planes != 4)
- return 0;
- struct mp_regular_imgfmt desc = {
- .component_type = MP_COMPONENT_TYPE_UINT,
- .forced_csp = MP_CSP_RGB,
- .component_size = depth > 8 ? 2 : 1,
- .component_pad = depth - (depth > 8 ? 16 : 8),
- .num_planes = num_planes,
- .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
- };
- return mp_find_regular_imgfmt(&desc);
-}
-
-// depth = number of LSB in use
-static int find_gray_format(int depth, int num_planes)
-{
- if (num_planes != 1 && num_planes != 2)
- return 0;
- struct mp_regular_imgfmt desc = {
- .component_type = MP_COMPONENT_TYPE_UINT,
- .component_size = depth > 8 ? 2 : 1,
- .component_pad = depth - (depth > 8 ? 16 : 8),
- .num_planes = num_planes,
- .planes = { {1, {1}}, {1, {4}} },
- };
- return mp_find_regular_imgfmt(&desc);
+ return true;
}
-static void setup_fringe_rgb_packer(struct mp_zimg_repack *r,
- struct mp_zimg_context *ctx)
+// (ctx can be NULL for probing.)
+static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r,
+ bool pack, struct mp_image_params *user_fmt,
+ struct mp_zimg_context *ctx)
{
- enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt);
+ r->fmt = *user_fmt;
+ r->pack = pack;
- const struct fringe_rgb_repacker *fmt = NULL;
- for (int n = 0; n < MP_ARRAY_SIZE(fringe_rgb_repackers); n++) {
- if (fringe_rgb_repackers[n].avfmt == avfmt) {
- fmt = &fringe_rgb_repackers[n];
- break;
- }
- }
+ zimg_image_format_default(zfmt, ZIMG_API_VERSION);
- if (!fmt)
- return;
+ int rp_flags = 0;
- int depth = 8;
+ // For e.g. RGB565, go to lowest depth on pack for less weird dithering.
if (r->pack) {
- // Dither to lowest depth - loses some precision, but result is saner.
- depth = fmt->bits[0];
- for (int n = 0; n < 3; n++)
- depth = MPMIN(depth, fmt->bits[n]);
- }
-
- r->zimgfmt = find_gbrp_format(depth, 3);
- if (!r->zimgfmt)
- return;
- if (ctx)
- r->comp_lut = talloc_array(ctx, uint8_t, 256 * 3);
- r->repack = fringe_rgb_repack;
- static const int c_order_rgb[] = {3, 1, 2};
- static const int c_order_bgr[] = {2, 1, 3};
- for (int n = 0; n < 3; n++)
- r->components[n] = (fmt->rev_order ? c_order_bgr : c_order_rgb)[n] - 1;
-
- int bitpos = 0;
- for (int n = 0; n < 3; n++) {
- int bits = fmt->bits[n];
- r->comp_shifts[n] = bitpos;
- if (r->comp_lut) {
- uint8_t *lut = r->comp_lut + 256 * n;
- uint8_t zmax = (1 << depth) - 1;
- uint8_t cmax = (1 << bits) - 1;
- for (int v = 0; v < 256; v++) {
- if (r->pack) {
- lut[v] = (v * cmax + zmax / 2) / zmax;
- } else {
- lut[v] = (v & cmax) * zmax / cmax;
- }
- }
- }
- bitpos += bits;
- }
-
- r->comp_size = (bitpos + 7) / 8;
- assert(r->comp_size == 1 || r->comp_size == 2);
-
- if (fmt->be) {
- assert(r->comp_size == 2);
- r->endian_size = 2;
- r->endian_items[0] = 1;
- }
-}
-
-static void setup_fringe_yuv422_packer(struct mp_zimg_repack *r)
-{
- enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt);
-
- const struct fringe_yuv422_repacker *fmt = NULL;
- for (int n = 0; n < MP_ARRAY_SIZE(fringe_yuv422_repackers); n++) {
- if (fringe_yuv422_repackers[n].avfmt == avfmt) {
- fmt = &fringe_yuv422_repackers[n];
- break;
- }
- }
-
- if (!fmt)
- return;
-
- r->comp_size = (fmt->depth + 7) / 8;
- assert(r->comp_size == 1 || r->comp_size == 2);
-
- struct mp_regular_imgfmt yuvfmt = {
- .component_type = MP_COMPONENT_TYPE_UINT,
- // NB: same problem with P010 and not clearing padding.
- .component_size = r->comp_size,
- .num_planes = 3,
- .planes = { {1, {1}}, {1, {2}}, {1, {3}} },
- .chroma_xs = 1,
- .chroma_ys = 0,
- };
- r->zimgfmt = mp_find_regular_imgfmt(&yuvfmt);
- r->repack = fringe_yuv422_repack;
- r->comp_map = (uint8_t *)fmt->comp;
-
- if (fmt->be) {
- assert(r->comp_size == 2);
- r->endian_size = 2;
- r->endian_items[0] = 4;
- }
-}
-
-static void setup_nv_packer(struct mp_zimg_repack *r)
-{
- struct mp_regular_imgfmt desc;
- if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
- return;
-
- // Check for NV.
- if (desc.num_planes != 2)
- return;
- if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
- return;
- if (desc.planes[1].num_components != 2)
- return;
- int cr0 = desc.planes[1].components[0];
- int cr1 = desc.planes[1].components[1];
- if (cr0 > cr1)
- MPSWAP(int, cr0, cr1);
- if (cr0 != 2 || cr1 != 3)
- return;
-
- // Construct equivalent planar format.
- struct mp_regular_imgfmt desc2 = desc;
- desc2.num_planes = 3;
- desc2.planes[1].num_components = 1;
- desc2.planes[1].components[0] = 2;
- desc2.planes[2].num_components = 1;
- desc2.planes[2].components[0] = 3;
- // For P010. Strangely this concept exists only for the NV format.
- if (desc2.component_pad > 0)
- desc2.component_pad = 0;
-
- int planar_fmt = mp_find_regular_imgfmt(&desc2);
- if (!planar_fmt)
- return;
-
- for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
- const struct regular_repacker *pa = &regular_repackers[i];
-
- void (*repack_cb)(void *p1, void *p2[], int x0, int x1) =
- r->pack ? pa->pa_scanline : pa->un_scanline;
-
- if (pa->packed_width != desc.component_size * 2 * 8 ||
- pa->component_width != desc.component_size * 8 ||
- pa->num_components != 2 ||
- pa->prepadding != 0 ||
- !repack_cb)
- continue;
-
- r->repack = repack_nv;
- r->pass_through_y = true;
- r->packed_repack_scanline = repack_cb;
- r->zimgfmt = planar_fmt;
- r->components[0] = desc.planes[1].components[0] - 1;
- r->components[1] = desc.planes[1].components[1] - 1;
- return;
- }
-}
-
-static void setup_misc_packer(struct mp_zimg_repack *r)
-{
- // Although it's in regular_repackers[], the generic mpv imgfmt metadata
- // can't handle it yet.
- if (r->zimgfmt == IMGFMT_RGB30) {
- int planar_fmt = find_gbrp_format(10, 3);
- if (!planar_fmt)
- return;
- r->zimgfmt = planar_fmt;
- r->repack = packed_repack;
- r->packed_repack_scanline = r->pack ? pa_ccc10z2 : un_ccc10x2;
- static int c_order[] = {3, 2, 1};
- for (int n = 0; n < 3; n++)
- r->components[n] = c_order[n] - 1;
- } else if (r->zimgfmt == IMGFMT_PAL8 && !r->pack) {
- int grap_fmt = find_gbrp_format(8, 4);
- if (!grap_fmt)
- return;
- r->zimgfmt = grap_fmt;
- r->repack = unpack_pal;
+ rp_flags |= REPACK_CREATE_ROUND_DOWN;
} else {
- enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt);
- if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) {
- r->zimgfmt = IMGFMT_Y1;
- r->repack = bitmap_repack;
- r->comp_size = avfmt == AV_PIX_FMT_MONOWHITE; // abuse to pass a flag
- return;
- }
+ rp_flags |= REPACK_CREATE_EXPAND_8BIT;
}
-}
-
-// Tries to set a packer/unpacker for component-wise byte aligned RGB formats.
-static void setup_regular_rgb_packer(struct mp_zimg_repack *r)
-{
- struct mp_regular_imgfmt desc;
- if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
- return;
- if (desc.num_planes != 1 || desc.planes[0].num_components < 2)
- return;
- struct mp_regular_imgfmt_plane *p = &desc.planes[0];
-
- int num_real_components = 0;
- bool has_alpha = false;
- for (int n = 0; n < p->num_components; n++) {
- if (p->components[n]) {
- has_alpha |= p->components[n] == 4;
- num_real_components += 1;
- } else {
- // padding must be in MSB or LSB
- if (n != 0 && n != p->num_components - 1)
- return;
- }
- }
+ r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags);
+ if (!r->repack)
+ return false;
- int depth = desc.component_size * 8 + MPMIN(0, desc.component_pad);
+ int align_x = mp_repack_get_align_x(r->repack);
- int planar_fmt = num_real_components > 2
- ? find_gbrp_format(depth, num_real_components)
- : find_gray_format(depth, num_real_components);
- if (!planar_fmt)
- return;
- static const int reorder_gbrp[] = {0, 3, 1, 2, 4};
- static const int reorder_gray[] = {0, 1, 0, 0, 4};
- const int *reorder = num_real_components > 2 ? reorder_gbrp : reorder_gray;
-
- for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
- const struct regular_repacker *pa = &regular_repackers[i];
-
- // The following may assume little endian (because some repack backends
- // use word access, while the metadata here uses byte access).
-
- int prepad = p->components[0] ? 0 : 8;
- int first_comp = p->components[0] ? 0 : 1;
- void (*repack_cb)(void *p1, void *p2[], int x0, int x1) =
- r->pack ? pa->pa_scanline : pa->un_scanline;
-
- if (pa->packed_width != desc.component_size * p->num_components * 8 ||
- pa->component_width != depth ||
- pa->num_components != num_real_components ||
-