diff options
Diffstat (limited to 'video/zimg.c')
-rw-r--r-- | video/zimg.c | 990 |
1 files changed, 365 insertions, 625 deletions
diff --git a/video/zimg.c b/video/zimg.c index 8127e87b9d..907e81deb7 100644 --- a/video/zimg.c +++ b/video/zimg.c @@ -17,13 +17,20 @@ #include <math.h> +#include <libavutil/cpu.h> + #include "common/common.h" #include "common/msg.h" #include "csputils.h" +#include "misc/thread_pool.h" +#include "misc/thread_tools.h" #include "options/m_config.h" #include "options/m_option.h" +#include "repack.h" +#include "video/fmt-conversion.h" #include "video/img_format.h" #include "zimg.h" +#include "config.h" static_assert(MP_IMAGE_BYTE_ALIGN >= ZIMG_ALIGN, ""); @@ -39,69 +46,69 @@ static const struct m_opt_choice_alternatives mp_zimg_scalers[] = { {0} }; -#define OPT_PARAM(name, var, flags) \ - OPT_DOUBLE(name, var, (flags) | M_OPT_DEFAULT_NAN) +const struct zimg_opts zimg_opts_defaults = { + .scaler = ZIMG_RESIZE_LANCZOS, + .scaler_params = {NAN, NAN}, + .scaler_chroma_params = {NAN, NAN}, + .scaler_chroma = ZIMG_RESIZE_BILINEAR, + .dither = ZIMG_DITHER_RANDOM, + .fast = true, +}; + +#define OPT_PARAM(var) OPT_DOUBLE(var), .flags = M_OPT_DEFAULT_NAN #define OPT_BASE_STRUCT struct zimg_opts const struct m_sub_options zimg_conf = { .opts = (struct m_option[]) { - OPT_CHOICE_C("scaler", scaler, 0, mp_zimg_scalers), - OPT_PARAM("scaler-param-a", scaler_params[0], 0), - OPT_PARAM("scaler-param-b", scaler_params[1], 0), - OPT_CHOICE_C("scaler-chroma", scaler_chroma, 0, mp_zimg_scalers), - OPT_PARAM("scaler-chroma-param-a", scaler_chroma_params[0], 0), - OPT_PARAM("scaler-chroma-param-b", scaler_chroma_params[1], 0), - OPT_CHOICE("dither", dither, 0, - ({"no", ZIMG_DITHER_NONE}, - {"ordered", ZIMG_DITHER_ORDERED}, - {"random", ZIMG_DITHER_RANDOM}, - {"error-diffusion", ZIMG_DITHER_ERROR_DIFFUSION})), - OPT_FLAG("fast", fast, 0), + {"scaler", OPT_CHOICE_C(scaler, mp_zimg_scalers)}, + {"scaler-param-a", OPT_PARAM(scaler_params[0])}, + {"scaler-param-b", OPT_PARAM(scaler_params[1])}, + {"scaler-chroma", OPT_CHOICE_C(scaler_chroma, mp_zimg_scalers)}, + {"scaler-chroma-param-a", OPT_PARAM(scaler_chroma_params[0])}, + {"scaler-chroma-param-b", OPT_PARAM(scaler_chroma_params[1])}, + {"dither", OPT_CHOICE(dither, + {"no", ZIMG_DITHER_NONE}, + {"ordered", ZIMG_DITHER_ORDERED}, + {"random", ZIMG_DITHER_RANDOM}, + {"error-diffusion", ZIMG_DITHER_ERROR_DIFFUSION})}, + {"fast", OPT_BOOL(fast)}, + {"threads", OPT_CHOICE(threads, {"auto", 0}), M_RANGE(1, 64)}, {0} }, .size = sizeof(struct zimg_opts), - .defaults = &(const struct zimg_opts){ - .scaler = ZIMG_RESIZE_LANCZOS, - .scaler_params = {NAN, NAN}, - .scaler_chroma_params = {NAN, NAN}, - .scaler_chroma = ZIMG_RESIZE_BILINEAR, - .dither = ZIMG_DITHER_RANDOM, - .fast = 1, - }, + .defaults = &zimg_opts_defaults, +}; + +struct mp_zimg_state { + zimg_filter_graph *graph; + void *tmp; + void *tmp_alloc; + struct mp_zimg_repack *src; + struct mp_zimg_repack *dst; + int slice_y, slice_h; // y start position, height of target slice + double scale_y; + struct mp_waiter thread_waiter; }; struct mp_zimg_repack { bool pack; // if false, this is for unpacking - struct mp_image_params fmt; // original mp format (possibly packed format) + struct mp_image_params fmt; // original mp format (possibly packed format, + // swapped endian) int zimgfmt; // zimg equivalent unpacked format int num_planes; // number of planes involved unsigned zmask[4]; // zmask[mp_index] = zimg mask (using mp index!) int z_planes[4]; // z_planes[zimg_index] = mp_index (or -1) - bool pass_through_y; // luma plane optimization for e.g. nv12 - // If set, the pack/unpack callback to pass to zimg. - // Called with user==mp_zimg_repack. - zimg_filter_graph_callback repack; - - // For packed_repack. - int components[4]; // p2[n] = mp_image.planes[components[n]] - // pack: p1 is dst, p2 is src - // unpack: p1 is src, p2 is dst - void (*packed_repack_scanline)(void *p1, void *p2[], int x0, int x1); + struct mp_repack *repack; // converting to/from planar // Temporary memory for slice-wise repacking. This may be set even if repack // is not set (then it may be used to avoid alignment issues). This has // about one slice worth of data. struct mp_image *tmp; - // Temporary, per-call source/target frame. (Regrettably a mutable field, - // but it's not the only one, and makes the callbacks much less of a mess - // by avoiding another "closure" indirection.) - // To be used by the repack callback. - struct mp_image *mpi; - - // Also temporary, per-call. use_buf[n] == plane n uses tmp (and not mpi). - bool use_buf[4]; + // Temporary memory for zimg buffer. + zimg_image_buffer zbuf; + struct mp_image cropped_tmp; int real_w, real_h; // aligned size }; @@ -114,80 +121,94 @@ static void mp_zimg_update_from_cmdline(struct mp_zimg_context *ctx) ctx->opts = *opts; } -static zimg_chroma_location_e mp_to_z_chroma(enum mp_chroma_location cl) +static zimg_chroma_location_e pl_to_z_chroma(enum pl_chroma_location cl) { switch (cl) { - case MP_CHROMA_LEFT: return ZIMG_CHROMA_LEFT; - case MP_CHROMA_CENTER: return ZIMG_CHROMA_CENTER; - default: return ZIMG_CHROMA_LEFT; + case PL_CHROMA_LEFT: return ZIMG_CHROMA_LEFT; + case PL_CHROMA_CENTER: return ZIMG_CHROMA_CENTER; + case PL_CHROMA_TOP_LEFT: return ZIMG_CHROMA_TOP_LEFT; + case PL_CHROMA_TOP_CENTER: return ZIMG_CHROMA_TOP; + case PL_CHROMA_BOTTOM_LEFT: return ZIMG_CHROMA_BOTTOM_LEFT; + case PL_CHROMA_BOTTOM_CENTER: return ZIMG_CHROMA_BOTTOM; + default: return ZIMG_CHROMA_LEFT; } } -static zimg_matrix_coefficients_e mp_to_z_matrix(enum mp_csp csp) +static zimg_matrix_coefficients_e pl_to_z_matrix(enum pl_color_system csp) { switch (csp) { - case MP_CSP_BT_601: return ZIMG_MATRIX_BT470_BG; - case MP_CSP_BT_709: return ZIMG_MATRIX_BT709; - case MP_CSP_SMPTE_240M: return ZIMG_MATRIX_ST240_M; - case MP_CSP_BT_2020_NC: return ZIMG_MATRIX_BT2020_NCL; - case MP_CSP_BT_2020_C: return ZIMG_MATRIX_BT2020_CL; - case MP_CSP_RGB: return ZIMG_MATRIX_RGB; - case MP_CSP_XYZ: return ZIMG_MATRIX_RGB; - case MP_CSP_YCGCO: return ZIMG_MATRIX_YCGCO; + case PL_COLOR_SYSTEM_BT_601: return ZIMG_MATRIX_BT470_BG; + case PL_COLOR_SYSTEM_BT_709: return ZIMG_MATRIX_BT709; + case PL_COLOR_SYSTEM_SMPTE_240M: return ZIMG_MATRIX_ST240_M; + case PL_COLOR_SYSTEM_BT_2020_NC: return ZIMG_MATRIX_BT2020_NCL; + case PL_COLOR_SYSTEM_BT_2020_C: return ZIMG_MATRIX_BT2020_CL; + case PL_COLOR_SYSTEM_RGB: return ZIMG_MATRIX_RGB; + case PL_COLOR_SYSTEM_XYZ: return ZIMG_MATRIX_RGB; + case PL_COLOR_SYSTEM_YCGCO: return ZIMG_MATRIX_YCGCO; default: return ZIMG_MATRIX_BT709; } } -static zimg_transfer_characteristics_e mp_to_z_trc(enum mp_csp_trc trc) +static zimg_transfer_characteristics_e pl_to_z_trc(enum pl_color_transfer trc) { switch (trc) { - case MP_CSP_TRC_BT_1886: return ZIMG_TRANSFER_BT709; - case MP_CSP_TRC_SRGB: return ZIMG_TRANSFER_IEC_61966_2_1; - case MP_CSP_TRC_LINEAR: return ZIMG_TRANSFER_LINEAR; - case MP_CSP_TRC_GAMMA22: return ZIMG_TRANSFER_BT470_M; - case MP_CSP_TRC_GAMMA28: return ZIMG_TRANSFER_BT470_BG; - case MP_CSP_TRC_PQ: return ZIMG_TRANSFER_ST2084; - case MP_CSP_TRC_HLG: return ZIMG_TRANSFER_ARIB_B67; - case MP_CSP_TRC_GAMMA18: // ? - case MP_CSP_TRC_GAMMA20: - case MP_CSP_TRC_GAMMA24: - case MP_CSP_TRC_GAMMA26: - case MP_CSP_TRC_PRO_PHOTO: - case MP_CSP_TRC_V_LOG: - case MP_CSP_TRC_S_LOG1: - case MP_CSP_TRC_S_LOG2: // ? + case PL_COLOR_TRC_BT_1886: return ZIMG_TRANSFER_BT709; + case PL_COLOR_TRC_SRGB: return ZIMG_TRANSFER_IEC_61966_2_1; + case PL_COLOR_TRC_LINEAR: return ZIMG_TRANSFER_LINEAR; + case PL_COLOR_TRC_GAMMA22: return ZIMG_TRANSFER_BT470_M; + case PL_COLOR_TRC_GAMMA28: return ZIMG_TRANSFER_BT470_BG; + case PL_COLOR_TRC_PQ: return ZIMG_TRANSFER_ST2084; + case PL_COLOR_TRC_HLG: return ZIMG_TRANSFER_ARIB_B67; +#if HAVE_ZIMG_ST428 + case PL_COLOR_TRC_ST428: return ZIMG_TRANSFER_ST428; +#endif + case PL_COLOR_TRC_GAMMA18: // ? + case PL_COLOR_TRC_GAMMA20: + case PL_COLOR_TRC_GAMMA24: + case PL_COLOR_TRC_GAMMA26: + case PL_COLOR_TRC_PRO_PHOTO: + case PL_COLOR_TRC_V_LOG: + case PL_COLOR_TRC_S_LOG1: + case PL_COLOR_TRC_S_LOG2: // ? default: return ZIMG_TRANSFER_BT709; } } -static zimg_color_primaries_e mp_to_z_prim(enum mp_csp_prim prim) +static zimg_color_primaries_e mp_to_z_prim(enum pl_color_primaries prim) { switch (prim) { - case MP_CSP_PRIM_BT_601_525:return ZIMG_PRIMARIES_ST170_M; - case MP_CSP_PRIM_BT_601_625:return ZIMG_PRIMARIES_BT470_BG; - case MP_CSP_PRIM_BT_709: return ZIMG_PRIMARIES_BT709; - case MP_CSP_PRIM_BT_2020: return ZIMG_PRIMARIES_BT2020; - case MP_CSP_PRIM_BT_470M: return ZIMG_PRIMARIES_BT470_M; - case MP_CSP_PRIM_CIE_1931: return ZIMG_PRIMARIES_ST428; - case MP_CSP_PRIM_DCI_P3: return ZIMG_PRIMARIES_ST431_2; - case MP_CSP_PRIM_DISPLAY_P3:return ZIMG_PRIMARIES_ST432_1; - case MP_CSP_PRIM_APPLE: // ? - case MP_CSP_PRIM_ADOBE: - case MP_CSP_PRIM_PRO_PHOTO: - case MP_CSP_PRIM_V_GAMUT: - case MP_CSP_PRIM_S_GAMUT: // ? + case PL_COLOR_PRIM_BT_601_525:return ZIMG_PRIMARIES_ST170_M; + case PL_COLOR_PRIM_BT_601_625:return ZIMG_PRIMARIES_BT470_BG; + case PL_COLOR_PRIM_BT_709: return ZIMG_PRIMARIES_BT709; + case PL_COLOR_PRIM_BT_2020: return ZIMG_PRIMARIES_BT2020; + case PL_COLOR_PRIM_BT_470M: return ZIMG_PRIMARIES_BT470_M; + case PL_COLOR_PRIM_DCI_P3: return ZIMG_PRIMARIES_ST431_2; + case PL_COLOR_PRIM_DISPLAY_P3:return ZIMG_PRIMARIES_ST432_1; + case PL_COLOR_PRIM_EBU_3213: return ZIMG_PRIMARIES_EBU3213_E; + case PL_COLOR_PRIM_FILM_C: return ZIMG_PRIMARIES_FILM; + case PL_COLOR_PRIM_CIE_1931: + case PL_COLOR_PRIM_APPLE: // ? + case PL_COLOR_PRIM_ADOBE: + case PL_COLOR_PRIM_PRO_PHOTO: + case PL_COLOR_PRIM_V_GAMUT: + case PL_COLOR_PRIM_S_GAMUT: // ? + case PL_COLOR_PRIM_ACES_AP0: + case PL_COLOR_PRIM_ACES_AP1: default: return ZIMG_PRIMARIES_BT709; } } static void destroy_zimg(struct mp_zimg_context *ctx) { - free(ctx->zimg_tmp); - ctx->zimg_tmp = NULL; - zimg_filter_graph_free(ctx->zimg_graph); - ctx->zimg_graph = NULL; - TA_FREEP(&ctx->zimg_src); - TA_FREEP(&ctx->zimg_dst); + for (int n = 0; n < ctx->num_states; n++) { + struct mp_zimg_state *st = ctx->states[n]; + talloc_free(st->tmp_alloc); + zimg_filter_graph_free(st->graph); + TA_FREEP(&st->src); + TA_FREEP(&st->dst); + talloc_free(st); + } + ctx->num_states = 0; } static void free_mp_zimg(void *p) @@ -195,6 +216,7 @@ static void free_mp_zimg(void *p) struct mp_zimg_context *ctx = p; destroy_zimg(ctx); + TA_FREEP(&ctx->tp); } struct mp_zimg_context *mp_zimg_alloc(void) @@ -219,275 +241,57 @@ void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx, mp_zimg_update_from_cmdline(ctx); // first update } -static int repack_align(void *user, unsigned i, unsigned x0, unsigned x1) +static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1) { struct mp_zimg_repack *r = user; - for (int p = 0; p < r->mpi->fmt.num_planes; p++) { - if (!r->use_buf[p]) - continue; - - int bpp = r->mpi->fmt.bytes[p]; - int xs = r->mpi->fmt.xs[p]; - int ys = r->mpi->fmt.ys[p]; - // Number of lines on this plane. - int h = (1 << r->mpi->fmt.chroma_ys) - (1 << ys) + 1; - - for (int y = i; y < i + h; y++) { - void *a = r->mpi->planes[p] + - r->mpi->stride[p] * (ptrdiff_t)(y >> ys) + - bpp * (x0 >> xs); - void *b = r->tmp->planes[p] + - r->tmp->stride[p] * (ptrdiff_t)((y >> ys) & r->zmask[p]) + - bpp * (x0 >> xs); - size_t size = ((x1 - x0) >> xs) * bpp; - if (r->pack) { - memcpy(a, b, size); - } else { - memcpy(b, a, size); - } - } - } - - return 0; -} - -// PA = PAck, copy planar input to single packed array -// UN = UNpack, copy packed input to planar output -// Naming convention: -// pa_/un_ prefix to identify conversion direction. -// Left (LSB, lowest byte address) -> Right (MSB, highest byte address). -// (This is unusual; MSB to LSB is more commonly used to describe formats, -// but our convention makes more sense for byte access in little endian.) -// "c" identifies a color component. -// "z" identifies known zero padding. -// "x" identifies uninitialized padding. -// A component is followed by its size in bits. -// Size can be omitted for multiple uniform components (c8c8c8 == ccc8). -// Unpackers will often use "x" for padding, because they ignore it, while -// packers will use "z" because they write zero. - -#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = \ - ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ - ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \ - ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) | \ - ((packed_t)((plane_t *)src[3])[x] << (sh_c3)); \ - } \ - } - -#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\ - static void name(void *src, void *dst[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ - ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ - ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ - ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask); \ - } \ - } - - -#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = (pad) | \ - ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ - ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \ - ((packed_t)((plane_t *)src[2])[x] << (sh_c2)); \ - } \ - } - -UN_WORD_4(un_cccc8, uint32_t, uint8_t, 0, 8, 16, 24, 0xFFu) -PA_WORD_4(pa_cccc8, uint32_t, uint8_t, 0, 8, 16, 24) -// Not sure if this is a good idea; there may be no alignment guarantee. -UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu) -PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48) - -#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \ - static void name(void *src, void *dst[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ - ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ - ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ - } \ - } - -UN_WORD_3(un_ccc8x8, uint32_t, uint8_t, 0, 8, 16, 0xFFu) -PA_WORD_3(pa_ccc8z8, uint32_t, uint8_t, 0, 8, 16, 0) -UN_WORD_3(un_x8ccc8, uint32_t, uint8_t, 8, 16, 24, 0xFFu) -PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0) -UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu) -PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 20, 10, 0, 0) - -#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = (pad) | \ - ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ - ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \ - } \ - } - -#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \ - static void name(void *src, void *dst[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ - ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ - } \ - } - -UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu) -PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0) -UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu) -PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0) - -#define PA_SEQ_3(name, comp_t) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - comp_t *r = dst; \ - for (int x = x0; x < x1; x++) { \ - *r++ = ((comp_t *)src[0])[x]; \ - *r++ = ((comp_t *)src[1])[x]; \ - *r++ = ((comp_t *)src[2])[x]; \ - } \ - } - -#define UN_SEQ_3(name, comp_t) \ - static void name(void *src, void *dst[], int x0, int x1) { \ - comp_t *r = src; \ - for (int x = x0; x < x1; x++) { \ - ((comp_t *)dst[0])[x] = *r++; \ - ((comp_t *)dst[1])[x] = *r++; \ - ((comp_t *)dst[2])[x] = *r++; \ - } \ - } - -UN_SEQ_3(un_ccc8, uint8_t) -PA_SEQ_3(pa_ccc8, uint8_t) -UN_SEQ_3(un_ccc16, uint16_t) -PA_SEQ_3(pa_ccc16, uint16_t) - -// "regular": single packed plane, all components have same width (except padding) -struct regular_repacker { - int packed_width; // number of bits of the packed pixel - int component_width; // number of bits for a single component - int prepadding; // number of bits of LSB padding - int num_components; // number of components that can be accessed - void (*pa_scanline)(void *p1, void *p2[], int x0, int x1); - void (*un_scanline)(void *p1, void *p2[], int x0, int x1); -}; - -static const struct regular_repacker regular_repackers[] = { - {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8}, - {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8}, - {32, 8, 0, 4, pa_cccc8, un_cccc8}, - {64, 16, 0, 4, pa_cccc16, un_cccc16}, - {24, 8, 0, 3, pa_ccc8, un_ccc8}, - {48, 16, 0, 3, pa_ccc16, un_ccc16}, - {16, 8, 0, 2, pa_cc8, un_cc8}, - {32, 16, 0, 2, pa_cc16, un_cc16}, - {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2}, -}; - -static int packed_repack(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - uint32_t *p1 = - (void *)(r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)i); - - void *p2[4] = {0}; - for (int p = 0; p < r->num_planes; p++) { - int s = r->components[p]; - p2[p] = r->tmp->planes[s] + - r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]); - } - - r->packed_repack_scanline(p1, p2, x0, x1); - - return 0; -} - -static int unpack_pal(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; + // If reading is not aligned, just read slightly more data. + if (!r->pack) + x0 &= ~(unsigned)(mp_repack_get_align_x(r->repack) - 1); - uint8_t *src = (void *)(r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)i); - uint32_t *pal = (void *)r->mpi->planes[1]; + // mp_repack requirements and zimg guarantees. + assert(!(i & (mp_repack_get_align_y(r->repack) - 1))); + assert(!(x0 & (mp_repack_get_align_x(r->repack) - 1))); - uint8_t *dst[4] = {0}; - for (int p = 0; p < r->num_planes; p++) { - dst[p] = r->tmp->planes[p] + - r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]); - } + unsigned i_src = i & (r->pack ? r->zmask[0] : ZIMG_BUFFER_MAX); + unsigned i_dst = i & (r->pack ? ZIMG_BUFFER_MAX : r->zmask[0]); - for (int x = x0; x < x1; x++) { - uint32_t c = pal[src[x]]; - dst[0][x] = (c >> 8) & 0xFF; // G - dst[1][x] = (c >> 0) & 0xFF; // B - dst[2][x] = (c >> 16) & 0xFF; // R - dst[3][x] = (c >> 24) & 0xFF; // A - } + repack_line(r->repack, x0, i_dst, x0, i_src, x1 - x0); return 0; } -static int repack_nv(void *user, unsigned i, unsigned x0, unsigned x1) +static bool wrap_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r, + struct mp_image *a_mpi) { - struct mp_zimg_repack *r = user; + zimg_image_buffer *buf = &r->zbuf; + *buf = (zimg_image_buffer){ZIMG_API_VERSION}; - int xs = r->mpi->fmt.chroma_xs; - int ys = r->mpi->fmt.chroma_ys; - - if (r->use_buf[0]) { - // Copy Y. - int l_h = 1 << ys; - for (int y = i; y < i + l_h; y++) { - ptrdiff_t bpp = r->mpi->fmt.bytes[0]; - void *a = r->mpi->planes[0] + - r->mpi->stride[0] * (ptrdiff_t)y + bpp * x0; - void *b = r->tmp->planes[0] + - r->tmp->stride[0] * (ptrdiff_t)(y & r->zmask[0]) + bpp * x0; - size_t size = (x1 - x0) * bpp; - if (r->pack) { - memcpy(a, b, size); - } else { - memcpy(b, a, size); - } + struct mp_image *mpi = a_mpi; + if (r->pack) { + mpi = &r->cropped_tmp; + *mpi = *a_mpi; + int y1 = st->slice_y + st->slice_h; + // Due to subsampling we may assume the image to be bigger than it + // actually is (see real_h in setup_format). + if (mpi->h < y1) { + assert(y1 - mpi->h < 4); + mp_image_set_size(mpi, mpi->w, y1); } + mp_image_crop(mpi, 0, st->slice_y, mpi->w, y1); } - uint32_t *p1 = - (void *)(r->mpi->planes[1] + r->mpi->stride[1] * (ptrdiff_t)(i >> ys)); + bool direct[MP_MAX_PLANES] = {0}; - void *p2[2]; - for (int p = 0; p < 2; p++) { - int s = r->components[p]; - p2[p] = r->tmp->planes[s] + - r->tmp->stride[s] * (ptrdiff_t)((i >> ys) & r->zmask[s]); + for (int p = 0; p < mpi->num_planes; p++) { + // If alignment is good, try to avoid copy. + direct[p] = !((uintptr_t)mpi->planes[p] % ZIMG_ALIGN) && + !(mpi->stride[p] % ZIMG_ALIGN); } - r->packed_repack_scanline(p1, p2, x0 >> xs, x1 >> xs); - - return 0; -} - -static void wrap_buffer(struct mp_zimg_repack *r, - zimg_image_buffer *buf, - zimg_filter_graph_callback *cb, - struct mp_image *mpi) -{ - *buf = (zimg_image_buffer){ZIMG_API_VERSION}; - - bool plane_aligned[4] = {0}; - for (int n = 0; n < r->num_planes; n++) { - plane_aligned[n] = !((uintptr_t)mpi->planes[n] % ZIMG_ALIGN) && - !(mpi->stride[n] % ZIMG_ALIGN); - } + if (!repack_config_buffers(r->repack, 0, r->pack ? mpi : r->tmp, + 0, r->pack ? r->tmp : mpi, direct)) + return false; for (int n = 0; n < MP_ARRAY_SIZE(buf->plane); n++) { // Note: this is really the only place we have to care about plane @@ -498,229 +302,68 @@ static void wrap_buffer(struct mp_zimg_repack *r, if (mplane < 0) continue; - r->use_buf[mplane] = !plane_aligned[mplane]; - if (!(r->pass_through_y && mplane == 0)) - r->use_buf[mplane] |= !!r->repack; - - struct mp_image *tmpi = r->use_buf[mplane] ? r->tmp : mpi; + struct mp_image *tmpi = direct[mplane] ? mpi : r->tmp; buf->plane[n].data = tmpi->planes[mplane]; buf->plane[n].stride = tmpi->stride[mplane]; - buf->plane[n].mask = r->use_buf[mplane] ? r->zmask[mplane] - : ZIMG_BUFFER_MAX; + buf->plane[n].mask = direct[mplane] ? ZIMG_BUFFER_MAX : r->zmask[mplane]; } - *cb = r->repack ? r->repack : repack_align; - - r->mpi = mpi; -} - -static void setup_nv_packer(struct mp_zimg_repack *r) -{ - struct mp_regular_imgfmt desc; - if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) - return; - - // Check for NV. - if (desc.num_planes != 2) - return; - if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1) - return; - if (desc.planes[1].num_components != 2) - return; - int cr0 = desc.planes[1].components[0]; - int cr1 = desc.planes[1].components[1]; - if (cr0 > cr1) - MPSWAP(int, cr0, cr1); - if (cr0 != 2 || cr1 != 3) - return; - - // Construct equivalent planar format. - struct mp_regular_imgfmt desc2 = desc; - desc2.num_planes = 3; - desc2.planes[1].num_components = 1; - desc2.planes[1].components[0] = 2; - desc2.planes[2].num_components = 1; - desc2.planes[2].components[0] = 3; - // For P010. Strangely this concept exists only for the NV format. - if (desc2.component_pad > 0) - desc2.component_pad = 0; - - int planar_fmt = mp_find_regular_imgfmt(&desc2); - if (!planar_fmt) - return; - - for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { - const struct regular_repacker *pa = ®ular_repackers[i]; - - void (*repack_cb)(void *p1, void *p2[], int x0, int x1) = - r->pack ? pa->pa_scanline : pa->un_scanline; - - if (pa->packed_width != desc.component_size * 2 * 8 || - pa->component_width != desc.component_size * 8 || - pa->num_components != 2 || - pa->prepadding != 0 || - !repack_cb) - continue; - - r->repack = repack_nv; - r->pass_through_y = true; - r->packed_repack_scanline = repack_cb; - r->zimgfmt = planar_fmt; - r->components[0] = desc.planes[1].components[0] - 1; - r->components[1] = desc.planes[1].components[1] - 1; - return; - } + return true; } -static void setup_misc_packer(struct mp_zimg_repack *r) +// (ctx and st can be NULL for probing.) +static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, + bool pack, struct mp_image_params *user_fmt, + struct mp_zimg_context *ctx, + struct mp_zimg_state *st) { - // Although it's in regular_repackers[], the generic mpv imgfmt metadata - // can't handle it yet. - if (r->zimgfmt == IMGFMT_RGB30) { - struct mp_regular_imgfmt planar10 = { - .component_type = MP_COMPONENT_TYPE_UINT, - .component_size = 2, - .component_pad = -6, - .num_planes = 3, - .planes = { - {1, {1}}, - {1, {2}}, - {1, {3}}, - }, - .chroma_w = 1, - .chroma_h = 1, - }; - int planar_fmt = mp_find_regular_imgfmt(&planar10); - if (!planar_fmt) - return; - r->zimgfmt = planar_fmt; - r->repack = packed_repack; - r->packed_repack_scanline = r->pack ? pa_ccc10z2 : un_ccc10x2; - static int c_order[] = {3, 2, 1}; - for (int n = 0; n < 3; n++) - r->components[n] = c_order[n] - 1; - } else if (r->zimgfmt == IMGFMT_PAL8 && !r->pack) { - struct mp_regular_imgfmt gbrap = { - .component_type = MP_COMPONENT_TYPE_UINT, - .forced_csp = MP_CSP_RGB, - .component_size = 1, - .num_planes = 4, - .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}}, }, - .chroma_w = 1, - .chroma_h = 1, - }; - int grap_fmt = mp_find_regular_imgfmt(&gbrap); - if (!grap_fmt) - return; - r->zimgfmt = grap_fmt; - r->repack = unpack_pal; - } -} + r->fmt = *user_fmt; + r->pack = pack; -// Tries to set a packer/unpacker for component-wise byte aligned RGB formats. -static void setup_regular_rgb_packer(struct mp_zimg_repack *r) -{ - struct mp_regular_imgfmt desc; - if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) - return; + zimg_image_format_default(zfmt, ZIMG_API_VERSION); - if (desc.num_planes != 1 || desc.planes[0].num_components < 2) - return; - struct mp_regular_imgfmt_plane *p = &desc.planes[0]; - - int num_real_components = 0; - bool has_alpha = false; - for (int n = 0; n < p->num_components; n++) { - if (p->components[n]) { - has_alpha |= p->components[n] == 4; - num_real_components += 1; - } else { - // padding must be in MSB or LSB - if (n != 0 && n != p->num_components - 1) - return; - } - } + int rp_flags = 0; - int depth = desc.component_size * 8 + MPMIN(0, desc.component_pad); - - // Find a physically compatible planar format (typically IMGFMT_420P). - struct mp_regular_imgfmt desc2 = desc; - desc2.forced_csp = 0; - if (desc2.component_pad > 0) - desc2.component_pad = 0; - desc2.num_planes = num_real_components; - for (int n = 0; n < desc2.num_planes; n++) { - desc2.planes[n].num_components = 1; - desc2.planes[n].components[0] = n + 1; + // For e.g. RGB565, go to lowest depth on pack for less weird dithering. + if (r->pack) { + rp_flags |= REPACK_CREATE_ROUND_DOWN; + } else { + rp_flags |= REPACK_CREATE_EXPAND_8BIT; } - if (has_alpha) - desc2.planes[desc2.num_planes - 1].components[0] = 4; - int planar_fmt = mp_find_regular_imgfmt(&desc2); - if (!planar_fmt) - return; - - for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { - const struct regular_repacker *pa = ®ular_repackers[i]; - // The following may assume little endian (because some repack backends - // use word access, while the metadata here uses byte access). + r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags); + if (!r->repack) + return false; - int prepad = p->components[0] ? 0 : 8; - int first_comp = p->components[0] ? 0 : 1; - void (*repack_cb)(void *p1, void *p2[], int x0, int x1) = - r->pack ? pa->pa_scanline : pa->un_scanline; + int align_x = mp_repack_get_align_x(r->repack); - if (pa->packed_width != desc.component_size * p->num_components * 8 || - pa->component_width != depth || - pa->num_components != num_real_components || - pa->prepadding != prepad || - !repack_cb) - continue; + r->zimgfmt = r->pack ? mp_repack_get_format_src(r->repack) + : mp_repack_get_format_dst(r->repack); - r->repack = packed_repack; - r->packed_repack_scanline = repack_cb; - r->zimgfmt = planar_fmt; - for (int n = 0; n < num_real_components; n++) { - // Determine permutation that maps component order between the two - // formats, with has_alpha special case (see above). - int c = p->components[first_comp + n]; - r->components[n] = c == 4 ? num_real_components - 1 : c - 1; - } - return; + if (ctx) { + talloc_steal(r, r->repack); + } else { + TA_FREEP(&r->repack); } -} - -// (ctx can be NULL for the sake of probing.) -static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, - struct mp_zimg_context *ctx) -{ - zimg_image_format_default(zfmt, ZIMG_API_VERSION); struct mp_image_params fmt = r->fmt; mp_image_params_guess_csp(&fmt); - r->zimgfmt = fmt.imgfmt; - - if (!r->repack) - setup_nv_packer(r); - if (!r->repack) - setup_misc_packer(r); - if (!r->repack) - setup_regular_rgb_packer(r); - struct mp_regular_imgfmt desc; if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) return false; + // Relies on zimg callbacks reading on 64 byte alignment. + if (!MP_IS_POWER_OF_2(align_x) || align_x > 64 / desc.component_size) + return false; + // no weird stuff - if (desc.num_planes > 4 || !MP_IS_POWER_OF_2(desc.chroma_w) || - !MP_IS_POWER_OF_2(desc.chroma_h)) + if (desc.num_planes > 4) return false; for (int n = 0; n < 4; n++) r->z_planes[n] = -1; - // Accept only true planar formats. for (int n = 0; n < desc.num_planes; n++) { if (desc.planes[n].num_components != 1) return false; @@ -735,7 +378,8 @@ static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, r->z_planes[3] = n; // alpha, always plane 4 in zimg #if HAVE_ZIMG_ALPHA - zfmt->alpha = ZIMG_ALPHA_STRAIGHT; + zfmt->alpha = fmt.repr.alpha == PL_ALPHA_PREMULTIPLIED + ? ZIMG_ALPHA_PREMULTIPLIED : ZIMG_ALPHA_STRAIGHT; #else return false; #endif @@ -744,26 +388,36 @@ static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, r->num_planes = desc.num_planes; + // Take care of input/output size, including slicing. // Note: formats with subsampled chroma may have odd width or height in // mpv and FFmpeg. This is because the width/height is actually a cropping // rectangle. Reconstruct the image allocation size and set the cropping. - zfmt->width = r->real_w = MP_ALIGN_UP(fmt.w, desc.chroma_w); - zfmt->height = r->real_h = MP_ALIGN_UP(fmt.h, desc.chroma_h); - if (!r->pack && ctx) { - // Relies on ctx->zimg_dst being initialized first. - struct mp_zimg_repack *dst = ctx->zimg_dst; - zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w; - zfmt->active_region.height = dst->real_h * (double)fmt.h / dst->fmt.h; + zfmt->width = r->real_w = MP_ALIGN_UP(fmt.w, 1 << desc.chroma_xs); + zfmt->height = r->real_h = MP_ALIGN_UP(fmt.h, 1 << desc.chroma_ys); + if (st) { + if (r->pack) { + zfmt->height = r->real_h = st->slice_h = + MPMIN(st->slice_y + st->slice_h, r->real_h) - st->slice_y; + + assert(MP_IS_ALIGNED(r->real_h, 1 << desc.chroma_ys)); + } else { + // Relies on st->dst being initialized first. + struct mp_zimg_repack *dst = st->dst; + + zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w; + zfmt->active_region.height = dst->real_h * st->scale_y; + zfmt->active_region.top = st->slice_y * st->scale_y; + } } - zfmt->subsample_w = mp_log2(desc.chroma_w); - zfmt->subsample_h = mp_log2(desc.chroma_h); + zfmt->subsample_w = desc.chroma_xs; + zfmt->subsample_h = desc.chroma_ys; zfmt->color_family = ZIMG_COLOR_YUV; if (desc.num_planes <= 2) { zfmt->color_family = ZIMG_COLOR_GREY; - } else if (fmt.color.space == MP_CSP_RGB || fmt.color.space == MP_CSP_XYZ) { + } else if (fmt.repr.sys == PL_COL |