diff options
-rw-r--r-- | test/ref/repack.txt | 163 | ||||
-rw-r--r-- | test/repack.c | 249 | ||||
-rw-r--r-- | test/tests.c | 23 | ||||
-rw-r--r-- | test/tests.h | 7 | ||||
-rw-r--r-- | video/img_format.h | 3 | ||||
-rw-r--r-- | video/repack.c | 1110 | ||||
-rw-r--r-- | video/repack.h | 72 | ||||
-rw-r--r-- | video/zimg.c | 954 | ||||
-rw-r--r-- | wscript_build.py | 2 |
9 files changed, 1688 insertions, 895 deletions
diff --git a/test/ref/repack.txt b/test/ref/repack.txt new file mode 100644 index 0000000000..3946a6eb47 --- /dev/null +++ b/test/ref/repack.txt @@ -0,0 +1,163 @@ +0bgr => [pa] [un] gbrp | a=1:1 [tu] [tp] +0rgb => [pa] [un] gbrp | a=1:1 [tu] [tp] +abgr => [pa] [un] gbrap | a=1:1 [tu] [tp] +argb => [pa] [un] gbrap | a=1:1 [tu] [tp] +ayuv64 => [pa] [un] yuva444p16 | a=1:1 [tu] [tp] +ayuv64be => [pa] [un] yuva444p16 | a=1:1 [tu] [tp] +bayer_bggr16 => no +bayer_bggr16be => no +bayer_bggr8 => no +bayer_gbrg16 => no +bayer_gbrg16be => no +bayer_gbrg8 => no +bayer_grbg16 => no +bayer_grbg16be => no +bayer_grbg8 => no +bayer_rggb16 => no +bayer_rggb16be => no +bayer_rggb8 => no +bgr0 => [pa] [un] gbrp | a=1:1 [tu] [tp] +bgr24 => [pa] [un] gbrp | a=1:1 +bgr4 => no +bgr444 => [pa] [un] gbrp4 | a=1:1 +bgr444 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr444be => [pa] [un] gbrp4 | a=1:1 +bgr444be => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr48 => [pa] [un] gbrp16 | a=1:1 +bgr48be => [pa] [un] gbrp16 | a=1:1 +bgr4_byte => [pa] [un] gbrp2 | a=1:1 +bgr4_byte => [pa] [un] gbrp1 | a=1:1 [round-down] +bgr4_byte => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr555 => [pa] [un] gbrp5 | a=1:1 +bgr555 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr555be => [pa] [un] gbrp5 | a=1:1 +bgr555be => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr565 => [pa] [un] gbrp6 | a=1:1 +bgr565 => [pa] [un] gbrp5 | a=1:1 [round-down] +bgr565 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr565be => [pa] [un] gbrp6 | a=1:1 +bgr565be => [pa] [un] gbrp5 | a=1:1 [round-down] +bgr565be => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr8 => [pa] [un] gbrp3 | a=1:1 +bgr8 => [pa] [un] gbrp2 | a=1:1 [round-down] +bgr8 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgra => [pa] [un] gbrap | a=1:1 [tu] [tp] +bgra64 => [pa] [un] gbrap16 | a=1:1 +bgra64be => [pa] [un] gbrap16 | a=1:1 +cuda => no +d3d11 => no +d3d11va_vld => no +drm_prime => no +dxva2_vld => no +gbrap10be => [pa] [un] gbrap10 | a=1:1 +gbrap12be => [pa] [un] gbrap12 | a=1:1 +gbrap16be 
=> [pa] [un] gbrap16 | a=1:1 +gbrapf32be => [pa] [un] gbrapf32 | a=1:1 +gbrp10be => [pa] [un] gbrp10 | a=1:1 +gbrp12be => [pa] [un] gbrp12 | a=1:1 +gbrp14be => [pa] [un] gbrp14 | a=1:1 +gbrp16be => [pa] [un] gbrp16 | a=1:1 +gbrp9be => [pa] [un] gbrp9 | a=1:1 +gbrpf32be => [pa] [un] gbrpf32 | a=1:1 +gray10be => [pa] [un] gray10 | a=1:1 +gray12be => [pa] [un] gray12 | a=1:1 +gray14be => [pa] [un] gray14 | a=1:1 +gray16be => [pa] [un] gray16 | a=1:1 +gray9be => [pa] [un] gray9 | a=1:1 +grayf32be => [pa] [un] grayf32 | a=1:1 +mediacodec => no +mmal => no +monob => [pa] [un] y1 | a=8:1 [tu] [tp] +monob => [pa] [un] gray | a=8:1 [expand-8bit] +monow => [pa] [un] y1 | a=8:1 [tu] [tp] +monow => [pa] [un] gray | a=8:1 [expand-8bit] +nv12 => [pa] [un] yuv420p | a=2:2 [tu] [tp] +nv16 => [pa] [un] yuv422p | a=2:1 +nv20 => [pa] [un] yuv422p10 | a=2:1 +nv20be => [pa] [un] yuv422p10 | a=2:1 +nv21 => [pa] [un] yuv420p | a=2:2 [tu] [tp] +nv24 => [pa] [un] yuv444p | a=1:1 +nv42 => [pa] [un] yuv444p | a=1:1 +opencl => no +p010 => [pa] [un] yuv420p16 | a=2:2 +p010be => [pa] [un] yuv420p16 | a=2:2 +p016 => [pa] [un] yuv420p16 | a=2:2 +p016be => [pa] [un] yuv420p16 | a=2:2 +pal8 => [un] gbrap | a=1:1 +qsv => no +rgb0 => [pa] [un] gbrp | a=1:1 [tu] [tp] +rgb24 => [pa] [un] gbrp | a=1:1 +rgb30 => [pa] [un] gbrp10 | a=1:1 +rgb4 => no +rgb444 => [pa] [un] gbrp4 | a=1:1 +rgb444 => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb444be => [pa] [un] gbrp4 | a=1:1 +rgb444be => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb48 => [pa] [un] gbrp16 | a=1:1 +rgb48be => [pa] [un] gbrp16 | a=1:1 [tu] [tp] +rgb4_byte => [pa] [un] gbrp2 | a=1:1 +rgb4_byte => [pa] [un] gbrp1 | a=1:1 [round-down] +rgb4_byte => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb555 => [pa] [un] gbrp5 | a=1:1 +rgb555 => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb555be => [pa] [un] gbrp5 | a=1:1 +rgb555be => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb565 => [pa] [un] gbrp6 | a=1:1 +rgb565 => [pa] [un] gbrp5 | a=1:1 [round-down] +rgb565 => [pa] [un] 
gbrp | a=1:1 [expand-8bit] +rgb565be => [pa] [un] gbrp6 | a=1:1 +rgb565be => [pa] [un] gbrp5 | a=1:1 [round-down] +rgb565be => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb8 => [pa] [un] gbrp3 | a=1:1 +rgb8 => [pa] [un] gbrp2 | a=1:1 [round-down] +rgb8 => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgba => [pa] [un] gbrap | a=1:1 [tu] [tp] +rgba64 => [pa] [un] gbrap16 | a=1:1 [tu] [tp] +rgba64be => [pa] [un] gbrap16 | a=1:1 +uyvy422 => [pa] [un] yuv422p | a=2:1 +uyyvyy411 => no +vaapi => no +vaapi_idct => no +vaapi_moco => no +vdpau => no +vdpau_output => no +videotoolbox => no +vulkan => no +xvmc => no +xyz12 => [pa] [un] gbrp16 | a=1:1 +xyz12be => [pa] [un] gbrp16 | a=1:1 +y210 => [pa] [un] yuv422p16 | a=2:1 +y210be => [pa] [un] yuv422p16 | a=2:1 +ya16 => [pa] [un] yap16 | a=1:1 [tu] [tp] +ya16be => [pa] [un] yap16 | a=1:1 +ya8 => [pa] [un] yap8 | a=1:1 +yuv420p10be => [pa] [un] yuv420p10 | a=2:2 +yuv420p12be => [pa] [un] yuv420p12 | a=2:2 +yuv420p14be => [pa] [un] yuv420p14 | a=2:2 +yuv420p16be => [pa] [un] yuv420p16 | a=2:2 +yuv420p9be => [pa] [un] yuv420p9 | a=2:2 +yuv422p10be => [pa] [un] yuv422p10 | a=2:1 +yuv422p12be => [pa] [un] yuv422p12 | a=2:1 +yuv422p14be => [pa] [un] yuv422p14 | a=2:1 +yuv422p16be => [pa] [un] yuv422p16 | a=2:1 [tu] [tp] +yuv422p9be => [pa] [un] yuv422p9 | a=2:1 +yuv440p10be => [pa] [un] yuv440p10 | a=1:2 +yuv440p12be => [pa] [un] yuv440p12 | a=1:2 +yuv444p10be => [pa] [un] yuv444p10 | a=1:1 +yuv444p12be => [pa] [un] yuv444p12 | a=1:1 +yuv444p14be => [pa] [un] yuv444p14 | a=1:1 +yuv444p16be => [pa] [un] yuv444p16 | a=1:1 +yuv444p9be => [pa] [un] yuv444p9 | a=1:1 +yuva420p10be => [pa] [un] yuva420p10 | a=2:2 +yuva420p16be => [pa] [un] yuva420p16 | a=2:2 +yuva420p9be => [pa] [un] yuva420p9 | a=2:2 +yuva422p10be => [pa] [un] yuva422p10 | a=2:1 +yuva422p12be => [pa] [un] yuva422p12 | a=2:1 +yuva422p16be => [pa] [un] yuva422p16 | a=2:1 +yuva422p9be => [pa] [un] yuva422p9 | a=2:1 +yuva444p10be => [pa] [un] yuva444p10 | a=1:1 +yuva444p12be => [pa] 
[un] yuva444p12 | a=1:1 +yuva444p16be => [pa] [un] yuva444p16 | a=1:1 +yuva444p9be => [pa] [un] yuva444p9 | a=1:1 +yuyv422 => [pa] [un] yuv422p | a=2:1 +yvyu422 => [pa] [un] yuv422p | a=2:1 [tu] [tp] diff --git a/test/repack.c b/test/repack.c new file mode 100644 index 0000000000..ede6046350 --- /dev/null +++ b/test/repack.c @@ -0,0 +1,249 @@ +#include <libavutil/pixfmt.h> + +#include "common/common.h" +#include "tests.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "video/repack.h" +#include "video/zimg.h" + +// Excuse the utter stupidity. +#define UNFUCK(v) ((v) > 0 ? (v) : pixfmt2imgfmt(-(v))) +static_assert(IMGFMT_START > 0, ""); +#define IMGFMT_GBRP (-AV_PIX_FMT_GBRP) +#define IMGFMT_GBRAP (-AV_PIX_FMT_GBRAP) + +struct entry { + int w, h; + int fmt_a; + const void *const a[4]; + int fmt_b; + const void *const b[4]; + int flags; +}; + +#define P8(...) (const uint8_t[]){__VA_ARGS__} +#define P16(...) (const uint16_t[]){__VA_ARGS__} + +// Warning: only entries that match existing conversions are tested. +static const struct entry repack_tests[] = { + // Note: the '0' tests rely on 0 being written, although by definition the + // contents of this padding is undefined. The repacker always writes + // it this way, though. 
+ {1, 1, IMGFMT_RGB0, {P8(1, 2, 3, 0)}, + IMGFMT_GBRP, {P8(2), P8(3), P8(1)}}, + {1, 1, IMGFMT_BGR0, {P8(1, 2, 3, 0)}, + IMGFMT_GBRP, {P8(2), P8(1), P8(3)}}, + {1, 1, IMGFMT_0RGB, {P8(0, 1, 2, 3)}, + IMGFMT_GBRP, {P8(2), P8(3), P8(1)}}, + {1, 1, IMGFMT_0BGR, {P8(0, 1, 2, 3)}, + IMGFMT_GBRP, {P8(2), P8(1), P8(3)}}, + {1, 1, IMGFMT_RGBA, {P8(1, 2, 3, 4)}, + IMGFMT_GBRAP, {P8(2), P8(3), P8(1), P8(4)}}, + {1, 1, IMGFMT_BGRA, {P8(1, 2, 3, 4)}, + IMGFMT_GBRAP, {P8(2), P8(1), P8(3), P8(4)}}, + {1, 1, IMGFMT_ARGB, {P8(4, 1, 2, 3)}, + IMGFMT_GBRAP, {P8(2), P8(3), P8(1), P8(4)}}, + {1, 1, IMGFMT_ABGR, {P8(4, 1, 2, 3)}, + IMGFMT_GBRAP, {P8(2), P8(1), P8(3), P8(4)}}, + {1, 1, IMGFMT_RGBA64, {P16(0x1a1b, 0x2a2b, 0x3a3b, 0x4a4b)}, + -AV_PIX_FMT_GBRAP16, {P16(0x2a2b), P16(0x3a3b), + P16(0x1a1b), P16(0x4a4b)}}, + {1, 1, -AV_PIX_FMT_RGB48BE, {P16(0x1a1b, 0x2a2b, 0x3a3b)}, + -AV_PIX_FMT_GBRP16, {P16(0x2b2a), P16(0x3b3a), + P16(0x1b1a)}}, + {8, 1, -AV_PIX_FMT_MONOWHITE, {P8(0xAA)}, + IMGFMT_Y1, {P8(0, 1, 0, 1, 0, 1, 0, 1)}}, + {8, 1, -AV_PIX_FMT_MONOBLACK, {P8(0xAA)}, + IMGFMT_Y1, {P8(1, 0, 1, 0, 1, 0, 1, 0)}}, + {2, 2, IMGFMT_NV12, {P8(1, 2, 3, 4), P8(5, 6)}, + IMGFMT_420P, {P8(1, 2, 3, 4), P8(5), P8(6)}}, + {2, 2, -AV_PIX_FMT_NV21, {P8(1, 2, 3, 4), P8(5, 6)}, + IMGFMT_420P, {P8(1, 2, 3, 4), P8(6), P8(5)}}, + {1, 1, -AV_PIX_FMT_AYUV64, {P16(1, 2, 3, 4)}, + -AV_PIX_FMT_YUVA444P16, {P16(2), P16(3), P16(4), P16(1)}}, + {1, 1, -AV_PIX_FMT_AYUV64BE, {P16(0x0100, 0x0200, 0x0300, 0x0400)}, + -AV_PIX_FMT_YUVA444P16, {P16(2), P16(3), P16(4), P16(1)}}, + {2, 1, -AV_PIX_FMT_YVYU422, {P8(1, 2, 3, 4)}, + -AV_PIX_FMT_YUV422P, {P8(1, 3), P8(4), P8(2)}}, + {1, 1, -AV_PIX_FMT_YA16, {P16(1, 2)}, + IMGFMT_YAP16, {P16(1), P16(2)}}, + {2, 1, -AV_PIX_FMT_YUV422P16BE, {P16(0x1a1b, 0x2a2b), P16(0x3a3b), + P16(0x4a4b)}, + -AV_PIX_FMT_YUV422P16, {P16(0x1b1a, 0x2b2a), P16(0x3b3a), + P16(0x4b4a)}}, +}; + +static bool is_true_planar(int imgfmt) +{ + struct mp_regular_imgfmt desc; + if 
(!mp_get_regular_imgfmt(&desc, imgfmt)) + return false; + + for (int n = 0; n < desc.num_planes; n++) { + if (desc.planes[n].num_components != 1) + return false; + } + + return true; +} + +static int try_repack(struct test_ctx *ctx, FILE *f, int imgfmt, int flags, + int not_if_fmt) +{ + char *head = mp_tprintf(80, "%-15s =>", mp_imgfmt_to_name(imgfmt)); + struct mp_repack *un = mp_repack_create_planar(imgfmt, false, flags); + struct mp_repack *pa = mp_repack_create_planar(imgfmt, true, flags); + + // If both exists, they must be always symmetric. + if (un && pa) { + assert(mp_repack_get_format_src(pa) == mp_repack_get_format_dst(un)); + assert(mp_repack_get_format_src(un) == mp_repack_get_format_dst(pa)); + assert(mp_repack_get_align_x(pa) == mp_repack_get_align_x(un)); + assert(mp_repack_get_align_y(pa) == mp_repack_get_align_y(un)); + } + + int a = 0; + int b = 0; + if (un) { + a = mp_repack_get_format_src(un); + b = mp_repack_get_format_dst(un); + } else if (pa) { + a = mp_repack_get_format_dst(pa); + b = mp_repack_get_format_src(pa); + } + + // Skip the identity ones because they're uninteresting, and add too much + // noise. But still make sure they behave as expected. + if (is_true_planar(imgfmt)) { + // (note that we require alpha-enabled zimg) + assert(mp_zimg_supports_in_format(imgfmt)); + assert(un && pa); + assert(a == imgfmt && b == imgfmt); + talloc_free(pa); + talloc_free(un); + return 0; + } + + struct mp_repack *rp = pa ? pa : un; + if (!rp) { + if (!flags) + fprintf(f, "%s no\n", head); + return 0; + } + + assert(a == imgfmt); + if (b && b == not_if_fmt) { + talloc_free(pa); + talloc_free(un); + return 0; + } + + fprintf(f, "%s %4s %4s %-15s |", head, pa ? "[pa]" : "", un ? 
"[un]" : "", + mp_imgfmt_to_name(b)); + + fprintf(f, " a=%d:%d", mp_repack_get_align_x(rp), mp_repack_get_align_y(rp)); + + if (flags & REPACK_CREATE_ROUND_DOWN) + fprintf(f, " [round-down]"); + if (flags & REPACK_CREATE_EXPAND_8BIT) + fprintf(f, " [expand-8bit]"); + + // LCM of alignment of all packers. + int ax = mp_repack_get_align_x(rp); + int ay = mp_repack_get_align_y(rp); + if (pa && un) { + ax = MPMAX(mp_repack_get_align_x(pa), mp_repack_get_align_x(un)); + ay = MPMAX(mp_repack_get_align_y(pa), mp_repack_get_align_y(un)); + } + + for (int n = 0; n < MP_ARRAY_SIZE(repack_tests); n++) { + const struct entry *e = &repack_tests[n]; + int fmt_a = UNFUCK(e->fmt_a); + int fmt_b = UNFUCK(e->fmt_b); + if (!(fmt_a == a && fmt_b == b && e->flags == flags)) + continue; + + // We convert a "random" macro pixel to catch potential addressing bugs + // that might be ignored with (0, 0) origins. + struct mp_image *ia = mp_image_alloc(fmt_a, e->w * 5 * ax, e->h * 5 * ay); + struct mp_image *ib = mp_image_alloc(fmt_b, e->w * 7 * ax, e->h * 6 * ay); + int sx = 4 * ax, sy = 3 * ay, dx = 3 * ax, dy = 2 * ay; + + assert(ia && ib); + + for (int pack = 0; pack < 2; pack++) { + struct mp_repack *repacker = pack ? pa : un; + if (!repacker) + continue; + + mp_image_clear(ia, 0, 0, ia->w, ia->h); + mp_image_clear(ib, 0, 0, ib->w, ib->h); + + const void *const *dstd = pack ? e->a : e->b; + const void *const *srcd = pack ? e->b : e->a; + struct mp_image *dsti = pack ? ia : ib; + struct mp_image *srci = pack ? 
ib : ia; + + bool r = repack_config_buffers(repacker, 0, dsti, 0, srci, NULL); + assert(r); + + for (int p = 0; p < srci->num_planes; p++) { + uint8_t *ptr = mp_image_pixel_ptr(srci, p, sx, sy); + for (int y = 0; y < e->h >> srci->fmt.ys[p]; y++) { + int w = e->w >> srci->fmt.xs[p]; + int wb = (w * srci->fmt.bpp[p] + 7) / 8; + const void *cptr = (uint8_t *)srcd[p] + wb * y; + memcpy(ptr + srci->stride[p] * y, cptr, wb); + } + } + + repack_line(repacker, dx, dy, sx, sy, e->w); + + for (int p = 0; p < dsti->num_planes; p++) { + uint8_t *ptr = mp_image_pixel_ptr(dsti, p, dx, dy); + for (int y = 0; y < e->h >> dsti->fmt.ys[p]; y++) { + int w = e->w >> dsti->fmt.xs[p]; + int wb = (w * dsti->fmt.bpp[p] + 7) / 8; + const void *cptr = (uint8_t *)dstd[p] + wb * y; + assert_memcmp(ptr + dsti->stride[p] * y, cptr, wb); + } + } + + fprintf(f, " [t%s]", pack ? "p" : "u"); + } + + talloc_free(ia); + talloc_free(ib); + } + + fprintf(f, "\n"); + + talloc_free(pa); + talloc_free(un); + return b; +} + +static void run(struct test_ctx *ctx) +{ + FILE *f = test_open_out(ctx, "repack.txt"); + + init_imgfmts_list(); + for (int n = 0; n < num_imgfmts; n++) { + int imgfmt = imgfmts[n]; + + int other = try_repack(ctx, f, imgfmt, 0, 0); + try_repack(ctx, f, imgfmt, REPACK_CREATE_ROUND_DOWN, other); + try_repack(ctx, f, imgfmt, REPACK_CREATE_EXPAND_8BIT, other); + } + + fclose(f); + + assert_text_files_equal(ctx, "repack.txt", "repack.txt", + "This can fail if FFmpeg/libswscale adds or removes pixfmts."); +} + +const struct unittest test_repack = { + .name = "repack", + .run = run, +}; diff --git a/test/tests.c b/test/tests.c index 9ef88f4a8d..d8df43f319 100644 --- a/test/tests.c +++ b/test/tests.c @@ -12,6 +12,7 @@ static const struct unittest *unittests[] = { &test_paths, &test_repack_sws, #if HAVE_ZIMG + &test_repack, // zimg only due to cross-checking with zimg.c &test_repack_zimg, #endif NULL @@ -128,3 +129,25 @@ void assert_text_files_equal_impl(const char *file, int line, abort(); } } 
+ +static void hexdump(const uint8_t *d, size_t size) +{ + printf("|"); + while (size--) { + printf(" %02x", d[0]); + d++; + } + printf(" |\n"); +} + +void assert_memcmp_impl(const char *file, int line, + const void *a, const void *b, size_t size) +{ + if (memcmp(a, b, size) == 0) + return; + + printf("%s:%d: mismatching data:\n", file, line); + hexdump(a, size); + hexdump(b, size); + abort(); +} diff --git a/test/tests.h b/test/tests.h index f4065f596f..8b2eb98174 100644 --- a/test/tests.h +++ b/test/tests.h @@ -43,6 +43,7 @@ extern const struct unittest test_json; extern const struct unittest test_linked_list; extern const struct unittest test_repack_sws; extern const struct unittest test_repack_zimg; +extern const struct unittest test_repack; extern const struct unittest test_paths; #define assert_true(x) assert(x) @@ -54,6 +55,10 @@ extern const struct unittest test_paths; #define assert_float_equal(a, b, tolerance) \ assert_float_equal_impl(__FILE__, __LINE__, (a), (b), (tolerance)) +// Assert that memcmp(a,b,s)==0, or hexdump output on failure. +#define assert_memcmp(a, b, s) \ + assert_memcmp_impl(__FILE__, __LINE__, (a), (b), (s)) + // Require that the files "ref" and "new" are the same. The paths can be // relative to ref_path and out_path respectively. If they're not the same, // the output of "diff" is shown, the err message (if not NULL), and the test @@ -69,6 +74,8 @@ void assert_float_equal_impl(const char *file, int line, void assert_text_files_equal_impl(const char *file, int line, struct test_ctx *ctx, const char *ref, const char *new, const char *err); +void assert_memcmp_impl(const char *file, int line, + const void *a, const void *b, size_t size); // Open a new file in the out_path. Always succeeds. 
FILE *test_open_out(struct test_ctx *ctx, const char *name); diff --git a/video/img_format.h b/video/img_format.h index 8e55cc9493..b0fdef8a50 100644 --- a/video/img_format.h +++ b/video/img_format.h @@ -69,8 +69,9 @@ struct mp_imgfmt_desc { int flags; // MP_IMGFLAG_* bitfield int8_t num_planes; int8_t chroma_xs, chroma_ys; // chroma shift (i.e. log2 of chroma pixel size) - int8_t align_x, align_y; // pixel size to get byte alignment and to get + int8_t align_x, align_y; // pixel count to get byte alignment and to get // to a pixel pos where luma & chroma aligns + // always power of 2 int8_t bytes[MP_MAX_PLANES]; // bytes per pixel (MP_IMGFLAG_BYTE_ALIGNED) int8_t bpp[MP_MAX_PLANES]; // bits per pixel int8_t plane_bits; // number of bits in use for plane 0 diff --git a/video/repack.c b/video/repack.c new file mode 100644 index 0000000000..359e32996d --- /dev/null +++ b/video/repack.c @@ -0,0 +1,1110 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <math.h> + +#include <libavutil/bswap.h> +#include <libavutil/pixfmt.h> + +#include "common/common.h" +#include "repack.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "video/mp_image.h" + +enum repack_step_type { + REPACK_STEP_REPACK, + REPACK_STEP_ENDIAN, +}; + +struct repack_step { + enum repack_step_type type; + // 0=input, 1=output + struct mp_image *buf[2]; + bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer + struct mp_imgfmt_desc fmt[2]; + struct mp_image *tmp; // output buffer, if needed +}; + +struct mp_repack { + bool pack; // if false, this is for unpacking + int flags; + int imgfmt_user; // original mp format (unchanged endian) + int imgfmt_a; // original mp format (possibly packed format, + // swapped endian) + int imgfmt_b; // equivalent unpacked/planar format + struct mp_imgfmt_desc fmt_a;// ==imgfmt_a + struct mp_imgfmt_desc fmt_b;// ==imgfmt_b + + void (*repack)(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w); + + bool passthrough_y; // possible luma plane optimization for e.g. nv12 + int endian_size; // endian swap; 0=none, 2/4=swap word size + + // For packed_repack. + int components[4]; // b[n] = mp_image.planes[components[n]] + // pack: a is dst, b is src + // unpack: a is src, b is dst + void (*packed_repack_scanline)(void *a, void *b[], int w); + + // Fringe RGB/YUV. + uint8_t comp_size; + uint8_t *comp_map; + uint8_t comp_shifts[3]; + uint8_t *comp_lut; + + // REPACK_STEP_REPACK: if true, need to copy this plane + bool copy_buf[4]; + + struct repack_step steps[4]; + int num_steps; + + bool configured; +}; + +// depth = number of LSB in use +static int find_gbrp_format(int depth, int num_planes) +{ + if (num_planes != 3 && num_planes != 4) + return 0; + struct mp_regular_imgfmt desc = { + .component_type = MP_COMPONENT_TYPE_UINT, + .forced_csp = MP_CSP_RGB, + .component_size = depth > 8 ? 
2 : 1, + .component_pad = depth - (depth > 8 ? 16 : 8), + .num_planes = num_planes, + .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} }, + }; + return mp_find_regular_imgfmt(&desc); +} + +// depth = number of LSB in use +static int find_yuv_format(int depth, int num_planes) +{ + if (num_planes < 1 || num_planes > 4) + return 0; + struct mp_regular_imgfmt desc = { + .component_type = MP_COMPONENT_TYPE_UINT, + .component_size = depth > 8 ? 2 : 1, + .component_pad = depth - (depth > 8 ? 16 : 8), + .num_planes = num_planes, + .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} }, + }; + if (num_planes == 2) + desc.planes[1].components[0] = 4; + return mp_find_regular_imgfmt(&desc); +} + +// Copy one line on the plane p. +static void copy_plane(struct mp_image *dst, int dst_x, int dst_y, + struct mp_image *src, int src_x, int src_y, + int w, int p) +{ + // Number of lines on this plane. + int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1; + size_t size = mp_image_plane_bytes(dst, p, dst_x, w); + + assert(dst->fmt.bpp[p] == src->fmt.bpp[p]); + + for (int y = 0; y < h; y++) { + void *pd = mp_image_pixel_ptr(dst, p, dst_x, dst_y + y); + void *ps = mp_image_pixel_ptr(src, p, src_x, src_y + y); + memcpy(pd, ps, size); + } +} + +// Swap endian for one line. +static void swap_endian(struct mp_image *dst, int dst_x, int dst_y, + struct mp_image *src, int src_x, int src_y, + int w, int endian_size) +{ + assert(src->fmt.num_planes == dst->fmt.num_planes); + + for (int p = 0; p < dst->fmt.num_planes; p++) { + int xs = dst->fmt.xs[p]; + int bpp = dst->fmt.bytes[p]; + int words_per_pixel = bpp / endian_size; + int num_words = ((w + (1 << xs) - 1) >> xs) * words_per_pixel; + // Number of lines on this plane. 
+ int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1; + + assert(src->fmt.bytes[p] == bpp); + + for (int y = 0; y < h; y++) { + void *s = mp_image_pixel_ptr(src, p, src_x, src_y + y); + void *d = mp_image_pixel_ptr(dst, p, dst_x, dst_y + y); + switch (endian_size) { + case 2: + for (int x = 0; x < num_words; x++) + ((uint16_t *)d)[x] = av_bswap16(((uint16_t *)s)[x]); + break; + case 4: + for (int x = 0; x < num_words; x++) + ((uint32_t *)d)[x] = av_bswap32(((uint32_t *)s)[x]); + break; + default: + assert(0); + } + } + } +} + +// PA = PAck, copy planar input to single packed array +// UN = UNpack, copy packed input to planar output +// Naming convention: +// pa_/un_ prefix to identify conversion direction. +// Left (LSB, lowest byte address) -> Right (MSB, highest byte address). +// (This is unusual; MSB to LSB is more commonly used to describe formats, +// but our convention makes more sense for byte access in little endian.) +// "c" identifies a color component. +// "z" identifies known zero padding. +// "x" identifies uninitialized padding. +// A component is followed by its size in bits. +// Size can be omitted for multiple uniform components (c8c8c8 == ccc8). +// Unpackers will often use "x" for padding, because they ignore it, while +// packers will use "z" because they write zero. 

// Generates a packer "name" that merges four planar components into one packed
// word per pixel. Each plane sample is widened to packed_t and moved to its bit
// position (sh_cN) before being ORed into the result.
#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (packed_t)((plane_t *)src[0])[i] << (sh_c0);    \
            word |= (packed_t)((plane_t *)src[1])[i] << (sh_c1);            \
            word |= (packed_t)((plane_t *)src[2])[i] << (sh_c2);            \
            word |= (packed_t)((plane_t *)src[3])[i] << (sh_c3);            \
            out[i] = word;                                                  \
        }                                                                   \
    }

// Generates the matching unpacker "name": every packed word is split into four
// components by shifting right by sh_cN and masking, one output plane each.
#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        for (int i = 0; i < w; i++) {                                       \
            const packed_t word = in[i];                                    \
            ((plane_t *)dst[0])[i] = (word >> (sh_c0)) & (mask);            \
            ((plane_t *)dst[1])[i] = (word >> (sh_c1)) & (mask);            \
            ((plane_t *)dst[2])[i] = (word >> (sh_c2)) & (mask);            \
            ((plane_t *)dst[3])[i] = (word >> (sh_c3)) & (mask);            \
        }                                                                   \
    }


// Three-component packer; "pad" is ORed into every word and supplies the
// constant value of the bits not covered by a component.
#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (pad);                                          \
            word |= (packed_t)((plane_t *)src[0])[i] << (sh_c0);            \
            word |= (packed_t)((plane_t *)src[1])[i] << (sh_c1);            \
            word |= (packed_t)((plane_t *)src[2])[i] << (sh_c2);            \
            out[i] = word;                                                  \
        }                                                                   \
    }

UN_WORD_4(un_cccc8,   uint32_t, uint8_t,  0, 8, 16, 24, 0xFFu)
PA_WORD_4(pa_cccc8,   uint32_t, uint8_t,  0, 8, 16, 24)
// Not sure if this is a good idea; there may be no alignment guarantee.
+UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu) +PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48) + +#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ + } \ + } + +UN_WORD_3(un_ccc8x8, uint32_t, uint8_t, 0, 8, 16, 0xFFu) +PA_WORD_3(pa_ccc8z8, uint32_t, uint8_t, 0, 8, 16, 0) +UN_WORD_3(un_x8ccc8, uint32_t, uint8_t, 8, 16, 24, 0xFFu) +PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0) +UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu) +PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 20, 10, 0, 0) + +#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \ + static void name(void *dst, void *src[], int w) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = (pad) | \ + ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ + ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \ + } \ + } + +#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + } \ + } + +UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu) +PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0) +UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu) +PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0) + +#define PA_SEQ_3(name, comp_t) \ + static void name(void *dst, void *src[], int w) { \ + comp_t *r = dst; \ + for (int x = 0; x < w; x++) { \ + *r++ = ((comp_t *)src[0])[x]; \ + *r++ = ((comp_t *)src[1])[x]; \ + *r++ = ((comp_t *)src[2])[x]; \ + } \ + } + +#define UN_SEQ_3(name, comp_t) \ + static void name(void *src, void 
*dst[], int w) { \ + comp_t *r = src; \ + for (int x = 0; x < w; x++) { \ + ((comp_t *)dst[0])[x] = *r++; \ + ((comp_t *)dst[1])[x] = *r++; \ + ((comp_t *)dst[2])[x] = *r++; \ + } \ + } + +UN_SEQ_3(un_ccc8, uint8_t) +PA_SEQ_3(pa_ccc8, uint8_t) +UN_SEQ_3(un_ccc16, uint16_t) +PA_SEQ_3(pa_ccc16, uint16_t) + +// "regular": single packed plane, all components have same width (except padding) +struct regular_repacker { + int packed_width; // number of bits of the packed pixel + int component_width; // number of bits for a single component + int prepadding; // number of bits of LSB padding + int num_components; // number of components that can be accessed + void (*pa_scanline)(void *a, void *b[], int w); + void (*un_scanline)(void *a, void *b[], int w); +}; + +static const struct regular_repacker regular_repackers[] = { + {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8}, + {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8}, + {32, 8, 0, 4, pa_cccc8, un_cccc8}, + {64, 16, 0, 4, pa_cccc16, un_cccc16}, + {24, 8, 0, 3, pa_ccc8, un_ccc8}, + {48, 16, 0, 3, pa_ccc16, un_ccc16}, + {16, 8, 0, 2, pa_cc8, un_cc8}, + {32, 16, 0, 2, pa_cc16, un_cc16}, + {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2}, +}; + +static void packed_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint32_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + rp->packed_repack_scanline(pa, pb, w); +} + +// Tries to set a packer/unpacker for component-wise byte aligned formats. +static void setup_packed_packer(struct mp_repack *rp) +{ + struct mp_regular_imgfmt desc; + if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a)) + return; + + if (desc.num_planes != 1 || desc.planes[0].num_components < 2) + return; + struct mp_regular_imgfmt_plane *p = &desc.planes[0]; + |