summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2019-11-02 01:10:22 +0100
committerwm4 <wm4@nowhere>2019-11-02 01:10:22 +0100
commit3b56f829656a43402e417847570e83a0e5b693be (patch)
treef40c5682a401b42dbcb0e88b4c82c7e63542117e
parentc3cee4b9ecd2a13a35651b81158d1577c899daa1 (diff)
downloadmpv-3b56f829656a43402e417847570e83a0e5b693be.tar.bz2
mpv-3b56f829656a43402e417847570e83a0e5b693be.tar.xz
zimg: add semi-planar repacker
This is for formats like nv12 (including p010, nv24, etc.). Might be important for hardware decoding. Previously, this would have forced a libswscale fallback. The genericism makes this only slightly more complicated. The main complication is due to the fact that mixing planar and packed stuff is insane (thanks, Nvidia). P010 output will actually happily set any of the 6 bit "padding" LSB, that are normally supposed to be 0 (for unpadded data there is P016). Scaling happens with 16 bit precision. Not going to bother adding an extra packer which zeros them out, or with shifting them in packing/unpacking. Lets just hope nobody notices.
-rw-r--r--video/zimg.c124
1 files changed, 123 insertions, 1 deletions
diff --git a/video/zimg.c b/video/zimg.c
index 89c99b9f40..c279b02f91 100644
--- a/video/zimg.c
+++ b/video/zimg.c
@@ -287,6 +287,29 @@ PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0)
UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0)
+#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \
+ static void name(void *dst, void *src[], int x0, int x1) { \
+ for (int x = x0; x < x1; x++) { \
+ ((packed_t *)dst)[x] = (pad) | \
+ ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
+ ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \
+ } \
+ }
+
+#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \
+ static void name(void *src, void *dst[], int x0, int x1) { \
+ for (int x = x0; x < x1; x++) { \
+ packed_t c = ((packed_t *)src)[x]; \
+ ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
+ ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \
+ } \
+ }
+
+UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu)
+PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0)
+UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
+PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
+
#define PA_SEQ_3(name, comp_t) \
static void name(void *dst, void *src[], int x0, int x1) { \
comp_t *r = dst; \
@@ -325,9 +348,11 @@ struct regular_repacker {
static const struct regular_repacker regular_repackers[] = {
{32, 8, 0, 3, pa_ccc8z8, un_ccc8x8},
{32, 8, 8, 3, pa_z8ccc8, un_x8ccc8},
- {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2},
{24, 8, 0, 3, pa_ccc8, un_ccc8},
{48, 16, 0, 3, pa_ccc16, un_ccc16},
+ {16, 8, 0, 2, pa_cc8, un_cc8},
+ {32, 16, 0, 2, pa_cc16, un_cc16},
+ {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2},
};
static int packed_repack(void *user, unsigned i, unsigned x0, unsigned x1)
@@ -349,6 +374,44 @@ static int packed_repack(void *user, unsigned i, unsigned x0, unsigned x1)
return 0;
}
+static int repack_nv(void *user, unsigned i, unsigned x0, unsigned x1)
+{
+ struct mp_zimg_repack *r = user;
+
+ int xs = r->mpi->fmt.chroma_xs;
+ int ys = r->mpi->fmt.chroma_ys;
+
+ // Copy Y.
+ int l_h = 1 << ys;
+ for (int y = i; y < i + l_h; y++) {
+ ptrdiff_t bpp = r->mpi->fmt.bytes[0];
+ void *a = r->mpi->planes[0] +
+ r->mpi->stride[0] * (ptrdiff_t)y + bpp * x0;
+ void *b = r->tmp->planes[0] +
+ r->tmp->stride[0] * (ptrdiff_t)(y & r->zmask[0]) + bpp * x0;
+ size_t size = (x1 - x0) * bpp;
+ if (r->pack) {
+ memcpy(a, b, size);
+ } else {
+ memcpy(b, a, size);
+ }
+ }
+
+ uint32_t *p1 =
+ (void *)(r->mpi->planes[1] + r->mpi->stride[1] * (ptrdiff_t)(i >> ys));
+
+ void *p2[2];
+ for (int p = 0; p < 2; p++) {
+ int s = r->components[p];
+ p2[p] = r->tmp->planes[s] +
+ r->tmp->stride[s] * (ptrdiff_t)((i >> ys) & r->zmask[s]);
+ }
+
+ r->packed_repack_scanline(p1, p2, x0 >> xs, x1 >> xs);
+
+ return 0;
+}
+
static void wrap_buffer(struct mp_zimg_repack *r,
zimg_image_buffer *buf,
zimg_filter_graph_callback *cb,
@@ -383,6 +446,63 @@ static void wrap_buffer(struct mp_zimg_repack *r,
r->mpi = mpi;
}
+static void setup_nv_packer(struct mp_zimg_repack *r)
+{
+ struct mp_regular_imgfmt desc;
+ if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
+ return;
+
+ // Check for NV.
+ if (desc.num_planes != 2)
+ return;
+ if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
+ return;
+ if (desc.planes[1].num_components != 2)
+ return;
+ int cr0 = desc.planes[1].components[0];
+ int cr1 = desc.planes[1].components[1];
+ if (cr0 > cr1)
+ MPSWAP(int, cr0, cr1);
+ if (cr0 != 2 || cr1 != 3)
+ return;
+
+ // Construct equivalent planar format.
+ struct mp_regular_imgfmt desc2 = desc;
+ desc2.num_planes = 3;
+ desc2.planes[1].num_components = 1;
+ desc2.planes[1].components[0] = 2;
+ desc2.planes[2].num_components = 1;
+ desc2.planes[2].components[0] = 3;
+ // For P010. Strangely this concept exists only for the NV format.
+ if (desc2.component_pad > 0)
+ desc2.component_pad = 0;
+
+ int planar_fmt = mp_find_regular_imgfmt(&desc2);
+ if (!planar_fmt)
+ return;
+
+ for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
+ const struct regular_repacker *pa = &regular_repackers[i];
+
+ void (*repack_cb)(void *p1, void *p2[], int x0, int x1) =
+ r->pack ? pa->pa_scanline : pa->un_scanline;
+
+ if (pa->packed_width != desc.component_size * 2 * 8 ||
+ pa->component_width != desc.component_size * 8 ||
+ pa->num_components != 2 ||
+ pa->prepadding != 0 ||
+ !repack_cb)
+ continue;
+
+ r->repack = repack_nv;
+ r->packed_repack_scanline = repack_cb;
+ r->zimgfmt = planar_fmt;
+ r->components[0] = desc.planes[1].components[0] - 1;
+ r->components[1] = desc.planes[1].components[1] - 1;
+ return;
+ }
+}
+
static void setup_misc_packer(struct mp_zimg_repack *r)
{
// Although it's in regular_repackers[], the generic mpv imgfmt metadata
@@ -478,6 +598,8 @@ static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r,
r->zimgfmt = fmt.imgfmt;
if (!r->repack)
+ setup_nv_packer(r);
+ if (!r->repack)
setup_misc_packer(r);
if (!r->repack)
setup_regular_rgb_packer(r);