summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKacper Michajłow <kasper93@gmail.com>2024-03-17 13:32:26 +0100
committerKacper Michajłow <kasper93@gmail.com>2024-03-19 13:04:05 +0100
commit374470d471a49491bceb4e1241402e3e56ccb209 (patch)
tree4b5b7b80ba3acf91a99642b6b7a8b20875ea00ed
parente0b517985bb0baafa3581cb768d8a9ce4126b6a8 (diff)
downloadmpv-374470d471a49491bceb4e1241402e3e56ccb209.tar.bz2
mpv-374470d471a49491bceb4e1241402e3e56ccb209.tar.xz
repack: add restrict qualifier to src/dst pointers
Allows compiler to do its job and optimize this code. We don't really want to repack in-place.
-rw-r--r--video/repack.c72
1 files changed, 36 insertions, 36 deletions
diff --git a/video/repack.c b/video/repack.c
index 00587fbb6b..0388493b28 100644
--- a/video/repack.c
+++ b/video/repack.c
@@ -63,14 +63,14 @@ struct mp_repack {
int components[4]; // b[n] = mp_image.planes[components[n]]
// pack: a is dst, b is src
// unpack: a is src, b is dst
- void (*packed_repack_scanline)(void *a, void *b[], int w);
+ void (*packed_repack_scanline)(void *restrict a, void *restrict b[], int w);
// Fringe RGB/YUV.
uint8_t comp_size;
uint8_t comp_map[6];
uint8_t comp_shifts[3];
uint8_t *comp_lut;
- void (*repack_fringe_yuv)(void *dst, void *src[], int w, uint8_t *c);
+ void (*repack_fringe_yuv)(void *restrict dst, void *restrict src[], int w, uint8_t *restrict c);
// F32 repacking.
int f32_comp_size;
@@ -133,8 +133,8 @@ static void copy_plane(struct mp_image *dst, int dst_x, int dst_y,
assert(dst->fmt.bpp[p] == src->fmt.bpp[p]);
for (int y = 0; y < h; y++) {
- void *pd = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
- void *ps = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
+ void *restrict pd = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
+ void *restrict ps = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
memcpy(pd, ps, size);
}
}
@@ -157,8 +157,8 @@ static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
assert(src->fmt.bpp[p] == bpp * 8);
for (int y = 0; y < h; y++) {
- void *s = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
- void *d = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
+ void *restrict s = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y);
+ void *restrict d = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y);
switch (endian_size) {
case 2:
for (int x = 0; x < num_words; x++)
@@ -191,7 +191,7 @@ static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
// packers will use "z" because they write zero.
#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3) \
- static void name(void *dst, void *src[], int w) { \
+ static void name(void *restrict dst, void *restrict src[], int w) { \
for (int x = 0; x < w; x++) { \
((packed_t *)dst)[x] = \
((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
@@ -202,7 +202,7 @@ static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
}
#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
- static void name(void *src, void *dst[], int w) { \
+ static void name(void *restrict src, void *restrict dst[], int w) { \
for (int x = 0; x < w; x++) { \
packed_t c = ((packed_t *)src)[x]; \
((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
@@ -214,7 +214,7 @@ static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad) \
- static void name(void *dst, void *src[], int w) { \
+ static void name(void *restrict dst, void *restrict src[], int w) { \
for (int x = 0; x < w; x++) { \
((packed_t *)dst)[x] = (pad) | \
((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
@@ -230,7 +230,7 @@ UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu)
PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48)
#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \
- static void name(void *src, void *dst[], int w) { \
+ static void name(void *restrict src, void *restrict dst[], int w) { \
for (int x = 0; x < w; x++) { \
packed_t c = ((packed_t *)src)[x]; \
((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
@@ -249,7 +249,7 @@ UN_WORD_3(un_ccc16x16, uint64_t, uint16_t, 0, 16, 32, 0xFFFFu)
PA_WORD_3(pa_ccc16z16, uint64_t, uint16_t, 0, 16, 32, 0)
#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \
- static void name(void *dst, void *src[], int w) { \
+ static void name(void *restrict dst, void *restrict src[], int w) { \
for (int x = 0; x < w; x++) { \
((packed_t *)dst)[x] = (pad) | \
((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \
@@ -258,7 +258,7 @@ PA_WORD_3(pa_ccc16z16, uint64_t, uint16_t, 0, 16, 32, 0)
}
#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \
- static void name(void *src, void *dst[], int w) { \
+ static void name(void *restrict src, void *restrict dst[], int w) { \
for (int x = 0; x < w; x++) { \
packed_t c = ((packed_t *)src)[x]; \
((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \
@@ -272,7 +272,7 @@ UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
#define PA_SEQ_3(name, comp_t) \
- static void name(void *dst, void *src[], int w) { \
+ static void name(void *restrict dst, void *restrict src[], int w) { \
comp_t *r = dst; \
for (int x = 0; x < w; x++) { \
*r++ = ((comp_t *)src[0])[x]; \
@@ -282,7 +282,7 @@ PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
}
#define UN_SEQ_3(name, comp_t) \
- static void name(void *src, void *dst[], int w) { \
+ static void name(void *restrict src, void *restrict dst[], int w) { \
comp_t *r = src; \
for (int x = 0; x < w; x++) { \
((comp_t *)dst[0])[x] = *r++; \
@@ -302,8 +302,8 @@ struct regular_repacker {
int component_width; // number of bits for a single component
int prepadding; // number of bits of LSB padding
int num_components; // number of components that can be accessed
- void (*pa_scanline)(void *a, void *b[], int w);
- void (*un_scanline)(void *a, void *b[], int w);
+ void (*pa_scanline)(void *restrict a, void *restrict b[], int w);
+ void (*un_scanline)(void *restrict a, void *restrict b[], int w);
};
static const struct regular_repacker regular_repackers[] = {
@@ -384,7 +384,7 @@ static void setup_packed_packer(struct mp_repack *rp)
int prepad = components[0] ? 0 : 8;
int first_comp = components[0] ? 0 : 1;
- void (*repack_cb)(void *pa, void *pb[], int w) =
+ void (*repack_cb)(void *restrict pa, void *restrict pb[], int w) =
rp->pack ? pa->pa_scanline : pa->un_scanline;
if (pa->packed_width != desc.bpp[0] ||
@@ -408,8 +408,8 @@ static void setup_packed_packer(struct mp_repack *rp)
}
#define PA_SHIFT_LUT8(name, packed_t) \
- static void name(void *dst, void *src[], int w, uint8_t *lut, \
- uint8_t s0, uint8_t s1, uint8_t s2) { \
+ static void name(void *restrict dst, void *restrict src[], int w, \
+ uint8_t *restrict lut, uint8_t s0, uint8_t s1, uint8_t s2) { \
for (int x = 0; x < w; x++) { \
((packed_t *)dst)[x] = \
(lut[((uint8_t *)src[0])[x] + 256 * 0] << s0) | \
@@ -420,8 +420,8 @@ static void setup_packed_packer(struct mp_repack *rp)
#define UN_SHIFT_LUT8(name, packed_t) \
- static void name(void *src, void *dst[], int w, uint8_t *lut, \
- uint8_t s0, uint8_t s1, uint8_t s2) { \
+ static void name(void *restrict src, void *restrict dst[], int w, \
+ uint8_t *restrict lut, uint8_t s0, uint8_t s1, uint8_t s2) { \
for (int x = 0; x < w; x++) { \
packed_t c = ((packed_t *)src)[x]; \
((uint8_t *)dst[0])[x] = lut[((c >> s0) & 0xFF) + 256 * 0]; \
@@ -449,7 +449,7 @@ static void fringe_rgb_repack(struct mp_repack *rp,
assert(rp->comp_size == 1 || rp->comp_size == 2);
- void (*repack)(void *pa, void *pb[], int w, uint8_t *lut,
+ void (*repack)(void *restrict pa, void *restrict pb[], int w, uint8_t *restrict lut,
uint8_t s0, uint8_t s1, uint8_t s2) = NULL;
if (rp->pack) {
repack = rp->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16;
@@ -525,10 +525,10 @@ static void unpack_pal(struct mp_repack *rp,
struct mp_image *a, int a_x, int a_y,
struct mp_image *b, int b_x, int b_y, int w)
{
- uint8_t *src = mp_image_pixel_ptr(a, 0, a_x, a_y);
+ uint8_t *restrict src = mp_image_pixel_ptr(a, 0, a_x, a_y);
uint32_t *pal = (void *)a->planes[1];
- uint8_t *dst[4] = {0};
+ uint8_t *restrict dst[4] = {0};
for (int p = 0; p < b->num_planes; p++)
dst[p] = mp_image_pixel_ptr(b, p, b_x, b_y);
@@ -545,8 +545,8 @@ static void bitmap_repack(struct mp_repack *rp,
struct mp_image *a, int a_x, int a_y,
struct mp_image *b, int b_x, int b_y, int w)
{
- uint8_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
- uint8_t *pb = mp_image_pixel_ptr(b, 0, b_x, b_y);
+ uint8_t *restrict pa = mp_image_pixel_ptr(a, 0, a_x, a_y);
+ uint8_t *restrict pb = mp_image_pixel_ptr(b, 0, b_x, b_y);
if (rp->pack) {
for (unsigned x = 0; x < w; x += 8) {
@@ -596,7 +596,7 @@ static void setup_misc_packer(struct mp_repack *rp)
}
#define PA_P422(name, comp_t) \
- static void name(void *dst, void *src[], int w, uint8_t *c) { \
+ static void name(void *restrict dst, void *restrict src[], int w, uint8_t *restrict c) { \
for (int x = 0; x < w; x += 2) { \
((comp_t *)dst)[x * 2 + c[0]] = ((comp_t *)src[0])[x + 0]; \
((comp_t *)dst)[x * 2 + c[1]] = ((comp_t *)src[0])[x + 1]; \
@@ -607,7 +607,7 @@ static void setup_misc_packer(struct mp_repack *rp)
#define UN_P422(name, comp_t) \
- static void name(void *src, void *dst[], int w, uint8_t *c) { \
+ static void name(void *restrict src, void *restrict dst[], int w, uint8_t *restrict c) { \
for (int x = 0; x < w; x += 2) { \
((comp_t *)dst[0])[x + 0] = ((comp_t *)src)[x * 2 + c[0]]; \
((comp_t *)dst[0])[x + 1] = ((comp_t *)src)[x * 2 + c[1]]; \
@@ -621,7 +621,7 @@ PA_P422(pa_p422_16, uint16_t)
UN_P422(un_p422_8, uint8_t)
UN_P422(un_p422_16, uint16_t)
-static void pa_p411_8(void *dst, void *src[], int w, uint8_t *c)
+static void pa_p411_8(void *restrict dst, void *restrict src[], int w, uint8_t *restrict c)
{
for (int x = 0; x < w; x += 4) {
((uint8_t *)dst)[x / 4 * 6 + c[0]] = ((uint8_t *)src[0])[x + 0];
@@ -634,7 +634,7 @@ static void pa_p411_8(void *dst, void *src[], int w, uint8_t *c)
}
-static void un_p411_8(void *src, void *dst[], int w, uint8_t *c)
+static void un_p411_8(void *restrict src, void *restrict dst[], int w, uint8_t *restrict c)
{
for (int x = 0; x < w; x += 4) {
((uint8_t *)dst[0])[x + 0] = ((uint8_t *)src)[x / 4 * 6 + c[0]];
@@ -773,7 +773,7 @@ static void setup_nv_packer(struct mp_repack *rp)
for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
const struct regular_repacker *pa = &regular_repackers[i];
- void (*repack_cb)(void *pa, void *pb[], int w) =
+ void (*repack_cb)(void *restrict pa, void *restrict pb[], int w) =
rp->pack ? pa->pa_scanline : pa->un_scanline;
if (pa->packed_width != desc.component_size * 2 * 8 ||
@@ -794,8 +794,8 @@ static void setup_nv_packer(struct mp_repack *rp)
}
#define PA_F32(name, packed_t) \
- static void name(void *dst, float *src, int w, float m, float o, \
- uint32_t p_max) { \
+ static void name(void *restrict dst, float *restrict src, int w, float m, \
+ float o, uint32_t p_max) { \
for (int x = 0; x < w; x++) { \
((packed_t *)dst)[x] = \
MPCLAMP(lrint((src[x] + o) * m), 0, (packed_t)p_max); \
@@ -803,8 +803,8 @@ static void setup_nv_packer(struct mp_repack *rp)
}
#define UN_F32(name, packed_t) \
- static void name(void *src, float *dst, int w, float m, float o, \
- uint32_t unused) { \
+ static void name(void *restrict src, float *restrict dst, int w, float m, \
+ float o, uint32_t unused) { \
for (int x = 0; x < w; x++) \
dst[x] = ((packed_t *)src)[x] * m + o; \
}
@@ -821,7 +821,7 @@ static void repack_float(struct mp_repack *rp,
{
assert(rp->f32_comp_size == 1 || rp->f32_comp_size == 2);
- void (*packer)(void *a, float *b, int w, float fm, float fb, uint32_t max)
+ void (*packer)(void *restrict a, float *restrict b, int w, float fm, float fb, uint32_t max)
= rp->pack ? (rp->f32_comp_size == 1 ? pa_f32_8 : pa_f32_16)
: (rp->f32_comp_size == 1 ? un_f32_8 : un_f32_16);