1 files changed, 163 insertions, 64 deletions
diff --git a/video/sws_utils.c b/video/sws_utils.c
index 55faadb1ee..a07bb55424 100644
--- a/video/sws_utils.c
+++ b/video/sws_utils.c
@@ -21,6 +21,10 @@
 #include <libavcodec/avcodec.h>
 #include <libavutil/bswap.h>
 #include <libavutil/opt.h>
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
+#include <libavutil/pixdesc.h>
+#endif
+#include <libplacebo/utils/libav.h>
 
 #include "config.h"
 
@@ -49,41 +53,41 @@ struct sws_opts {
     int chr_hshift;
     float chr_sharpen;
     float lum_sharpen;
-    int fast;
-    int bitexact;
-    int zimg;
+    bool fast;
+    bool bitexact;
+    bool zimg;
 };
 
 #define OPT_BASE_STRUCT struct sws_opts
 const struct m_sub_options sws_conf = {
     .opts = (const m_option_t[]) {
-        OPT_CHOICE("scaler", scaler, 0,
-                   ({"fast-bilinear",   SWS_FAST_BILINEAR},
-                    {"bilinear",        SWS_BILINEAR},
-                    {"bicubic",         SWS_BICUBIC},
-                    {"x",               SWS_X},
-                    {"point",           SWS_POINT},
-                    {"area",            SWS_AREA},
-                    {"bicublin",        SWS_BICUBLIN},
-                    {"gauss",           SWS_GAUSS},
-                    {"sinc",            SWS_SINC},
-                    {"lanczos",         SWS_LANCZOS},
-                    {"spline",          SWS_SPLINE})),
-        OPT_FLOATRANGE("lgb", lum_gblur, 0, 0, 100.0),
-        OPT_FLOATRANGE("cgb", chr_gblur, 0, 0, 100.0),
-        OPT_INT("cvs", chr_vshift, 0),
-        OPT_INT("chs", chr_hshift, 0),
-        OPT_FLOATRANGE("ls", lum_sharpen, 0, -100.0, 100.0),
-        OPT_FLOATRANGE("cs", chr_sharpen, 0, -100.0, 100.0),
-        OPT_FLAG("fast", fast, 0),
-        OPT_FLAG("bitexact", bitexact, 0),
-        OPT_FLAG("allow-zimg", zimg, 0),
+        {"scaler", OPT_CHOICE(scaler, // choice values are libswscale SWS_* constants
+            {"fast-bilinear",   SWS_FAST_BILINEAR},
+            {"bilinear",        SWS_BILINEAR},
+            {"bicubic",         SWS_BICUBIC},
+            {"x",               SWS_X},
+            {"point",           SWS_POINT},
+            {"area",            SWS_AREA},
+            {"bicublin",        SWS_BICUBLIN},
+            {"gauss",           SWS_GAUSS},
+            {"sinc",            SWS_SINC},
+            {"lanczos",         SWS_LANCZOS},
+            {"spline",          SWS_SPLINE})},
+        {"lgb", OPT_FLOAT(lum_gblur), M_RANGE(0, 100.0)},
+        {"cgb", OPT_FLOAT(chr_gblur), M_RANGE(0, 100.0)},
+        {"cvs", OPT_INT(chr_vshift)},
+        {"chs", OPT_INT(chr_hshift)},
+        {"ls", OPT_FLOAT(lum_sharpen), M_RANGE(-100.0, 100.0)},
+        {"cs", OPT_FLOAT(chr_sharpen), M_RANGE(-100.0, 100.0)},
+        {"fast", OPT_BOOL(fast)},
+        {"bitexact", OPT_BOOL(bitexact)},
+        {"allow-zimg", OPT_BOOL(zimg)},
         {0}
     },
     .size = sizeof(struct sws_opts),
     .defaults = &(const struct sws_opts){
         .scaler = SWS_LANCZOS,
-        .zimg = 1,
+        .zimg = true, // "allow-zimg" is enabled by default
     },
 };
 
@@ -124,26 +128,40 @@ bool mp_sws_supported_format(int imgfmt)
         && sws_isSupportedOutput(av_format);
 }
 
+#if HAVE_ZIMG
+static bool allow_zimg(struct mp_sws_context *ctx) // may zimg be used?
+{
+    return ctx->force_scaler == MP_SWS_ZIMG || // zimg explicitly forced
+           (ctx->force_scaler == MP_SWS_AUTO && ctx->allow_zimg); // auto + allowed
+}
+#endif // HAVE_ZIMG
+
+static bool allow_sws(struct mp_sws_context *ctx) // may libswscale be used?
+{ // true only when libswscale is forced or the scaler choice is automatic
+    return ctx->force_scaler == MP_SWS_SWS || ctx->force_scaler == MP_SWS_AUTO;
+}
+
 bool mp_sws_supports_formats(struct mp_sws_context *ctx,
                              int imgfmt_out, int imgfmt_in)
 {
 #if HAVE_ZIMG
-    if (ctx->allow_zimg) {
+    if (allow_zimg(ctx)) { // honors force_scaler as well as ctx->allow_zimg
         if (mp_zimg_supports_in_format(imgfmt_in) &&
             mp_zimg_supports_out_format(imgfmt_out))
             return true;
     }
 #endif
 
-    return sws_isSupportedInput(imgfmt2pixfmt(imgfmt_in)) &&
+    return allow_sws(ctx) && // false unless force_scaler is SWS or AUTO
+           sws_isSupportedInput(imgfmt2pixfmt(imgfmt_in)) &&
            sws_isSupportedOutput(imgfmt2pixfmt(imgfmt_out));
 }
 
-static int mp_csp_to_sws_colorspace(enum mp_csp csp)
+static int pl_csp_to_sws_colorspace(enum pl_color_system csp)
 {
     // The SWS_CS_* macros are just convenience redefinitions of the
     // AVCOL_SPC_* macros, inside swscale.h.
-    return mp_csp_to_avcol_spc(csp);
+    return pl_system_to_av(csp); // presumably from libplacebo/utils/libav.h
 }
 
 static bool cache_valid(struct mp_sws_context *ctx)
@@ -154,10 +172,8 @@ static bool cache_valid(struct mp_sws_context *ctx)
     return mp_image_params_equal(&ctx->src, &old->src) &&
            mp_image_params_equal(&ctx->dst, &old->dst) &&
            ctx->flags == old->flags &&
-           ctx->brightness == old->brightness &&
-           ctx->contrast == old->contrast &&
-           ctx->saturation == old->saturation &&
            ctx->allow_zimg == old->allow_zimg &&
+           ctx->force_scaler == old->force_scaler &&
            (!ctx->opts_cache || !m_config_cache_update(ctx->opts_cache));
 }
 
@@ -167,6 +183,8 @@ static void free_mp_sws(void *p)
     sws_freeContext(ctx->sws);
     sws_freeFilter(ctx->src_filter);
     sws_freeFilter(ctx->dst_filter);
+    TA_FREEP(&ctx->aligned_src);
+    TA_FREEP(&ctx->aligned_dst);
 }
 
 // You're supposed to set your scaling parameters on the returned context.
@@ -177,8 +195,6 @@ struct mp_sws_context *mp_sws_alloc(void *talloc_ctx)
     *ctx = (struct mp_sws_context) {
         .log = mp_null_log,
         .flags = SWS_BILINEAR,
-        .contrast = 1 << 16,    // 1.0 in 16.16 fixed point
-        .saturation = 1 << 16,
         .force_reload = true,
         .params = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT},
         .cached = talloc_zero(ctx, struct mp_sws_context),
@@ -198,6 +214,9 @@ struct mp_sws_context *mp_sws_alloc(void *talloc_ctx)
 // if the user changes any options.
 void mp_sws_enable_cmdline_opts(struct mp_sws_context *ctx, struct mpv_global *g)
 {
+    // Should only ever be NULL for tests.
+    if (!g)
+        return;
     if (ctx->opts_cache)
         return;
 
@@ -214,12 +233,8 @@ void mp_sws_enable_cmdline_opts(struct mp_sws_context *ctx, struct mpv_global *g
 // Optional, but possibly useful to avoid having to handle mp_sws_scale errors.
 int mp_sws_reinit(struct mp_sws_context *ctx)
 {
-    struct mp_image_params *src = &ctx->src;
-    struct mp_image_params *dst = &ctx->dst;
-
-    // Neutralize unsupported or ignored parameters.
-    src->p_w = dst->p_w = 0;
-    src->p_h = dst->p_h = 0;
+    struct mp_image_params src = ctx->src;
+    struct mp_image_params dst = ctx->dst;
 
     if (cache_valid(ctx))
         return 0;
@@ -230,12 +245,16 @@ int mp_sws_reinit(struct mp_sws_context *ctx)
     sws_freeContext(ctx->sws);
     ctx->sws = NULL;
     ctx->zimg_ok = false;
+    TA_FREEP(&ctx->aligned_src);
+    TA_FREEP(&ctx->aligned_dst);
 
 #if HAVE_ZIMG
-    if (ctx->allow_zimg) {
+    if (allow_zimg(ctx)) {
         ctx->zimg->log = ctx->log;
-        ctx->zimg->src = *src;
-        ctx->zimg->dst = *dst;
+        ctx->zimg->src = src;
+        ctx->zimg->dst = dst;
+        if (ctx->zimg_opts)
+            ctx->zimg->opts = *ctx->zimg_opts;
         if (mp_zimg_config(ctx->zimg)) {
             ctx->zimg_ok = true;
             MP_VERBOSE(ctx, "Using zimg.\n");
@@ -245,49 +264,64 @@ int mp_sws_reinit(struct mp_sws_context *ctx)
     }
 #endif
 
+    if (!allow_sws(ctx)) {
+        MP_ERR(ctx, "No scaler.\n");
+        return -1;
+    }
+
     ctx->sws = sws_alloc_context();
     if (!ctx->sws)
         return -1;
 
-    mp_image_params_guess_csp(src); // sanitize colorspace/colorlevels
-    mp_image_params_guess_csp(dst);
+    mp_image_params_guess_csp(&src); // sanitize colorspace/colorlevels
+    mp_image_params_guess_csp(&dst);
 
-    enum AVPixelFormat s_fmt = imgfmt2pixfmt(src->imgfmt);
+    enum AVPixelFormat s_fmt = imgfmt2pixfmt(src.imgfmt);
     if (s_fmt == AV_PIX_FMT_NONE || sws_isSupportedInput(s_fmt) < 1) {
         MP_ERR(ctx, "Input image format %s not supported by libswscale.\n",
-               mp_imgfmt_to_name(src->imgfmt));
+               mp_imgfmt_to_name(src.imgfmt));
         return -1;
     }
 
-    enum AVPixelFormat d_fmt = imgfmt2pixfmt(dst->imgfmt);
+    enum AVPixelFormat d_fmt = imgfmt2pixfmt(dst.imgfmt);
     if (d_fmt == AV_PIX_FMT_NONE || sws_isSupportedOutput(d_fmt) < 1) {
         MP_ERR(ctx, "Output image format %s not supported by libswscale.\n",
-               mp_imgfmt_to_name(dst->imgfmt));
+               mp_imgfmt_to_name(dst.imgfmt));
         return -1;
     }
 
-    int s_csp = mp_csp_to_sws_colorspace(src->color.space);
-    int s_range = src->color.levels == MP_CSP_LEVELS_PC;
+    int s_csp = pl_csp_to_sws_colorspace(src.repr.sys);
+    int s_range = src.repr.levels == PL_COLOR_LEVELS_FULL;
 
-    int d_csp = mp_csp_to_sws_colorspace(dst->color.space);
-    int d_range = dst->color.levels == MP_CSP_LEVELS_PC;
+    int d_csp = pl_csp_to_sws_colorspace(dst.repr.sys);
+    int d_range = dst.repr.levels == PL_COLOR_LEVELS_FULL;
 
     av_opt_set_int(ctx->sws, "sws_flags", ctx->flags, 0);
 
-    av_opt_set_int(ctx->sws, "srcw", src->w, 0);
-    av_opt_set_int(ctx->sws, "srch", src->h, 0);
+    av_opt_set_int(ctx->sws, "srcw", src.w, 0);
+    av_opt_set_int(ctx->sws, "srch", src.h, 0);
     av_opt_set_int(ctx->sws, "src_format", s_fmt, 0);
 
-    av_opt_set_int(ctx->sws, "dstw", dst->w, 0);
-    av_opt_set_int(ctx->sws, "dsth", dst->h, 0);
+    av_opt_set_int(ctx->sws, "dstw", dst.w, 0);
+    av_opt_set_int(ctx->sws, "dsth", dst.h, 0);
     av_opt_set_int(ctx->sws, "dst_format", d_fmt, 0);
 
     av_opt_set_double(ctx->sws, "param0", ctx->params[0], 0);
     av_opt_set_double(ctx->sws, "param1", ctx->params[1], 0);
 
-    int cr_src = mp_chroma_location_to_av(src->chroma_location);
-    int cr_dst = mp_chroma_location_to_av(dst->chroma_location);
+    int cr_src = pl_chroma_to_av(src.chroma_location);
+    int cr_dst = pl_chroma_to_av(dst.chroma_location);
     int cr_xpos, cr_ypos;
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
+    if (av_chroma_location_enum_to_pos(&cr_xpos, &cr_ypos, cr_src) >= 0) {
+        av_opt_set_int(ctx->sws, "src_h_chr_pos", cr_xpos, 0);
+        av_opt_set_int(ctx->sws, "src_v_chr_pos", cr_ypos, 0);
+    }
+    if (av_chroma_location_enum_to_pos(&cr_xpos, &cr_ypos, cr_dst) >= 0) {
+        av_opt_set_int(ctx->sws, "dst_h_chr_pos", cr_xpos, 0);
+        av_opt_set_int(ctx->sws, "dst_v_chr_pos", cr_ypos, 0);
+    }
+#else
     if (avcodec_enum_to_chroma_pos(&cr_xpos, &cr_ypos, cr_src) >= 0) {
         av_opt_set_int(ctx->sws, "src_h_chr_pos", cr_xpos, 0);
         av_opt_set_int(ctx->sws, "src_v_chr_pos", cr_ypos, 0);
@@ -296,24 +330,64 @@ int mp_sws_reinit(struct mp_sws_context *ctx)
         av_opt_set_int(ctx->sws, "dst_h_chr_pos", cr_xpos, 0);
         av_opt_set_int(ctx->sws, "dst_v_chr_pos", cr_ypos, 0);
     }
+#endif
 
     // This can fail even with normal operation, e.g. if a conversion path
     // simply does not support these settings.
     int r =
         sws_setColorspaceDetails(ctx->sws, sws_getCoefficients(s_csp), s_range,
                                  sws_getCoefficients(d_csp), d_range,
-                                 ctx->brightness, ctx->contrast, ctx->saturation);
+                                 0, 1 << 16, 1 << 16);
     ctx->supports_csp = r >= 0;
 
     if (sws_init_context(ctx->sws, ctx->src_filter, ctx->dst_filter) < 0)
         return -1;
 
+#if HAVE_ZIMG
 success:
+#endif
+
     ctx->force_reload = false;
     *ctx->cached = *ctx;
     return 1;
 }
 
+static struct mp_image *check_alignment(struct mp_log *log,
+                                        struct mp_image **alloc,
+                                        struct mp_image *img)
+{ // Returns img if aligned, else the aligned stand-in *alloc; NULL if alloc fails.
+    // It's completely unclear which alignment libswscale wants (for performance)
+    // or requires (for avoiding crashes and memory corruption).
+    // Is it av_cpu_max_align()? Is it the hardcoded AVFrame "default" of 32
+    // in get_video_buffer()? Is it whatever avcodec_align_dimensions2()
+    // determines? It's like you can't win if you try to prevent libswscale from
+    // corrupting memory...
+    // So use 32, a value that has been experimentally determined to be safe,
+    // and which in most cases is not larger than decoder output. It is smaller
+    // than or equal to what most image allocators in mpv/ffmpeg use.
+    size_t align = 32;
+    assert(align <= MP_IMAGE_BYTE_ALIGN); // or mp_image_alloc will not cut it
+
+    bool is_aligned = true;
+    for (int p = 0; p < img->num_planes; p++) {
+        is_aligned &= MP_IS_ALIGNED((uintptr_t)img->planes[p], align);
+        is_aligned &= MP_IS_ALIGNED(labs(img->stride[p]), align); // magnitude only
+    }
+
+    if (is_aligned)
+        return img;
+
+    if (!*alloc) { // allocate the aligned stand-in only if none is cached yet
+        mp_verbose(log, "unaligned libswscale parameter; using slow copy.\n");
+        *alloc = mp_image_alloc(img->imgfmt, img->w, img->h);
+        if (!*alloc)
+            return NULL;
+    }
+
+    mp_image_copy_attributes(*alloc, img); // pixel data is copied by mp_sws_scale()
+    return *alloc;
+}
+
 // Scale from src to dst - if src/dst have different parameters from previous
 // calls, the context is reinitialized. Return error code. (It can fail if
 // reinitialization was necessary, and swscale returned an error.)
@@ -334,8 +408,32 @@ int mp_sws_scale(struct mp_sws_context *ctx, struct mp_image *dst,
         return mp_zimg_convert(ctx->zimg, dst, src) ? 0 : -1;
 #endif
 
-    sws_scale(ctx->sws, (const uint8_t *const *) src->planes, src->stride,
-              0, src->h, dst->planes, dst->stride);
+    if (src->params.repr.sys == PL_COLOR_SYSTEM_XYZ && dst->params.repr.sys != PL_COLOR_SYSTEM_XYZ) {
+        // swscale has hardcoded gamma 2.2 internally and 2.6 for XYZ
+        dst->params.color.transfer = PL_COLOR_TRC_GAMMA22;
+        // and sRGB primaries...
+        dst->params.color.primaries = PL_COLOR_PRIM_BT_709;
+        // It doesn't adjust the white point, though; supporting that case is
+        // not worth it. It would require custom primaries with an equal-energy
+        // white point and sRGB primaries.
+    }
+
+    struct mp_image *a_src = check_alignment(ctx->log, &ctx->aligned_src, src);
+    struct mp_image *a_dst = check_alignment(ctx->log, &ctx->aligned_dst, dst);
+    if (!a_src || !a_dst) {
+        MP_ERR(ctx, "image allocation failed.\n");
+        return -1;
+    }
+
+    if (a_src != src)
+        mp_image_copy(a_src, src);
+
+    sws_scale(ctx->sws, (const uint8_t *const *) a_src->planes, a_src->stride,
+              0, a_src->h, a_dst->planes, a_dst->stride);
+
+    if (a_dst != dst)
+        mp_image_copy(dst, a_dst);
+
     return 0;
 }
 
@@ -378,14 +476,15 @@ static const int endian_swaps[][2] = {
 // might reduce the effective bit depth in some cases.
 struct mp_image *mp_img_swap_to_native(struct mp_image *img)
 {
+    int avfmt = imgfmt2pixfmt(img->imgfmt);
     int to = AV_PIX_FMT_NONE;
     for (int n = 0; endian_swaps[n][0] != AV_PIX_FMT_NONE; n++) {
-        if (endian_swaps[n][0] == img->fmt.avformat)
+        if (endian_swaps[n][0] == avfmt)
             to = endian_swaps[n][1];
     }
     if (to == AV_PIX_FMT_NONE || !mp_image_make_writeable(img))
         return img;
-    int elems = img->fmt.bytes[0] / 2 * img->w;
+    int elems = img->fmt.bpp[0] / 8 / 2 * img->w;
     for (int y = 0; y < img->h; y++) {
         uint16_t *p = (uint16_t *)(img->planes[0] + y * img->stride[0]);
         for (int i = 0; i < elems; i++)