summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--DOCS/man/vo.rst68
-rw-r--r--video/csputils.c3
-rw-r--r--video/csputils.h1
-rw-r--r--video/out/gl_osd.c8
-rw-r--r--video/out/gl_utils.c6
-rw-r--r--video/out/gl_utils.h16
-rw-r--r--video/out/gl_video.c864
-rw-r--r--video/out/gl_video.h7
8 files changed, 750 insertions, 223 deletions
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index e73f1d578a..82611e5a19 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -352,6 +352,10 @@ Available video output drivers are:
blurrier. Defaults to 1. Note that setting this too low (eg. 0.5)
leads to bad results. It's recommended to stay between 0.9 and 1.1.
+ ``sharpen3``, ``sharpen5``
+ Sharpening strength. Increasing this makes the image sharper but
+ adds more ringing and aliasing. Defaults to 0.5.
+
``scale-radius=<r>``
Set radius for filters listed below, must be a float number between 1.0
and 16.0. Defaults to be 3.0 if not specified.
@@ -377,21 +381,6 @@ Available video output drivers are:
will reproduce the source image perfectly if no scaling is performed.
Note that this option never affects ``cscale``.
- ``srgb``
- Convert and color correct the output to sRGB before displaying it on
- the screen. This option enables ``linear-scaling``.
-
- This option is equivalent to using ``icc-profile`` with an sRGB ICC
- profile, but it is implemented without a 3DLUT and does not require
- LittleCMS 2. If both ``srgb`` and ``icc-profile`` are present, the
- latter takes precedence, as they are somewhat redundant.
-
- Note: When playing back BT.2020 content with this option enabled, out
- of gamut colors will be numerically clipped, which can potentially
- change the hue and/or luminance. If this is not desired, it is
- recommended to use ``icc-profile`` with an sRGB ICC profile instead,
- when playing back wide-gamut BT.2020 content.
-
``pbo``
Enable use of PBOs. This is slightly faster, but can sometimes lead to
sporadic and temporary image corruption (in theory, because reupload
@@ -460,9 +449,10 @@ Available video output drivers are:
``scale-antiring``.
``linear-scaling``
- Scale in linear light. This is automatically enabled if ``srgb``,
- ``icc-profile`` or ``sigmoid-upscaling`` is set. It should only
- be used with a ``fbo-format`` that has at least 16 bit precision.
+ Scale in linear light. This is automatically enabled if
+ ``target-prim``, ``target-trc``, ``icc-profile`` or
+ ``sigmoid-upscaling`` is set. It should only be used with a
+ ``fbo-format`` that has at least 16 bit precision.
``fancy-downscaling``
When using convolution based filters, extend the filter size
@@ -553,13 +543,44 @@ Available video output drivers are:
NOTE: Only implemented on OS X.
+ ``target-prim=<value>``
+ Specifies the primaries of the display. Video colors will be adapted
+ to this colorspace if necessary. Valid values are:
+
+ auto
+ Disable any adaptation (default)
+ bt470m
+ ITU-R BT.470 M
+ bt601-525
+ ITU-R BT.601 (525-line SD systems, eg. NTSC), SMPTE 170M/240M
+ bt601-625
+ ITU-R BT.601 (625-line SD systems, eg. PAL/SECAM), ITU-R BT.470 B/G
+ bt709
+ ITU-R BT.709 (HD), IEC 61966-2-1 (sRGB), SMPTE RP177 Annex B
+ bt2020
+ ITU-R BT.2020 (UHD)
+
+ ``target-trc=<value>``
+ Specifies the transfer characteristics (gamma) of the display. Video
+ colors will be adjusted to this curve. Valid values are:
+
+ auto
+ Disable any adaptation (default)
+ bt1886
+ ITU-R BT.1886 curve, without the brightness drop (approx. 1.961)
+ srgb
+ IEC 61966-2-1 (sRGB)
+ linear
+ Linear light output
+ gamma22
+ Pure power curve (gamma 2.2)
+
``icc-profile=<file>``
Load an ICC profile and use it to transform linear RGB to screen output.
- Needs LittleCMS 2 support compiled in. This option overrides the ``srgb``
- property, as using both is somewhat redundant. It also enables
+ Needs LittleCMS 2 support compiled in. This option overrides the
+ ``target-prim`` and ``target-trc`` options. It also enables
``linear-scaling``.
-
``icc-profile-auto``
Automatically select the ICC display profile currently specified by
the display settings of the operating system.
@@ -573,9 +594,8 @@ Available video output drivers are:
Its size depends on the ``3dlut-size``, and can be very big.
``icc-intent=<value>``
- Specifies the ICC Intent used for transformations between color spaces.
- This affects the rendering when using ``icc-profile`` or ``srgb`` and
- also affects the way DCP XYZ content gets converted to RGB.
+ Specifies the ICC intent used for the color transformation (when using
+ ``icc-profile``).
0
perceptual
diff --git a/video/csputils.c b/video/csputils.c
index cee33dbba9..06de4bb9e8 100644
--- a/video/csputils.c
+++ b/video/csputils.c
@@ -70,6 +70,7 @@ const char *const mp_csp_trc_names[MP_CSP_TRC_COUNT] = {
"BT.1886 (SD, HD, UHD)",
"sRGB (IEC 61966-2-1)",
"Linear light",
+ "Pure power (gamma 2.2)",
};
const char *const mp_csp_equalizer_names[MP_CSP_EQ_COUNT] = {
@@ -156,6 +157,7 @@ enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc)
case AVCOL_TRC_BT2020_12: return MP_CSP_TRC_BT_1886;
case AVCOL_TRC_IEC61966_2_1: return MP_CSP_TRC_SRGB;
case AVCOL_TRC_LINEAR: return MP_CSP_TRC_LINEAR;
+ case AVCOL_TRC_GAMMA22: return MP_CSP_TRC_GAMMA22;
default: return MP_CSP_TRC_AUTO;
}
}
@@ -202,6 +204,7 @@ int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc)
case MP_CSP_TRC_BT_1886: return AVCOL_TRC_BT709;
case MP_CSP_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1;
case MP_CSP_TRC_LINEAR: return AVCOL_TRC_LINEAR;
+ case MP_CSP_TRC_GAMMA22: return AVCOL_TRC_GAMMA22;
default: return AVCOL_TRC_UNSPECIFIED;
}
}
diff --git a/video/csputils.h b/video/csputils.h
index a082682e43..a68c106549 100644
--- a/video/csputils.h
+++ b/video/csputils.h
@@ -76,6 +76,7 @@ enum mp_csp_trc {
MP_CSP_TRC_BT_1886,
MP_CSP_TRC_SRGB,
MP_CSP_TRC_LINEAR,
+ MP_CSP_TRC_GAMMA22,
MP_CSP_TRC_COUNT
};
diff --git a/video/out/gl_osd.c b/video/out/gl_osd.c
index 0ab85f59c4..7a9532d416 100644
--- a/video/out/gl_osd.c
+++ b/video/out/gl_osd.c
@@ -294,7 +294,7 @@ static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs)
osd->num_subparts * sizeof(osd->subparts[0]));
}
-static void write_quad(struct vertex *va, float matrix[3][3],
+static void write_quad(struct vertex *va, float matrix[3][2],
float x0, float y0, float x1, float y1,
float tx0, float ty0, float tx1, float ty1,
float tex_w, float tex_h, const uint8_t color[4])
@@ -312,7 +312,7 @@ static void write_quad(struct vertex *va, float matrix[3][3],
#undef COLOR_INIT
}
-static int generate_verts(struct mpgl_osd_part *part, float matrix[3][3])
+static int generate_verts(struct mpgl_osd_part *part, float matrix[3][2])
{
int num_vertices = part->num_subparts * 6;
MP_TARRAY_GROW(part, part->vertices, num_vertices);
@@ -337,7 +337,7 @@ static int generate_verts(struct mpgl_osd_part *part, float matrix[3][3])
return num_vertices;
}
-static void draw_part(struct mpgl_osd *ctx, int index, float matrix[3][3])
+static void draw_part(struct mpgl_osd *ctx, int index, float matrix[3][2])
{
GL *gl = ctx->gl;
struct mpgl_osd_part *part = ctx->parts[index];
@@ -377,7 +377,7 @@ void mpgl_osd_draw_part(struct mpgl_osd *ctx, int vp_w, int vp_h, int index)
for (int x = 0; x < div[0]; x++) {
for (int y = 0; y < div[1]; y++) {
- float matrix[3][3];
+ float matrix[3][2];
gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h);
diff --git a/video/out/gl_utils.c b/video/out/gl_utils.c
index ca2fef10bf..7881a6cf1f 100644
--- a/video/out/gl_utils.c
+++ b/video/out/gl_utils.c
@@ -418,7 +418,7 @@ void fbotex_uninit(struct fbotex *fbo)
// Standard parallel 2D projection, except y1 < y0 means that the coordinate
// system is flipped, not the projection.
-void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1)
+void gl_matrix_ortho2d(float m[3][2], float x0, float x1, float y0, float y1)
{
if (y1 < y0) {
float t = y0;
@@ -426,12 +426,12 @@ void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1)
y1 = t;
}
- memset(m, 0, 9 * sizeof(float));
m[0][0] = 2.0f / (x1 - x0);
+ m[0][1] = 0.0f;
+ m[1][0] = 0.0f;
m[1][1] = 2.0f / (y1 - y0);
m[2][0] = -(x1 + x0) / (x1 - x0);
m[2][1] = -(y1 + y0) / (y1 - y0);
- m[2][2] = 1.0f;
}
static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id,
diff --git a/video/out/gl_utils.h b/video/out/gl_utils.h
index a1bb2ecafb..b4f5650ea6 100644
--- a/video/out/gl_utils.h
+++ b/video/out/gl_utils.h
@@ -86,15 +86,27 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
#define FBOTEX_FUZZY_H 2
void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter);
-void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1);
+void gl_matrix_ortho2d(float m[3][2], float x0, float x1, float y0, float y1);
-static inline void gl_matrix_mul_vec(float m[3][3], float *x, float *y)
+// This treats m as an affine transformation, in other words m[2][n] gets
+// added to the output.
+static inline void gl_matrix_mul_vec(float m[3][2], float *x, float *y)
{
float vx = *x, vy = *y;
*x = vx * m[0][0] + vy * m[1][0] + m[2][0];
*y = vx * m[0][1] + vy * m[1][1] + m[2][1];
}
+struct mp_rect_f { // rectangle given by two corner points, stored as floats
+ float x0, y0, x1, y1; // (x0, y0) and (x1, y1) are the two corners
+};
+
+static inline void gl_matrix_mul_rect(float m[3][2], struct mp_rect_f *r) // apply m to both corners of r, in place
+{
+ gl_matrix_mul_vec(m, &r->x0, &r->y0); // transform the (x0, y0) corner
+ gl_matrix_mul_vec(m, &r->x1, &r->y1); // transform the (x1, y1) corner
+}
+
void gl_set_debug_logger(GL *gl, struct mp_log *log);
struct gl_shader_cache;
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index a52bd82020..5f64dcb1d6 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -44,7 +44,7 @@
// Pixel width of 1D lookup textures.
#define LOOKUP_TEXTURE_SIZE 256
-// Texture units 0-3 are used by the video, with unit 0 for free use.
+// Texture units 0-3 are used by the video, and are free for use by the passes.
// Units 4-5 are used for scaler LUTs.
#define TEXUNIT_SCALERS 4
#define TEXUNIT_3DLUT 6
@@ -123,16 +123,15 @@ struct scaler {
struct fbosurface {
struct fbotex fbotex;
int64_t pts;
- bool valid;
};
-#define FBOSURFACES_MAX 2
+#define FBOSURFACES_MAX 4
struct src_tex {
GLuint gl_tex;
GLenum gl_target;
int tex_w, tex_h;
- struct mp_rect src;
+ struct mp_rect_f src;
};
struct gl_video {
@@ -171,10 +170,7 @@ struct gl_video {
bool has_alpha;
char color_swizzle[5];
- float input_gamma, conv_gamma;
- float user_gamma;
- bool user_gamma_enabled; // shader handles user_gamma
- bool sigmoid_enabled;
+ bool user_gamma_enabled;
struct video_image image;
@@ -183,20 +179,14 @@ struct gl_video {
struct fbosurface surfaces[FBOSURFACES_MAX];
size_t surface_idx;
+ size_t surface_now;
+ bool is_interpolated;
// state for luma (0) and chroma (1) scalers
struct scaler scalers[2];
- // true if scaler is currently upscaling
- bool upscaling;
-
- bool is_interpolated;
-
struct mp_csp_equalizer video_eq;
- // Source and destination color spaces for the CMS matrix
- struct mp_csp_primaries csp_src, csp_dest;
-
struct mp_rect src_rect; // displayed part of the source video
struct mp_rect dst_rect; // video rectangle on output window
struct mp_osd_res osd_rect; // OSD size/margins
@@ -366,7 +356,19 @@ const struct m_sub_options gl_video_conf = {
.opts = (const m_option_t[]) {
OPT_FLOATRANGE("gamma", gamma, 0, 0.1, 2.0),
OPT_FLAG("gamma-auto", gamma_auto, 0),
- OPT_FLAG("srgb", srgb, 0),
+ OPT_CHOICE("target-prim", target_prim, 0,
+ ({"auto", MP_CSP_PRIM_AUTO},
+ {"bt601-525", MP_CSP_PRIM_BT_601_525},
+ {"bt601-625", MP_CSP_PRIM_BT_601_625},
+ {"bt709", MP_CSP_PRIM_BT_709},
+ {"bt2020", MP_CSP_PRIM_BT_2020},
+ {"bt470m", MP_CSP_PRIM_BT_470M})),
+ OPT_CHOICE("target-trc", target_trc, 0,
+ ({"auto", MP_CSP_TRC_AUTO},
+ {"bt1886", MP_CSP_TRC_BT_1886},
+ {"srgb", MP_CSP_TRC_SRGB},
+ {"linear", MP_CSP_TRC_LINEAR},
+ {"gamma22", MP_CSP_TRC_GAMMA22})),
OPT_FLAG("npot", npot, 0),
OPT_FLAG("pbo", pbo, 0),
OPT_STRING_VALIDATE("scale", scalers[0], 0, validate_scaler_opt),
@@ -433,6 +435,7 @@ const struct m_sub_options gl_video_conf = {
OPT_REPLACED("cparam2", "cscale-param2"),
OPT_REPLACED("cradius", "cscale-radius"),
OPT_REPLACED("cantiring", "cscale-antiring"),
+ OPT_REPLACED("srgb", "target-prim=srgb:target-trc=srgb"),
{0}
},
@@ -479,6 +482,19 @@ void gl_video_set_debug(struct gl_video *p, bool enable)
gl_set_debug_logger(gl, enable ? p->log : NULL);
}
+static void gl_video_reset_surfaces(struct gl_video *p) // zero the pts of every surface and rewind both surface indices
+{
+ for (int i = 0; i < FBOSURFACES_MAX; i++)
+ p->surfaces[i].pts = 0; // NOTE(review): 0 presumably means "no frame stored" - confirm with the readers of pts
+ p->surface_idx = 0;
+ p->surface_now = 0;
+}
+
+static size_t fbosurface_next(size_t id) // index following id, wrapping to 0 at FBOSURFACES_MAX
+{
+ return (id+1) % FBOSURFACES_MAX;
+}
+
static void recreate_osd(struct gl_video *p)
{
if (p->osd)
@@ -507,6 +523,8 @@ static void uninit_rendering(struct gl_video *p)
gl->DeleteTextures(1, &p->dither_texture);
p->dither_texture = 0;
+
+ gl_video_reset_surfaces(p);
}
void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
@@ -546,13 +564,28 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
reinit_rendering(p);
}
-static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg)
+static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo, int id, // store src_fbo's texture in pass_tex[id]
+ int w, int h) // w, h: far corner of the source rectangle, which starts at (0, 0)
+{
+ p->pass_tex[id] = (struct src_tex){
+ .gl_tex = src_fbo->texture,
+ .gl_target = GL_TEXTURE_2D, // always recorded as a plain 2D texture
+ .tex_w = src_fbo->tex_w, // texture size is taken from the FBO, not from w/h
+ .tex_h = src_fbo->tex_h,
+ .src = {0, 0, w, h},
+ };
+}
+
+static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg,
+ float chroma[3][2])
{
GLuint imgtex[4] = {0};
assert(vimg->mpi);
- float offset[2] = {0};
+ float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
+ float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
+
int chroma_loc = p->opts.chroma_location;
if (!chroma_loc)
chroma_loc = p->image_params.chroma_location;
@@ -564,13 +597,21 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
// so that the luma and chroma sample line up exactly.
// For 4:4:4, setting chroma location should have no effect at all.
// luma sample size (in chroma coord. space)
- float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
- float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
- // move chroma center to luma center (in chroma coord. space)
- offset[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
- offset[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ chroma[2][0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
+ chroma[2][1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ } else {
+ chroma[2][0] = chroma[2][1] = 0.0;
}
+ // Make sure luma/chroma sizes are aligned.
+ // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2
+ // so luma (3,3) has to align with chroma (2,2).
+ chroma[0][0] = ls_w * (float)vimg->planes[0].tex_w
+ / vimg->planes[1].tex_w;
+ chroma[1][1] = ls_h * (float)vimg->planes[0].tex_h
+ / vimg->planes[1].tex_h;
+ chroma[0][1] = chroma[1][0] = 0.0; // No rotation etc.
+
if (p->hwdec_active) {
p->hwdec->driver->map_image(p->hwdec, vimg->mpi, imgtex);
} else {
@@ -585,17 +626,7 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
.gl_target = t->gl_target,
.tex_w = t->tex_w,
.tex_h = t->tex_h,
- //.src = {0, 0, t->w, t->h},
- .src = {
- // xxx this is wrong; we want to crop the source when sampling
- // from indirect_fbo, but not when rendering to indirect_fbo
- // also, this should apply offset, and take care of odd video
- // dimensions properly; and it should use floats instead
- .x0 = p->src_rect.x0 >> p->image_desc.xs[n],
- .y0 = p->src_rect.y0 >> p->image_desc.ys[n],
- .x1 = p->src_rect.x1 >> p->image_desc.xs[n],
- .y1 = p->src_rect.y1 >> p->image_desc.ys[n],
- },
+ .src = {0, 0, t->w, t->h},
};
}
}
@@ -712,7 +743,7 @@ static void pass_prepare_src_tex(struct gl_video *p)
GL *gl = p->gl;
struct gl_shader_cache *sc = p->sc;
- for (int n = 0; n < p->plane_count; n++) {
+ for (int n = 0; n < 4; n++) {
struct src_tex *s = &p->pass_tex[n];
if (!s->gl_tex)
continue;
@@ -722,9 +753,9 @@ static void pass_prepare_src_tex(struct gl_video *p)
snprintf(texture_name, sizeof(texture_name), "texture%d", n);
snprintf(texture_size, sizeof(texture_size), "texture_size%d", n);
- gl_sc_uniform_sampler(sc, texture_name, p->gl_target, n);
+ gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n);
float f[2] = {1, 1};
- if (p->gl_target != GL_TEXTURE_RECTANGLE) {
+ if (s->gl_target != GL_TEXTURE_RECTANGLE) {
f[0] = s->tex_w;
f[1] = s->tex_h;
}
@@ -736,12 +767,13 @@ static void pass_prepare_src_tex(struct gl_video *p)
gl->ActiveTexture(GL_TEXTURE0);
}
+// flags = bits 0-1: rotate, bit 2: flip vertically
static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
- const struct mp_rect *dst)
+ const struct mp_rect *dst, int flags)
{
struct vertex va[4];
- float matrix[3][3];
+ float matrix[3][2];
gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h);
float x[2] = {dst->x0, dst->x1};
@@ -758,6 +790,8 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
if (s->gl_tex) {
float tx[2] = {s->src.x0, s->src.x1};
float ty[2] = {s->src.y0, s->src.y1};
+ if (flags & 4)
+ MPSWAP(float, ty[0], ty[1]);
bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->tex_w);
v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->tex_h);
@@ -765,20 +799,31 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
}
}
+ int rot = flags & 3;
+ while (rot--) {
+ static const int perm[4] = {1, 3, 0, 2};
+ struct vertex vb[4];
+ memcpy(vb, va, sizeof(vb));
+ for (int n = 0; n < 4; n++)
+ memcpy(va[n].texcoord, vb[perm[n]].texcoord,
+ sizeof(struct vertex_pt[4]));
+ }
+
gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, va, 4);
debug_check_gl(p, "after rendering");
}
+// flags: see render_pass_quad
static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h,
- const struct mp_rect *dst)
+ const struct mp_rect *dst, int flags)
{
GL *gl = p->gl;
pass_prepare_src_tex(p);
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
gl->Viewport(0, 0, vp_w, vp_h < 0 ? -vp_h : vp_h);
gl_sc_gen_shader_and_reset(p->sc);
- render_pass_quad(p, vp_w, vp_h, dst);
+ render_pass_quad(p, vp_w, vp_h, dst, flags);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
memset(&p->pass_tex, 0, sizeof(p->pass_tex));
}
@@ -787,22 +832,17 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
// FBO, if the required parameters have changed
// w, h: required FBO target dimension, and also defines the target rectangle
// used for rasterization
+// tex: the texture ID to load the result back into
// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy
// flags allows the FBO to be larger than the target)
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
- int w, int h, int flags)
+ int w, int h, int tex, int flags)
{
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
finish_pass_direct(p, dst_fbo->fbo, dst_fbo->tex_w, dst_fbo->tex_h,
- &(struct mp_rect){0, 0, w, h});
- p->pass_tex[0] = (struct src_tex){
- .gl_tex = dst_fbo->texture,
- .gl_target = GL_TEXTURE_2D,
- .tex_w = dst_fbo->tex_w,
- .tex_h = dst_fbo->tex_h,
- .src = {0, 0, w, h},
- };
+ &(struct mp_rect){0, 0, w, h}, 0);
+ pass_load_fbotex(p, dst_fbo, tex, w, h);
}
static void uninit_scaler(struct gl_video *p, int scaler_unit)
@@ -834,6 +874,9 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name,
scaler->insufficient = false;
scaler->initialized = true;
+ for (int n = 0; n < 2; n++)
+ scaler->params[n] = p->opts.scaler_params[scaler->index][n];
+
const struct filter_kernel *t_kernel = mp_find_filter_kernel(scaler->name);
if (!t_kernel)
return;
@@ -842,8 +885,8 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name,
scaler->kernel = &scaler->kernel_storage;
for (int n = 0; n < 2; n++) {
- if (!isnan(p->opts.scaler_params[scaler->index][n]))
- scaler->kernel->params[n] = p->opts.scaler_params[scaler->index][n];
+ if (!isnan(scaler->params[n]))
+ scaler->kernel->params[n] = scaler->params[n];
}
scaler->antiring = p->opts.scaler_antiring[scaler->index];
@@ -920,14 +963,15 @@ static void pass_sample_separated_get_weights(struct gl_video *p,
GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));)
GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
} else {
- GLSL(float weights[N];)
- GLSL(for (int n = 0; n < N / 4; n++) {)
- GLSL( vec4 c = texture(lut, vec2(1.0 / (N / 2) + n / float(N / 4), fcoord));)
- GLSL( weights[n * 4 + 0] = c.r;)
- GLSL( weights[n * 4 + 1] = c.g;)
- GLSL( weights[n * 4 + 2] = c.b;)
- GLSL( weights[n * 4 + 3] = c.a;)
- GLSL(})
+ GLSLF("float weights[%d];\n", N);
+ for (int n = 0; n < N / 4; n++) {
+ GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n",
+ N / 2, n, N / 4);
+ GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
+ GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
+ GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
+ GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
+ }
}
}
@@ -937,117 +981,294 @@ static void pass_sample_separated_gen(struct gl_video *p, struct scaler *scaler,
int d_x, int d_y)
{
int N = scaler->kernel->size;
+ bool use_ar = scaler->antiring > 0;
+ GLSL(vec4 color = vec4(0.0);)
+ GLSLF("{\n");
GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y);
- GLSLF("#define N %d\n", N);
- GLSLF("#define ANTIRING %f\n", scaler->antiring);
- GLSL(vec2 pt = (vec2(1.0) / texture_size0) * dir;)
- GLSL(float fcoord = dot(fract(texcoord0 * texture_size0 - vec2(0.5)), dir);)
- GLSL(vec2 base = texcoord0 - fcoord * pt - pt * vec2(N / 2 - 1);)
+ GLSL(vec2 pt = (vec2(1.0) / sample_size) * dir;)
+ GLSL(float fcoord = dot(fract(sample_pos * sample_size - vec2(0.5)), dir);)
+ GLSLF("vec2 base = sample_pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1);
+ GLSL(vec4 c;)
+ if (use_ar) {
+ GLSL(vec4 hi = vec4(0.0);)
+ GLSL(vec4 lo = vec4(1.0);)
+ }
pass_sample_separated_get_weights(p, scaler);
- GLSL(vec4 color = vec4(0);)
- GLSL(vec4 hi = vec4(0);)
- GLSL(vec4 lo = vec4(1);)
- GLSL(for (int n = 0; n < N; n++) {)
- GLSL( vec4 c = texture(texture0, base + pt * vec2(n));)
- GLSL( color += vec4(weights[n]) * c;)
- GLSL( if (n == N/2-1 || n == N/2) {)
- GLSL( lo = min(lo, c);)
- GLSL( hi = max(hi, c);)
- GLSL( })
- GLSL(})
- GLSL(color = mix(color, clamp(color, lo, hi), ANTIRING);)
-}
-
-static void pass_sample_separated(struct gl_video *p, struct scaler *scaler,
- int w, int h)
+ GLSLF("// scaler samples\n");
+ for (int n = 0; n < N; n++) {
+ GLSLF("c = texture(texture0, base + pt * vec2(%d));\n", n);
+ GLSLF("color += vec4(weights[%d]) * c;\n", n);
+ if (use_ar && (n == N/2-1 || n == N/2)) {
+ GLSL(lo = min(lo, c);)
+ GLSL(hi = max(hi, c);)
+ }
+ }
+ if (use_ar)
+ GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring);
+ GLSLF("}\n");
+}
+
+static void pass_sample_separated(struct gl_video *p, int src_tex,
+ struct scaler *scaler, int w, int h,
+ float transform[3][2])
{
+ // Keep the x components untouched for the first pass
+ struct mp_rect_f src_new = p->pass_tex[0].src;
+ gl_matrix_mul_rect(transform, &src_new);
GLSLF("// pass 1\n");
+ p->pass_tex[0].src.y0 = src_new.y0;
+ p->pass_tex[0].src.y1 = src_new.y1;
pass_sample_separated_gen(p, scaler, 0, 1);
int src_w = p->pass_tex[0].src.x1 - p->pass_tex[0].src.x0;
- finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, 0);
+ finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H);
+ // Restore the sample source for the second pass
+ GLSLF("#define sample_tex texture%d\n", src_tex);
+ GLSLF("#define sample_pos texcoord%d\n", src_tex);
+ GLSLF("#define sample_size texture_size%d\n", src_tex);
GLSLF("// pass 2\n");
+ p->pass_tex[0].src.x0 = src_new.x0;
+ p->pass_tex[0].src.x1 = src_new.x1;
pass_sample_separated_gen(p, scaler, 1, 0);
}
-// Scale. This uses the p->pass_tex[0] texture as source. It's hardcoded to
-// use all variables and values associated with p->pass_tex[0] (which includes
-// texture0/texcoord0/texture_size0).
-// The src rectangle is implicit in p->pass_tex.
+static void pass_sample_polar(struct gl_video *p, struct scaler *scaler) // emit GLSL that samples sample_tex with the scaler's polar kernel into "color"
+{
+ double radius = scaler->kernel->radius;
+ int bound = (int)ceil(radius); // texel offsets visited per axis are 1-bound .. bound
+ bool use_ar = scaler->antiring > 0; // anti-ringing code is emitted only for a positive strength
+ GLSL(vec4 color = vec4(0.0);)
+ GLSLF("{\n"); // open a GLSL scope; "color" is declared outside it and stays visible afterwards
+ GLSL(vec2 pt = vec2(1.0) / sample_size;)
+ GLSL(vec2 fcoord = fract(sample_pos * sample_size - vec2(0.5));)
+ GLSL(vec2 base = sample_pos - fcoord * pt;)
+ GLSL(vec4 c;)
+ GLSLF("float w, d, wsum = 0.0;\n"); // w: weight from the LUT, d: distance divided by radius, wsum: sum of weights
+ if (use_ar) {
+ GLSL(vec4 lo = vec4(1.0);) // running minimum over the samples used for clamping
+ GLSL(vec4 hi = vec4(0.0);) // running maximum over the samples used for clamping
+ }
+ gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target,
+ TEXUNIT_SCALERS + scaler->index); // NOTE(review): last argument is presumably the texture unit - confirm
+ GLSLF("// scaler samples\n");
+ for (int y = 1-bound; y <= bound; y++) {
+ for (int x = 1-bound; x <= bound; x++) {
+ // Since we can't know the subpixel position in advance, assume a
+ // worst case scenario
+ int yy = y > 0 ? y-1 : y;
+ int xx = x > 0 ? x-1 : x;
+ double dmax = sqrt(xx*xx + yy*yy);
+ // Skip samples definitely outside the radius
+ if (dmax >= radius)
+ continue;
+ GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius);
+ // Check for samples that might be skippable
+ if (dmax >= radius - 1)
+ GLSLF("if (d < 1.0) {\n"); // run-time guard in the shader; closed by the matching check below
+ GLSL(w = texture1D(lut, d).r;)
+ GLSL(wsum += w;)
+ GLSLF("c = texture(sample_tex, base + pt * vec2(%d, %d));\n", x, y);
+ GLSL(color += vec4(w) * c;)
+ if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) { // only the 2x2 offsets (0..1, 0..1) feed lo/hi
+ GLSL(lo = min(lo, c);)
+ GLSL(hi = max(hi, c);)
+ }
+ if (dmax >= radius -1) // same condition as the guard above, so the emitted braces stay balanced
+ GLSLF("}\n");
+ }
+ }
+ GLSL(color = color / vec4(wsum);) // normalize by the accumulated weight
+ if (use_ar)
+ GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring);
+ GLSLF("}\n");
+}
+
+static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s) // emit GLSL declaring vec4 <t>, computed from the GLSL expression <s>
+{
+ // Explanation of how bicubic scaling with only 4 texel fetches is done:
+ // http://www.mate.tue.nl/mate/pdfs/10318.pdf
+ // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
+ // Explanation why this algorithm normally always blurs, even with unit
+ // scaling:
+ // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
+ // 'GPU Prefilter for Accurate Cubic B-spline Interpolation'
+ GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s"
+ " + vec4(1, 0, -0.5, 0.5);\n", t, s);
+ GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); // second multiply-add by <s>
+ GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); // third multiply-add by <s>
+ GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); // divide .xy component-wise by .zw
+ GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s);
+}
+
+static void pass_sample_bicubic_fast(struct gl_video *p) // emit GLSL that samples sample_tex with 4 fetches into "color"
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n"); // open a GLSL scope; "color" is declared outside it and stays visible afterwards
+ GLSL(vec2 pt = 1.0 / sample_size;)
+ GLSL(vec2 fcoord = fract(sample_pos * sample_size + vec2(0.5, 0.5));) // uses the coordinate sample_pos; sample_tex is the sampler and cannot be multiplied
+ bicubic_calcweights(p, "parmx", "fcoord.x"); // declares vec4 parmx in the shader
+ bicubic_calcweights(p, "parmy", "fcoord.y"); // declares vec4 parmy in the shader
+ GLSL(vec4 cdelta;)
+ GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);) // x offsets of the fetches
+ GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);) // y offsets of the fetches
+ // first y-interpolation
+ GLSL(vec4 ar = texture(sample_tex, sample_pos + cdelta.xy);)
+ GLSL(vec4 ag = texture(sample_tex, sample_pos + cdelta.xw);)
+ GLSL(vec4 ab = mix(ag, ar, parmy.b);)
+ // second y-interpolation
+ GLSL(vec4 br = texture(sample_tex, sample_pos + cdelta.zy);)
+ GLSL(vec4 bg = texture(sample_tex, sample_pos + cdelta.zw);)
+ GLSL(vec4 aa = mix(bg, br, parmy.b);)
+ // x-interpolation
+ GLSL(color = mix(aa, ab, parmx.b);)
+ GLSLF("}\n");
+}
+
+static void pass_sample_sharpen3(struct gl_video *p, struct scaler *scaler) // emit GLSL that sharpens sample_tex into "color" using 4 diagonal fetches
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n"); // open a GLSL scope; "color" is declared outside it and stays visible afterwards
+ GLSL(vec2 pt = 1.0 / sample_size;)
+ GLSL(vec2 st = pt * 0.5;) // fetch offset: half of pt along each axis
+ GLSL(vec4 p = texture(sample_tex, sample_pos);)
+ GLSL(vec4 sum = texture(sample_tex, sample_pos + st * vec2(+1, +1))
+ + texture(sample_tex, sample_pos + st * vec2(+1, -1))
+ + texture(sample_tex, sample_pos + st * vec2(-1, +1))
+ + texture(sample_tex, sample_pos + st * vec2(-1, -1));)
+ double param = isnan(scaler->params[0]) ? 0.5 : scaler->params[0]; // strength falls back to 0.5 when params[0] is NaN
+ GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param); // centre plus its difference from the 4-fetch average, scaled by param
+ GLSLF("}\n");
+}
+
+static void pass_sample_sharpen5(struct gl_video *p, struct scaler *scaler) // emit GLSL that sharpens sample_tex into "color" using 4 diagonal and 4 axis fetches
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n"); // open a GLSL scope; "color" is declared outside it and stays visible afterwards
+ GLSL(vec2 pt = 1.0 / sample_size;)
+ GLSL(vec2 st1 = pt * 1.2;) // offset used for the diagonal fetches (sum1)
+ GLSL(vec4 p = texture(sample_tex, sample_pos);)
+ GLSL(vec4 sum1 = texture(sample_tex, sample_pos + st1 * vec2(+1, +1))
+ + texture(sample_tex, sample_pos + st1 * vec2(+1, -1))
+ + texture(sample_tex, sample_pos + st1 * vec2(-1, +1))
+ + texture(sample_tex, sample_pos + st1 * vec2(-1, -1));)
+ GLSL(vec2 st2 = pt * 1.5;) // offset used for the axis-aligned fetches (sum2)
+ GLSL(vec4 sum2 = texture(sample_tex, sample_pos + st2 * vec2(+1, 0))
+ + texture(sample_tex, sample_pos + st2 * vec2( 0, +1))
+ + texture(sample_tex, sample_pos + st2 * vec2(-1, 0))
+ + texture(sample_tex, sample_pos + st2 * vec2( 0, -1));)
+ GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) // the nine weights add up to zero
+ double param = isnan(scaler->params[0]) ? 0.5 : scaler->params[0]; // strength falls back to 0.5 when params[0] is NaN
+ GLSLF("color = p + t * %f;\n", param);
+ GLSLF("}\n");
+
+}
+
+// Sample. This samples from the texture ID given by src_tex. It's hardcoded to
+// use all variables and values associated with it (which includes textureN,
+// texcoordN and texture_sizeN).
+// The src rectangle is implicit in p->pass_tex + transform.
// The dst rectangle is implicit by what the caller will do next, but w and h
// must still be what is going to be used (to dimension FBOs correctly).
// This will declare "vec4 color;", which contains the scaled contents.
// The scaler unit is initialized by this function; in order to avoid cache
// thrashing, the scaler unit should usually use the same parameters.
-static void pass_scale(struct gl_video *p, int scaler_unit, const char *name,
- double scale_factor, int w, int h)
+static void pass_sample(struct gl_video *p, int src_tex,
+ int scaler_unit, const char *name, double scale_factor,
+ int w, int h, float transform[3][2])
{
struct scaler *scaler = &p->scalers[scaler_unit];
reinit_scaler(p, scaler_unit, name, scale_factor);
+ // Set up the sample parameters appropriately
+ GLSLF("#define sample_tex texture%d\n", src_tex);
+ GLSLF("#define sample_pos texcoord%d\n", src_tex);
+ GLSLF("#define sample_size texture_size%d\n", src_tex);
+
+ // Set up the transformation for everything other than separated scaling
+ if (!scaler->kernel || scaler->kernel->polar)
+ gl_matrix_mul_rect(transform, &p->pass_tex[src_tex].src);
+
// Dispatch the scaler. They're all wildly different.
if (strcmp(scaler->name, "bilinear") == 0) {
- GLSL(vec4 color = texture(texture0, texcoord0);)
- } else if (scaler->kernel && !scaler->kernel->polar) {
- pass_sample_separated(p, scaler, w, h);
+ GLSL(vec4 color = texture(sample_tex, sample_pos);)
+ } else if (strcmp(scaler->name, "bicubic_fast") == 0) {
+ pass_sample_bicubic_fast(p);
+ } else if (strcmp(scaler->name, "sharpen3") == 0) {
+ pass_sample_sharpen3(p, scaler);
+ } else if (strcmp(scaler->name, "sharpen5") == 0) {
+ pass_sample_sharpen5(p, scaler);
+ } else if (scaler->kernel && scaler->kernel->polar) {
+ pass_sample_polar(p, scaler);
+ } else if (scaler->kernel) {
+ pass_sample_separated(p, src_tex, scaler, w, h, transform);
} else {
- abort(); //not implemented yet
+ // Should never happen
+ abort();
}
+
+ // Micro-optimization: Avoid scaling unneeded channels
+ if (!p->has_alpha || p->opts.alpha_mode != 1)
+ GLSL(color.a = 1.0;)
}
// sample from video textures, set "color" variable to yuv value
-// (not sure how exactly this should involve the resamplers)
-static