summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
Diffstat (limited to 'video')
-rw-r--r--video/csputils.c3
-rw-r--r--video/csputils.h1
-rw-r--r--video/out/gl_osd.c8
-rw-r--r--video/out/gl_utils.c6
-rw-r--r--video/out/gl_utils.h16
-rw-r--r--video/out/gl_video.c864
-rw-r--r--video/out/gl_video.h7
7 files changed, 706 insertions, 199 deletions
diff --git a/video/csputils.c b/video/csputils.c
index cee33dbba9..06de4bb9e8 100644
--- a/video/csputils.c
+++ b/video/csputils.c
@@ -70,6 +70,7 @@ const char *const mp_csp_trc_names[MP_CSP_TRC_COUNT] = {
"BT.1886 (SD, HD, UHD)",
"sRGB (IEC 61966-2-1)",
"Linear light",
+ "Pure power (gamma 2.2)",
};
const char *const mp_csp_equalizer_names[MP_CSP_EQ_COUNT] = {
@@ -156,6 +157,7 @@ enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc)
case AVCOL_TRC_BT2020_12: return MP_CSP_TRC_BT_1886;
case AVCOL_TRC_IEC61966_2_1: return MP_CSP_TRC_SRGB;
case AVCOL_TRC_LINEAR: return MP_CSP_TRC_LINEAR;
+ case AVCOL_TRC_GAMMA22: return MP_CSP_TRC_GAMMA22;
default: return MP_CSP_TRC_AUTO;
}
}
@@ -202,6 +204,7 @@ int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc)
case MP_CSP_TRC_BT_1886: return AVCOL_TRC_BT709;
case MP_CSP_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1;
case MP_CSP_TRC_LINEAR: return AVCOL_TRC_LINEAR;
+ case MP_CSP_TRC_GAMMA22: return AVCOL_TRC_GAMMA22;
default: return AVCOL_TRC_UNSPECIFIED;
}
}
diff --git a/video/csputils.h b/video/csputils.h
index a082682e43..a68c106549 100644
--- a/video/csputils.h
+++ b/video/csputils.h
@@ -76,6 +76,7 @@ enum mp_csp_trc {
MP_CSP_TRC_BT_1886,
MP_CSP_TRC_SRGB,
MP_CSP_TRC_LINEAR,
+ MP_CSP_TRC_GAMMA22,
MP_CSP_TRC_COUNT
};
diff --git a/video/out/gl_osd.c b/video/out/gl_osd.c
index 0ab85f59c4..7a9532d416 100644
--- a/video/out/gl_osd.c
+++ b/video/out/gl_osd.c
@@ -294,7 +294,7 @@ static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs)
osd->num_subparts * sizeof(osd->subparts[0]));
}
-static void write_quad(struct vertex *va, float matrix[3][3],
+static void write_quad(struct vertex *va, float matrix[3][2],
float x0, float y0, float x1, float y1,
float tx0, float ty0, float tx1, float ty1,
float tex_w, float tex_h, const uint8_t color[4])
@@ -312,7 +312,7 @@ static void write_quad(struct vertex *va, float matrix[3][3],
#undef COLOR_INIT
}
-static int generate_verts(struct mpgl_osd_part *part, float matrix[3][3])
+static int generate_verts(struct mpgl_osd_part *part, float matrix[3][2])
{
int num_vertices = part->num_subparts * 6;
MP_TARRAY_GROW(part, part->vertices, num_vertices);
@@ -337,7 +337,7 @@ static int generate_verts(struct mpgl_osd_part *part, float matrix[3][3])
return num_vertices;
}
-static void draw_part(struct mpgl_osd *ctx, int index, float matrix[3][3])
+static void draw_part(struct mpgl_osd *ctx, int index, float matrix[3][2])
{
GL *gl = ctx->gl;
struct mpgl_osd_part *part = ctx->parts[index];
@@ -377,7 +377,7 @@ void mpgl_osd_draw_part(struct mpgl_osd *ctx, int vp_w, int vp_h, int index)
for (int x = 0; x < div[0]; x++) {
for (int y = 0; y < div[1]; y++) {
- float matrix[3][3];
+ float matrix[3][2];
gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h);
diff --git a/video/out/gl_utils.c b/video/out/gl_utils.c
index ca2fef10bf..7881a6cf1f 100644
--- a/video/out/gl_utils.c
+++ b/video/out/gl_utils.c
@@ -418,7 +418,7 @@ void fbotex_uninit(struct fbotex *fbo)
// Standard parallel 2D projection, except y1 < y0 means that the coordinate
// system is flipped, not the projection.
-void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1)
+void gl_matrix_ortho2d(float m[3][2], float x0, float x1, float y0, float y1)
{
if (y1 < y0) {
float t = y0;
@@ -426,12 +426,12 @@ void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1)
y1 = t;
}
- memset(m, 0, 9 * sizeof(float));
m[0][0] = 2.0f / (x1 - x0);
+ m[0][1] = 0.0f;
+ m[1][0] = 0.0f;
m[1][1] = 2.0f / (y1 - y0);
m[2][0] = -(x1 + x0) / (x1 - x0);
m[2][1] = -(y1 + y0) / (y1 - y0);
- m[2][2] = 1.0f;
}
static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id,
diff --git a/video/out/gl_utils.h b/video/out/gl_utils.h
index a1bb2ecafb..b4f5650ea6 100644
--- a/video/out/gl_utils.h
+++ b/video/out/gl_utils.h
@@ -86,15 +86,27 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
#define FBOTEX_FUZZY_H 2
void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter);
-void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1);
+void gl_matrix_ortho2d(float m[3][2], float x0, float x1, float y0, float y1);
-static inline void gl_matrix_mul_vec(float m[3][3], float *x, float *y)
+// This treats m as an affine transformation, in other words m[2][n] gets
+// added to the output.
+static inline void gl_matrix_mul_vec(float m[3][2], float *x, float *y)
{
float vx = *x, vy = *y;
*x = vx * m[0][0] + vy * m[1][0] + m[2][0];
*y = vx * m[0][1] + vy * m[1][1] + m[2][1];
}
+// Rectangle with float coordinates, described by the two points (x0, y0)
+// and (x1, y1).
+struct mp_rect_f {
+    float x0, y0;
+    float x1, y1;
+};
+
+// Transform the two points (x0, y0) and (x1, y1) of r by m, in place, using
+// gl_matrix_mul_vec() for each point.
+static inline void gl_matrix_mul_rect(float m[3][2], struct mp_rect_f *r)
+{
+    float *px[2] = {&r->x0, &r->x1};
+    float *py[2] = {&r->y0, &r->y1};
+    for (int i = 0; i < 2; i++)
+        gl_matrix_mul_vec(m, px[i], py[i]);
+}
+
void gl_set_debug_logger(GL *gl, struct mp_log *log);
struct gl_shader_cache;
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index a52bd82020..5f64dcb1d6 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -44,7 +44,7 @@
// Pixel width of 1D lookup textures.
#define LOOKUP_TEXTURE_SIZE 256
-// Texture units 0-3 are used by the video, with unit 0 for free use.
+// Texture units 0-3 are used by the video, and for free use by the passes
// Units 4-5 are used for scaler LUTs.
#define TEXUNIT_SCALERS 4
#define TEXUNIT_3DLUT 6
@@ -123,16 +123,15 @@ struct scaler {
struct fbosurface {
struct fbotex fbotex;
int64_t pts;
- bool valid;
};
-#define FBOSURFACES_MAX 2
+#define FBOSURFACES_MAX 4
struct src_tex {
GLuint gl_tex;
GLenum gl_target;
int tex_w, tex_h;
- struct mp_rect src;
+ struct mp_rect_f src;
};
struct gl_video {
@@ -171,10 +170,7 @@ struct gl_video {
bool has_alpha;
char color_swizzle[5];
- float input_gamma, conv_gamma;
- float user_gamma;
- bool user_gamma_enabled; // shader handles user_gamma
- bool sigmoid_enabled;
+ bool user_gamma_enabled;
struct video_image image;
@@ -183,20 +179,14 @@ struct gl_video {
struct fbosurface surfaces[FBOSURFACES_MAX];
size_t surface_idx;
+ size_t surface_now;
+ bool is_interpolated;
// state for luma (0) and chroma (1) scalers
struct scaler scalers[2];
- // true if scaler is currently upscaling
- bool upscaling;
-
- bool is_interpolated;
-
struct mp_csp_equalizer video_eq;
- // Source and destination color spaces for the CMS matrix
- struct mp_csp_primaries csp_src, csp_dest;
-
struct mp_rect src_rect; // displayed part of the source video
struct mp_rect dst_rect; // video rectangle on output window
struct mp_osd_res osd_rect; // OSD size/margins
@@ -366,7 +356,19 @@ const struct m_sub_options gl_video_conf = {
.opts = (const m_option_t[]) {
OPT_FLOATRANGE("gamma", gamma, 0, 0.1, 2.0),
OPT_FLAG("gamma-auto", gamma_auto, 0),
- OPT_FLAG("srgb", srgb, 0),
+ OPT_CHOICE("target-prim", target_prim, 0,
+ ({"auto", MP_CSP_PRIM_AUTO},
+ {"bt601-525", MP_CSP_PRIM_BT_601_525},
+ {"bt601-625", MP_CSP_PRIM_BT_601_625},
+ {"bt709", MP_CSP_PRIM_BT_709},
+ {"bt2020", MP_CSP_PRIM_BT_2020},
+ {"bt470m", MP_CSP_PRIM_BT_470M})),
+ OPT_CHOICE("target-trc", target_trc, 0,
+ ({"auto", MP_CSP_TRC_AUTO},
+ {"bt1886", MP_CSP_TRC_BT_1886},
+ {"srgb", MP_CSP_TRC_SRGB},
+ {"linear", MP_CSP_TRC_LINEAR},
+ {"gamma22", MP_CSP_TRC_GAMMA22})),
OPT_FLAG("npot", npot, 0),
OPT_FLAG("pbo", pbo, 0),
OPT_STRING_VALIDATE("scale", scalers[0], 0, validate_scaler_opt),
@@ -433,6 +435,7 @@ const struct m_sub_options gl_video_conf = {
OPT_REPLACED("cparam2", "cscale-param2"),
OPT_REPLACED("cradius", "cscale-radius"),
OPT_REPLACED("cantiring", "cscale-antiring"),
+        // sRGB shares its primaries with BT.709; "target-prim" has no "srgb"
+        // choice, so the replacement hint has to name bt709.
+        OPT_REPLACED("srgb", "target-prim=bt709:target-trc=srgb"),
{0}
},
@@ -479,6 +482,19 @@ void gl_video_set_debug(struct gl_video *p, bool enable)
gl_set_debug_logger(gl, enable ? p->log : NULL);
}
+// Zero the pts of every entry in p->surfaces and rewind both surface_idx
+// and surface_now to slot 0.
+static void gl_video_reset_surfaces(struct gl_video *p)
+{
+    p->surface_now = 0;
+    p->surface_idx = 0;
+
+    struct fbosurface *end = p->surfaces + FBOSURFACES_MAX;
+    for (struct fbosurface *s = p->surfaces; s != end; s++)
+        s->pts = 0;
+}
+
+// Index of the slot after id, wrapping around at FBOSURFACES_MAX.
+static size_t fbosurface_next(size_t id)
+{
+    size_t successor = id + 1;
+    return successor % FBOSURFACES_MAX;
+}
+
static void recreate_osd(struct gl_video *p)
{
if (p->osd)
@@ -507,6 +523,8 @@ static void uninit_rendering(struct gl_video *p)
gl->DeleteTextures(1, &p->dither_texture);
p->dither_texture = 0;
+
+ gl_video_reset_surfaces(p);
}
void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
@@ -546,13 +564,28 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d)
reinit_rendering(p);
}
-static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg)
+// Point pass texture slot id at src_fbo's texture, with the source rectangle
+// set to (0, 0)-(w, h). The slot is overwritten completely.
+static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo, int id,
+                             int w, int h)
+{
+    struct src_tex *slot = &p->pass_tex[id];
+    *slot = (struct src_tex){
+        .gl_target = GL_TEXTURE_2D,
+        .gl_tex = src_fbo->texture,
+        .tex_w = src_fbo->tex_w,
+        .tex_h = src_fbo->tex_h,
+        .src = {.x0 = 0, .y0 = 0, .x1 = w, .y1 = h},
+    };
+}
+
+static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg,
+ float chroma[3][2])
{
GLuint imgtex[4] = {0};
assert(vimg->mpi);
- float offset[2] = {0};
+ float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
+ float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
+
int chroma_loc = p->opts.chroma_location;
if (!chroma_loc)
chroma_loc = p->image_params.chroma_location;
@@ -564,13 +597,21 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
// so that the luma and chroma sample line up exactly.
// For 4:4:4, setting chroma location should have no effect at all.
// luma sample size (in chroma coord. space)
- float ls_w = 1.0 / (1 << p->image_desc.chroma_xs);
- float ls_h = 1.0 / (1 << p->image_desc.chroma_ys);
- // move chroma center to luma center (in chroma coord. space)
- offset[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
- offset[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ chroma[2][0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
+ chroma[2][1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+ } else {
+ chroma[2][0] = chroma[2][1] = 0.0;
}
+ // Make sure luma/chroma sizes are aligned.
+ // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2
+ // so luma (3,3) has to align with chroma (2,2).
+ chroma[0][0] = ls_w * (float)vimg->planes[0].tex_w
+ / vimg->planes[1].tex_w;
+ chroma[1][1] = ls_h * (float)vimg->planes[0].tex_h
+ / vimg->planes[1].tex_h;
+ chroma[0][1] = chroma[1][0] = 0.0; // No rotation etc.
+
if (p->hwdec_active) {
p->hwdec->driver->map_image(p->hwdec, vimg->mpi, imgtex);
} else {
@@ -585,17 +626,7 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
.gl_target = t->gl_target,
.tex_w = t->tex_w,
.tex_h = t->tex_h,
- //.src = {0, 0, t->w, t->h},
- .src = {
- // xxx this is wrong; we want to crop the source when sampling
- // from indirect_fbo, but not when rendering to indirect_fbo
- // also, this should apply offset, and take care of odd video
- // dimensions properly; and it should use floats instead
- .x0 = p->src_rect.x0 >> p->image_desc.xs[n],
- .y0 = p->src_rect.y0 >> p->image_desc.ys[n],
- .x1 = p->src_rect.x1 >> p->image_desc.xs[n],
- .y1 = p->src_rect.y1 >> p->image_desc.ys[n],
- },
+ .src = {0, 0, t->w, t->h},
};
}
}
@@ -712,7 +743,7 @@ static void pass_prepare_src_tex(struct gl_video *p)
GL *gl = p->gl;
struct gl_shader_cache *sc = p->sc;
- for (int n = 0; n < p->plane_count; n++) {
+ for (int n = 0; n < 4; n++) {
struct src_tex *s = &p->pass_tex[n];
if (!s->gl_tex)
continue;
@@ -722,9 +753,9 @@ static void pass_prepare_src_tex(struct gl_video *p)
snprintf(texture_name, sizeof(texture_name), "texture%d", n);
snprintf(texture_size, sizeof(texture_size), "texture_size%d", n);
- gl_sc_uniform_sampler(sc, texture_name, p->gl_target, n);
+ gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n);
float f[2] = {1, 1};
- if (p->gl_target != GL_TEXTURE_RECTANGLE) {
+ if (s->gl_target != GL_TEXTURE_RECTANGLE) {
f[0] = s->tex_w;
f[1] = s->tex_h;
}
@@ -736,12 +767,13 @@ static void pass_prepare_src_tex(struct gl_video *p)
gl->ActiveTexture(GL_TEXTURE0);
}
+// flags = bits 0-1: rotate, bit 2: flip vertically
static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
- const struct mp_rect *dst)
+ const struct mp_rect *dst, int flags)
{
struct vertex va[4];
- float matrix[3][3];
+ float matrix[3][2];
gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h);
float x[2] = {dst->x0, dst->x1};
@@ -758,6 +790,8 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
if (s->gl_tex) {
float tx[2] = {s->src.x0, s->src.x1};
float ty[2] = {s->src.y0, s->src.y1};
+ if (flags & 4)
+ MPSWAP(float, ty[0], ty[1]);
bool rect = s->gl_target == GL_TEXTURE_RECTANGLE;
v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->tex_w);
v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->tex_h);
@@ -765,20 +799,31 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
}
}
+ int rot = flags & 3;
+ while (rot--) {
+ static const int perm[4] = {1, 3, 0, 2};
+ struct vertex vb[4];
+ memcpy(vb, va, sizeof(vb));
+ for (int n = 0; n < 4; n++)
+ memcpy(va[n].texcoord, vb[perm[n]].texcoord,
+ sizeof(struct vertex_pt[4]));
+ }
+
gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, va, 4);
debug_check_gl(p, "after rendering");
}
+// flags: see render_pass_quad
static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h,
- const struct mp_rect *dst)
+ const struct mp_rect *dst, int flags)
{
GL *gl = p->gl;
pass_prepare_src_tex(p);
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
gl->Viewport(0, 0, vp_w, vp_h < 0 ? -vp_h : vp_h);
gl_sc_gen_shader_and_reset(p->sc);
- render_pass_quad(p, vp_w, vp_h, dst);
+ render_pass_quad(p, vp_w, vp_h, dst, flags);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
memset(&p->pass_tex, 0, sizeof(p->pass_tex));
}
@@ -787,22 +832,17 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
// FBO, if the required parameters have changed
// w, h: required FBO target dimension, and also defines the target rectangle
// used for rasterization
+// tex: the texture ID to load the result back into
// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy
// flags allows the FBO to be larger than the target)
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
- int w, int h, int flags)
+ int w, int h, int tex, int flags)
{
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
finish_pass_direct(p, dst_fbo->fbo, dst_fbo->tex_w, dst_fbo->tex_h,
- &(struct mp_rect){0, 0, w, h});
- p->pass_tex[0] = (struct src_tex){
- .gl_tex = dst_fbo->texture,
- .gl_target = GL_TEXTURE_2D,
- .tex_w = dst_fbo->tex_w,
- .tex_h = dst_fbo->tex_h,
- .src = {0, 0, w, h},
- };
+ &(struct mp_rect){0, 0, w, h}, 0);
+ pass_load_fbotex(p, dst_fbo, tex, w, h);
}
static void uninit_scaler(struct gl_video *p, int scaler_unit)
@@ -834,6 +874,9 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name,
scaler->insufficient = false;
scaler->initialized = true;
+ for (int n = 0; n < 2; n++)
+ scaler->params[n] = p->opts.scaler_params[scaler->index][n];
+
const struct filter_kernel *t_kernel = mp_find_filter_kernel(scaler->name);
if (!t_kernel)
return;
@@ -842,8 +885,8 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name,
scaler->kernel = &scaler->kernel_storage;
for (int n = 0; n < 2; n++) {
- if (!isnan(p->opts.scaler_params[scaler->index][n]))
- scaler->kernel->params[n] = p->opts.scaler_params[scaler->index][n];
+ if (!isnan(scaler->params[n]))
+ scaler->kernel->params[n] = scaler->params[n];
}
scaler->antiring = p->opts.scaler_antiring[scaler->index];
@@ -920,14 +963,15 @@ static void pass_sample_separated_get_weights(struct gl_video *p,
GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));)
GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
} else {
- GLSL(float weights[N];)
- GLSL(for (int n = 0; n < N / 4; n++) {)
- GLSL( vec4 c = texture(lut, vec2(1.0 / (N / 2) + n / float(N / 4), fcoord));)
- GLSL( weights[n * 4 + 0] = c.r;)
- GLSL( weights[n * 4 + 1] = c.g;)
- GLSL( weights[n * 4 + 2] = c.b;)
- GLSL( weights[n * 4 + 3] = c.a;)
- GLSL(})
+ GLSLF("float weights[%d];\n", N);
+ for (int n = 0; n < N / 4; n++) {
+ GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n",
+ N / 2, n, N / 4);
+ GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
+ GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
+ GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
+ GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
+ }
}
}
@@ -937,117 +981,294 @@ static void pass_sample_separated_gen(struct gl_video *p, struct scaler *scaler,
int d_x, int d_y)
{
int N = scaler->kernel->size;
+ bool use_ar = scaler->antiring > 0;
+ GLSL(vec4 color = vec4(0.0);)
+ GLSLF("{\n");
GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y);
- GLSLF("#define N %d\n", N);
- GLSLF("#define ANTIRING %f\n", scaler->antiring);
- GLSL(vec2 pt = (vec2(1.0) / texture_size0) * dir;)
- GLSL(float fcoord = dot(fract(texcoord0 * texture_size0 - vec2(0.5)), dir);)
- GLSL(vec2 base = texcoord0 - fcoord * pt - pt * vec2(N / 2 - 1);)
+ GLSL(vec2 pt = (vec2(1.0) / sample_size) * dir;)
+ GLSL(float fcoord = dot(fract(sample_pos * sample_size - vec2(0.5)), dir);)
+ GLSLF("vec2 base = sample_pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1);
+ GLSL(vec4 c;)
+ if (use_ar) {
+ GLSL(vec4 hi = vec4(0.0);)
+ GLSL(vec4 lo = vec4(1.0);)
+ }
pass_sample_separated_get_weights(p, scaler);
- GLSL(vec4 color = vec4(0);)
- GLSL(vec4 hi = vec4(0);)
- GLSL(vec4 lo = vec4(1);)
- GLSL(for (int n = 0; n < N; n++) {)
- GLSL( vec4 c = texture(texture0, base + pt * vec2(n));)
- GLSL( color += vec4(weights[n]) * c;)
- GLSL( if (n == N/2-1 || n == N/2) {)
- GLSL( lo = min(lo, c);)
- GLSL( hi = max(hi, c);)
- GLSL( })
- GLSL(})
- GLSL(color = mix(color, clamp(color, lo, hi), ANTIRING);)
-}
-
-static void pass_sample_separated(struct gl_video *p, struct scaler *scaler,
- int w, int h)
+ GLSLF("// scaler samples\n");
+    // Emit one fetch per kernel tap. The fetch must go through sample_tex:
+    // pt, fcoord and base are derived from sample_pos/sample_size, so a
+    // hard-coded texture0 would read the wrong texture whenever the pass
+    // samples from a texture unit other than 0.
+    for (int n = 0; n < N; n++) {
+        GLSLF("c = texture(sample_tex, base + pt * vec2(%d));\n", n);
+        GLSLF("color += vec4(weights[%d]) * c;\n", n);
+        // Only the two taps around the sample position bound the antiringing
+        // clamp range.
+        if (use_ar && (n == N/2-1 || n == N/2)) {
+            GLSL(lo = min(lo, c);)
+            GLSL(hi = max(hi, c);)
+        }
+    }
+ if (use_ar)
+ GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring);
+ GLSLF("}\n");
+}
+
+static void pass_sample_separated(struct gl_video *p, int src_tex,
+ struct scaler *scaler, int w, int h,
+ float transform[3][2])
{
+ // Keep the x components untouched for the first pass
+ struct mp_rect_f src_new = p->pass_tex[0].src;
+ gl_matrix_mul_rect(transform, &src_new);
GLSLF("// pass 1\n");
+ p->pass_tex[0].src.y0 = src_new.y0;
+ p->pass_tex[0].src.y1 = src_new.y1;
pass_sample_separated_gen(p, scaler, 0, 1);
int src_w = p->pass_tex[0].src.x1 - p->pass_tex[0].src.x0;
- finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, 0);
+ finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H);
+ // Restore the sample source for the second pass
+ GLSLF("#define sample_tex texture%d\n", src_tex);
+ GLSLF("#define sample_pos texcoord%d\n", src_tex);
+ GLSLF("#define sample_size texture_size%d\n", src_tex);
GLSLF("// pass 2\n");
+ p->pass_tex[0].src.x0 = src_new.x0;
+ p->pass_tex[0].src.x1 = src_new.x1;
pass_sample_separated_gen(p, scaler, 1, 0);
}
-// Scale. This uses the p->pass_tex[0] texture as source. It's hardcoded to
-// use all variables and values associated with p->pass_tex[0] (which includes
-// texture0/texcoord0/texture_size0).
-// The src rectangle is implicit in p->pass_tex.
+// Emit shader code for a polar (2D radial) scaler. Declares "vec4 color" and
+// reads through the sample_tex/sample_pos/sample_size defines; weights are
+// looked up in the scaler's LUT bound at TEXUNIT_SCALERS + scaler->index.
+static void pass_sample_polar(struct gl_video *p, struct scaler *scaler)
+{
+    double radius = scaler->kernel->radius;
+    int extent = (int)ceil(radius);
+    bool antiring_on = scaler->antiring > 0;
+
+    GLSL(vec4 color = vec4(0.0);)
+    GLSLF("{\n");
+    GLSL(vec2 pt = vec2(1.0) / sample_size;)
+    GLSL(vec2 fcoord = fract(sample_pos * sample_size - vec2(0.5));)
+    GLSL(vec2 base = sample_pos - fcoord * pt;)
+    GLSL(vec4 c;)
+    GLSLF("float w, d, wsum = 0.0;\n");
+    if (antiring_on) {
+        GLSL(vec4 lo = vec4(1.0);)
+        GLSL(vec4 hi = vec4(0.0);)
+    }
+    gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target,
+                          TEXUNIT_SCALERS + scaler->index);
+    GLSLF("// scaler samples\n");
+    for (int y = 1 - extent; y <= extent; y++) {
+        for (int x = 1 - extent; x <= extent; x++) {
+            // The subpixel position is unknown here, so measure the distance
+            // from the nearest edge of the texel (worst case).
+            int near_y = y > 0 ? y - 1 : y;
+            int near_x = x > 0 ? x - 1 : x;
+            double dmax = sqrt(near_x * near_x + near_y * near_y);
+            // Taps that can never fall inside the radius are not emitted.
+            if (dmax >= radius)
+                continue;
+            // Taps that may or may not be inside get a runtime check.
+            bool needs_guard = dmax >= radius - 1;
+
+            GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius);
+            if (needs_guard)
+                GLSLF("if (d < 1.0) {\n");
+            GLSL(w = texture1D(lut, d).r;)
+            GLSL(wsum += w;)
+            GLSLF("c = texture(sample_tex, base + pt * vec2(%d, %d));\n", x, y);
+            GLSL(color += vec4(w) * c;)
+            // The four texels surrounding the sample position bound the
+            // antiringing clamp range.
+            bool inner = x >= 0 && y >= 0 && x <= 1 && y <= 1;
+            if (antiring_on && inner) {
+                GLSL(lo = min(lo, c);)
+                GLSL(hi = max(hi, c);)
+            }
+            if (needs_guard)
+                GLSLF("}\n");
+        }
+    }
+    GLSL(color = color / vec4(wsum);)
+    if (antiring_on)
+        GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring);
+    GLSLF("}\n");
+}
+
+// Emit GLSL that declares a vec4 named t and fills it from the scalar
+// expression s (both given as GLSL source text).
+//
+// Background on bicubic scaling with only 4 texel fetches:
+//   http://www.mate.tue.nl/mate/pdfs/10318.pdf
+//   'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
+// Why this algorithm normally always blurs, even with unit scaling:
+//   http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
+//   'GPU Prefilter for Accurate Cubic B-spline Interpolation'
+static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s)
+{
+    GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s + vec4(1, 0, -0.5, 0.5);\n",
+          t, s);
+    GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n",
+          t, t, s);
+    GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n",
+          t, t, s);
+    GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n",
+          t, t, t);
+    GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n",
+          t, s, s);
+}
+
+// Emit shader code for the 4-fetch bicubic scaler. Declares "vec4 color" and
+// reads through the sample_tex/sample_pos/sample_size defines.
+static void pass_sample_bicubic_fast(struct gl_video *p)
+{
+    GLSL(vec4 color;)
+    GLSLF("{\n");
+    GLSL(vec2 pt = 1.0 / sample_size;)
+    // The fractional texel position comes from the sampling coordinate
+    // (sample_pos); sample_tex is the sampler and cannot be used in
+    // arithmetic.
+    GLSL(vec2 fcoord = fract(sample_pos * sample_size + vec2(0.5, 0.5));)
+    bicubic_calcweights(p, "parmx", "fcoord.x");
+    bicubic_calcweights(p, "parmy", "fcoord.y");
+    GLSL(vec4 cdelta;)
+    GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);)
+    GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);)
+    // first y-interpolation
+    GLSL(vec4 ar = texture(sample_tex, sample_pos + cdelta.xy);)
+    GLSL(vec4 ag = texture(sample_tex, sample_pos + cdelta.xw);)
+    GLSL(vec4 ab = mix(ag, ar, parmy.b);)
+    // second y-interpolation
+    GLSL(vec4 br = texture(sample_tex, sample_pos + cdelta.zy);)
+    GLSL(vec4 bg = texture(sample_tex, sample_pos + cdelta.zw);)
+    GLSL(vec4 aa = mix(bg, br, parmy.b);)
+    // x-interpolation
+    GLSL(color = mix(aa, ab, parmx.b);)
+    GLSLF("}\n");
+}
+
+// Emit shader code for the "sharpen3" filter. Declares "vec4 color".
+// The strength is scaler->params[0], or 0.5 if that parameter is NaN.
+static void pass_sample_sharpen3(struct gl_video *p, struct scaler *scaler)
+{
+    double strength = scaler->params[0];
+    if (isnan(strength))
+        strength = 0.5;
+
+    GLSL(vec4 color;)
+    GLSLF("{\n");
+    GLSL(vec2 pt = 1.0 / sample_size;)
+    GLSL(vec2 st = pt * 0.5;)
+    GLSL(vec4 p = texture(sample_tex, sample_pos);)
+    // Sum of the four diagonal neighbours at half-texel distance.
+    GLSL(vec4 sum = texture(sample_tex, sample_pos + st * vec2(+1, +1))
+                  + texture(sample_tex, sample_pos + st * vec2(+1, -1))
+                  + texture(sample_tex, sample_pos + st * vec2(-1, +1))
+                  + texture(sample_tex, sample_pos + st * vec2(-1, -1));)
+    GLSLF("color = p + (p - 0.25 * sum) * %f;\n", strength);
+    GLSLF("}\n");
+}
+
+// Emit shader code for the "sharpen5" filter. Declares "vec4 color".
+// The strength is scaler->params[0], or 0.5 if that parameter is NaN.
+static void pass_sample_sharpen5(struct gl_video *p, struct scaler *scaler)
+{
+    double strength = scaler->params[0];
+    if (isnan(strength))
+        strength = 0.5;
+
+    GLSL(vec4 color;)
+    GLSLF("{\n");
+    GLSL(vec2 pt = 1.0 / sample_size;)
+    GLSL(vec2 st1 = pt * 1.2;)
+    GLSL(vec4 p = texture(sample_tex, sample_pos);)
+    // Diagonal neighbours at 1.2 texels.
+    GLSL(vec4 sum1 = texture(sample_tex, sample_pos + st1 * vec2(+1, +1))
+                   + texture(sample_tex, sample_pos + st1 * vec2(+1, -1))
+                   + texture(sample_tex, sample_pos + st1 * vec2(-1, +1))
+                   + texture(sample_tex, sample_pos + st1 * vec2(-1, -1));)
+    GLSL(vec2 st2 = pt * 1.5;)
+    // Axis-aligned neighbours at 1.5 texels.
+    GLSL(vec4 sum2 = texture(sample_tex, sample_pos + st2 * vec2(+1, 0))
+                   + texture(sample_tex, sample_pos + st2 * vec2( 0, +1))
+                   + texture(sample_tex, sample_pos + st2 * vec2(-1, 0))
+                   + texture(sample_tex, sample_pos + st2 * vec2( 0, -1));)
+    GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;)
+    GLSLF("color = p + t * %f;\n", strength);
+    GLSLF("}\n");
+}
+
+// Sample. This samples from the texture ID given by src_tex. It's hardcoded to
+// use all variables and values associated with it (which includes textureN,
+// texcoordN and texture_sizeN).
+// The src rectangle is implicit in p->pass_tex + transform.
// The dst rectangle is implicit by what the caller will do next, but w and h
// must still be what is going to be used (to dimension FBOs correctly).
// This will declare "vec4 color;", which contains the scaled contents.
// The scaler unit is initialized by this function; in order to avoid cache
// thrashing, the scaler unit should usually use the same parameters.
-static void pass_scale(struct gl_video *p, int scaler_unit, const char *name,
- double scale_factor, int w, int h)
+static void pass_sample(struct gl_video *p, int src_tex,
+ int scaler_unit, const char *name, double scale_factor,
+ int w, int h, float transform[3][2])
{
struct scaler *scaler = &p->scalers[scaler_unit];
reinit_scaler(p, scaler_unit, name, scale_factor);
+ // Set up the sample parameters appropriately
+ GLSLF("#define sample_tex texture%d\n", src_tex);
+ GLSLF("#define sample_pos texcoord%d\n", src_tex);
+ GLSLF("#define sample_size texture_size%d\n", src_tex);
+
+ // Set up the transformation for everything other than separated scaling
+ if (!scaler->kernel || scaler->kernel->polar)
+ gl_matrix_mul_rect(transform, &p->pass_tex[src_tex].src);
+
// Dispatch the scaler. They're all wildly different.
if (strcmp(scaler->name, "bilinear") == 0) {
- GLSL(vec4 color = texture(texture0, texcoord0);)
- } else if (scaler->kernel && !scaler->kernel->polar) {
- pass_sample_separated(p, scaler, w, h);
+ GLSL(vec4 color = texture(sample_tex, sample_pos);)
+ } else if (strcmp(scaler->name, "bicubic_fast") == 0) {
+ pass_sample_bicubic_fast(p);
+ } else if (strcmp(scaler->name, "sharpen3") == 0) {
+ pass_sample_sharpen3(p, scaler);
+ } else if (strcmp(scaler->name, "sharpen5") == 0) {
+ pass_sample_sharpen5(p, scaler);
+ } else if (scaler->kernel && scaler->kernel->polar) {
+ pass_sample_polar(p, scaler);
+ } else if (scaler->kernel) {
+ pass_sample_separated(p, src_tex, scaler, w, h, transform);
} else {
- abort(); //not implemented yet
+ // Should never happen
+ abort();
}
+
+ // Micro-optimization: Avoid scaling unneeded channels
+ if (!p->has_alpha || p->opts.alpha_mode != 1)
+ GLSL(color.a = 1.0;)
}
// sample from video textures, set "color" variable to yuv value
-// (not sure how exactly this should involve the resamplers)
-static void pass_read_video(struct gl_video *p, bool *use_indirect)
+static void pass_read_video(struct gl_video *p)
{
- pass_set_image_textures(p, &p->image);
+ float chromafix[3][2];
+ pass_set_image_textures(p, &p->image, chromafix);
+
+ if (p->plane_count == 1) {
+ GLSL(vec4 color = texture(texture0, texcoord0);)
+ goto fixalpha;
+ }
- if (p->plane_count > 1) {
+ const char *cscale = p->opts.scalers[1];
+ if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED &&
+ strcmp(cscale, "bilinear") != 0) {
+ struct src_tex luma = p->pass_tex[0];
+ if (p->plane_count > 2) {
+ // For simplicity and performance, we merge the chroma planes
+ // into a single texture before scaling, so the scaler doesn't
+ // need to run multiple times.
+ GLSLF("// chroma merging\n");
+            // Sample each chroma plane with its own texcoordN, so that the
+            // coordinates come from that plane's source rectangle rather than
+            // from luma's (texcoord0).
+            GLSL(vec4 color = vec4(texture(texture1, texcoord1).r,
+                                   texture(texture2, texcoord2).r,
+                                   0.0, 1.0);)
+ int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0;
+ int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0;
+ assert(c_w == p->pass_tex[2].src.x1 - p->pass_tex[2].src.x0);
+ assert(c_h == p->pass_tex[2].src.y1 - p->pass_tex[2].src.y0);
+ finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 1, 0);
+ }
+ GLSLF("// chroma scaling\n");
+ pass_sample(p, 1, 1, cscale, 1.0, p->image_w, p->image_h, chromafix);
+ GLSL(vec2 chroma = color.rg;)
+ // Always force rendering to a FBO before main scaling, or we would
+ // scale chroma incorrectly.
+ p->use_indirect = true;
+ p->pass_tex[0] = luma; // Restore luma after scaling
+ } else {
+ GLSL(vec4 color;)
if (p->plane_count == 2) {
- GLSL(vec2 chroma = texture(texture1, texcoord1).RG;) // NV formats
+ gl_matrix_mul_rect(chromafix, &p->pass_tex[1].src);
+            // chromafix was applied to pass_tex[1].src just above, so the
+            // corrected coordinates arrive in texcoord1, not texcoord0.
+            GLSL(vec2 chroma = texture(texture1, texcoord1).rg;) // NV formats
} else {
+ gl_matrix_mul_rect(chromafix, &p->pass_tex[1].src);
+ gl_matrix_mul_rect(chromafix, &p->pass_tex[2].src);
GLSL(vec2 chroma = vec2(texture(texture1, texcoord1).r,
texture(texture2, texcoord2).r);)
}
+ }
- const char *cscale = p->opts.scalers[1];
- if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED &&
- strcmp(cscale, "bilinear") != 0) {
- GLSLF("// chroma merging\n");
- GLSL(vec4 color = vec4(chroma.r, chroma.g, 0.0, 0.0);)
- if (1) { //p->plane_count > 2) {
- // For simplicity - and maybe also for performance - we merge
- // the chroma planes into one texture before scaling. So the
- // scaler doesn't need to deal with more than 1 source texture.
- int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0;
- int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0;
- finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 0);
- }
- GLSLF("// chroma scaling\n");
- pass_scale(p, 1, cscale, 1.0, p->image_w, p->image_h);
- GLSL(vec2 chroma = color.rg;)
- // Always force rendering to a FBO before main scaling, or we would
- // scale chroma incorrectly.
- *use_indirect = true;
-
- // What we'd really like to do is putting the output of the chroma
- // scaler on texture unit 1, and leave luma on unit 0 (alpha on 3).
- // But this obviously doesn't work, so here's an extremely shitty
- // hack. Keep in mind that the shader already uses tex unit 0, so
- // it can't be changed. alpha is missing too.
- struct src_tex prev = p->pass_tex[0];
- pass_set_image_textures(p, &p->image);
- p->pass_tex[1] = p->pass_tex[0];
- p->pass_tex[0] = prev;
- GLSL(color = vec4(texture(texture1, texcoord1).r, chroma, 0);)
- } else {
- GLSL(vec4 color = vec4(0.0, chroma, 0.0);)
- // These always use bilinear; either because the scaler is bilinear,
- // or because we use an indirect pass.
- GLSL(color.r = texture(texture0, texcoord0).r;)
- if (p->has_alpha && p->plane_count >= 4)
- GLSL(color.a = texture(texture3, texcoord3).r;)
- }
- } else {
- GLSL(vec4 color