summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/common.c5
-rw-r--r--video/out/opengl/common.h1
-rw-r--r--video/out/opengl/gl_headers.h5
-rw-r--r--video/out/opengl/utils.c60
-rw-r--r--video/out/opengl/utils.h2
-rw-r--r--video/out/opengl/video.c57
-rw-r--r--video/out/opengl/video.h4
-rw-r--r--video/out/opengl/video_shaders.c78
-rw-r--r--video/out/opengl/video_shaders.h3
9 files changed, 193 insertions, 22 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index b9536b6c59..9af21856ab 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -346,6 +346,11 @@ static const struct gl_functions gl_functions[] = {
},
{
.ver_core = 430,
+ .extension = "GL_ARB_shader_storage_buffer_object",
+ .provides = MPGL_CAP_SSBO,
+ },
+ {
+ .ver_core = 430,
.extension = "GL_ARB_compute_shader",
.functions = (const struct gl_function[]) {
DEF_FN(DispatchCompute),
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index 40208c45e5..eec7806624 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -54,6 +54,7 @@ enum {
MPGL_CAP_EXT16 = (1 << 18), // GL_EXT_texture_norm16
MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float
MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float
+ MPGL_CAP_SSBO = (1 << 21), // GL_ARB_shader_storage_buffer_object
MPGL_CAP_SW = (1 << 30), // indirect or sw renderer
};
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
index 8f201bb64c..a55749cbb7 100644
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@@ -83,6 +83,11 @@
#define GL_COMPUTE_SHADER 0x91B9
+// --- GL 4.3 or GL_ARB_shader_storage_buffer_object
+
+#define GL_SHADER_STORAGE_BUFFER 0x90D2
+#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000
+
// --- GL_NV_vdpau_interop
#define GLvdpauSurfaceNV GLintptr
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index f1e0081b10..afbd6f65af 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -473,6 +473,13 @@ struct sc_uniform {
GLenum img_iformat;
};
+struct sc_buffer { // one shader storage buffer registered with the shader cache
+ char *name; // GLSL block name; lookup key in find_buffer(), freed in gl_sc_reset()
+ char *format; // member declarations emitted between the GLSL block's braces
+ GLuint binding; // index used for layout(binding=) and for BindBufferBase()
+ GLuint ssbo; // GL buffer object bound to `binding` in gl_sc_generate()
+};
+
struct sc_cached_uniform {
GLint loc;
union uniform_val v;
@@ -503,6 +510,7 @@ struct gl_shader_cache {
bstr text;
int next_texture_unit;
int next_image_unit;
+ int next_buffer_binding;
struct gl_vao *vao; // deprecated
struct sc_entry *entries;
@@ -512,6 +520,8 @@ struct gl_shader_cache {
struct sc_uniform *uniforms;
int num_uniforms;
+ struct sc_buffer *buffers;
+ int num_buffers;
const struct gl_vao_entry *vertex_entries;
size_t vertex_size;
@@ -562,6 +572,11 @@ void gl_sc_reset(struct gl_shader_cache *sc)
}
}
gl->ActiveTexture(GL_TEXTURE0);
+
+ for (int n = 0; n < sc->num_buffers; n++) {
+ struct sc_buffer *b = &sc->buffers[n];
+ gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0);
+ }
}
sc->prelude_text.len = 0;
@@ -570,8 +585,14 @@ void gl_sc_reset(struct gl_shader_cache *sc)
for (int n = 0; n < sc->num_uniforms; n++)
talloc_free(sc->uniforms[n].name);
sc->num_uniforms = 0;
+ for (int n = 0; n < sc->num_buffers; n++) {
+ talloc_free(sc->buffers[n].name);
+ talloc_free(sc->buffers[n].format);
+ }
+ sc->num_buffers = 0;
sc->next_texture_unit = 1; // not 0, as 0 is "free for use"
sc->next_image_unit = 1;
+ sc->next_buffer_binding = 1;
sc->vertex_entries = NULL;
sc->vertex_size = 0;
sc->current_shader = NULL;
@@ -680,6 +701,21 @@ static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
return &sc->uniforms[sc->num_uniforms - 1];
}
+// Return the buffer entry registered under `name` in the shader cache. If no
+// entry with that name exists yet, append one that carries a copy of the name
+// (all other fields zero-initialized) and return the new entry instead.
+static struct sc_buffer *find_buffer(struct gl_shader_cache *sc,
+                                     const char *name)
+{
+    for (int i = 0; i < sc->num_buffers; i++) {
+        struct sc_buffer *cur = &sc->buffers[i];
+        if (!strcmp(cur->name, name))
+            return cur;
+    }
+
+    // no match: register a fresh entry at the end of the array
+    struct sc_buffer entry = {
+        .name = talloc_strdup(NULL, name),
+    };
+    MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, entry);
+    return &sc->buffers[sc->num_buffers - 1];
+}
+
const char *mp_sampler_type(GLenum texture_target)
{
switch (texture_target) {
@@ -738,6 +774,20 @@ void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint textur
u->img_iformat = iformat;
}
+// Register the shader storage buffer `ssbo` under the GLSL block name `name`
+// for the shader currently being built. `format` is a printf-style template
+// (consuming the variadic arguments) that expands to the block's member
+// declarations. Every call takes the next free buffer binding index.
+void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo,
+                char *format, ...)
+{
+    struct sc_buffer *b = find_buffer(sc, name);
+    b->binding = sc->next_buffer_binding++;
+    b->ssbo = ssbo;
+
+    // find_buffer() hands back the existing entry when `name` was already
+    // registered since the last reset; release its old text so it does not
+    // pile up on `sc`. New entries start with format == NULL, for which
+    // talloc_free() is a no-op.
+    talloc_free(b->format);
+
+    va_list ap;
+    va_start(ap, format);
+    b->format = ta_vasprintf(sc, format, ap);
+    va_end(ap);
+}
+
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f)
{
struct sc_uniform *u = find_uniform(sc, name);
@@ -1217,6 +1267,12 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
ADD(comp, "uniform %s %s;\n", u->glsl_type, u->name);
}
+ for (int n = 0; n < sc->num_buffers; n++) {
+ struct sc_buffer *b = &sc->buffers[n];
+ ADD(comp, "layout(std430, binding=%d) buffer %s { %s };\n",
+ b->binding, b->name, b->format);
+ }
+
ADD_BSTR(comp, sc->prelude_text);
ADD_BSTR(comp, sc->header_text);
@@ -1271,6 +1327,10 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
for (int n = 0; n < sc->num_uniforms; n++)
update_uniform(gl, entry, &sc->uniforms[n], n);
+ for (int n = 0; n < sc->num_buffers; n++) {
+ struct sc_buffer *b = &sc->buffers[n];
+ gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, b->ssbo);
+ }
gl->ActiveTexture(GL_TEXTURE0);
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 3dc7e5d72d..f2c405fa9a 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -149,6 +149,8 @@ void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target,
void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture);
void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint texture,
GLuint iformat, GLenum access);
+void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo,
+ char *format, ...);
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f);
void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f);
void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]);
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index ab8f311191..76b9d829ab 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -236,9 +236,11 @@ struct gl_video {
struct fbotex integer_fbo[4];
struct fbotex indirect_fbo;
struct fbotex blend_subs_fbo;
+ struct fbotex screen_fbo;
struct fbotex output_fbo;
struct fbosurface surfaces[FBOSURFACES_MAX];
struct fbotex vdpau_deinterleave_fbo[2];
+ GLuint hdr_peak_ssbo;
int surface_idx;
int surface_now;
@@ -368,6 +370,7 @@ const struct m_sub_options gl_video_conf = {
{"hable", TONE_MAPPING_HABLE},
{"gamma", TONE_MAPPING_GAMMA},
{"linear", TONE_MAPPING_LINEAR})),
+ OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0),
OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
OPT_FLAG("opengl-pbo", pbo, 0),
@@ -541,6 +544,7 @@ static void uninit_rendering(struct gl_video *p)
fbotex_uninit(&p->indirect_fbo);
fbotex_uninit(&p->blend_subs_fbo);
+ fbotex_uninit(&p->screen_fbo);
for (int n = 0; n < FBOSURFACES_MAX; n++)
fbotex_uninit(&p->surfaces[n].fbotex);
@@ -2358,6 +2362,8 @@ static void pass_scale_main(struct gl_video *p)
// by previous passes (i.e. linear scaling)
static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool osd)
{
+ GL *gl = p->gl;
+
// Figure out the target color space from the options, or auto-guess if
// none were set
struct mp_colorspace dst = {
@@ -2417,10 +2423,42 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
dst.gamma = MP_CSP_TRC_GAMMA22;
}
+ bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma);
+ if (detect_peak) {
+ pass_describe(p, "detect HDR peak");
+ compute_size_minimum(p, 8, 8); // 8x8 is good for performance
+
+ if (!p->hdr_peak_ssbo) {
+ struct {
+ GLuint sig_peak_raw;
+ GLuint index;
+ GLuint frame_max[PEAK_DETECT_FRAMES+1];
+ } peak_ssbo = {0};
+
+ // Prefill with safe values
+ int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma);
+ peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe;
+ for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++)
+ peak_ssbo.frame_max[i] = safe;
+
+ gl->GenBuffers(1, &p->hdr_peak_ssbo);
+ gl->BindBuffer(GL_SHADER_STORAGE_BUFFER, p->hdr_peak_ssbo);
+ gl->BufferData(GL_SHADER_STORAGE_BUFFER, sizeof(peak_ssbo),
+ &peak_ssbo, GL_STREAM_COPY);
+ gl->BindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+ }
+
+ gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
+ "uint sig_peak_raw;"
+ "uint index;"
+ "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1
+ );
+ }
+
// Adapt from src to dst as necessary
pass_color_map(p->sc, src, dst, p->opts.hdr_tone_mapping,
p->opts.tone_mapping_param, p->opts.tone_mapping_desat,
- p->use_linear && !osd);
+ detect_peak, p->use_linear && !osd);
if (p->use_lut_3d) {
gl_sc_uniform_tex(p->sc, "lut_3d", GL_TEXTURE_3D, p->lut_3d_texture);
@@ -2710,6 +2748,17 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
pass_colormanage(p, p->image_params.color, false);
+ // Since finish_pass_direct doesn't work with compute shaders, and neither
+ // does the checkerboard/dither code, we may need an indirection via
+ // p->screen_fbo here.
+ if (p->compute_w > 0 && p->compute_h > 0) {
+ int o_w = p->dst_rect.x1 - p->dst_rect.x0,
+ o_h = p->dst_rect.y1 - p->dst_rect.y0;
+ finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY);
+ struct img_tex tmp = img_tex_fbo(&p->screen_fbo, PLANE_RGB, p->components);
+ copy_img_tex(p, &(int){0}, tmp);
+ }
+
if (p->has_alpha){
if (p->opts.alpha_mode == ALPHA_BLEND_TILES) {
// Draw checkerboard pattern to indicate transparency
@@ -3326,6 +3375,7 @@ static void check_gl_features(struct gl_video *p)
bool have_mglsl = gl->glsl_version >= 130; // modern GLSL (1st class arrays etc.)
bool have_texrg = gl->mpgl_caps & MPGL_CAP_TEX_RG;
bool have_tex16 = !gl->es || (gl->mpgl_caps & MPGL_CAP_EXT16);
+ bool have_compute = gl->glsl_version >= 430; // easiest way to ensure all
const GLint auto_fbo_fmts[] = {GL_RGBA16, GL_RGBA16F, GL_RGB10_A2,
GL_RGBA8, 0};
@@ -3436,6 +3486,10 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
+ if (!have_compute && p->opts.compute_hdr_peak) {
+ p->opts.compute_hdr_peak = 0;
+ MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n");
+ }
}
static void init_gl(struct gl_video *p)
@@ -3471,6 +3525,7 @@ void gl_video_uninit(struct gl_video *p)
gl_sc_destroy(p->sc);
gl->DeleteTextures(1, &p->lut_3d_texture);
+ gl->DeleteBuffers(1, &p->hdr_peak_ssbo);
gl_timer_free(p->upload_timer);
gl_timer_free(p->blit_timer);
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index f3608626e4..b19f6e099d 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -99,6 +99,9 @@ enum tone_mapping {
TONE_MAPPING_LINEAR,
};
+// How many frames to average over for HDR peak detection
+#define PEAK_DETECT_FRAMES 100
+
struct gl_video_opts {
int dumb_mode;
struct scaler_config scaler[4];
@@ -109,6 +112,7 @@ struct gl_video_opts {
int target_trc;
int target_brightness;
int hdr_tone_mapping;
+ int compute_hdr_peak;
float tone_mapping_param;
float tone_mapping_desat;
int linear_scaling;
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 3381d532b6..a7ecf1a448 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -521,7 +521,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
}
-// Tone map from a known peak brightness to the range [0,1]
+// Tone map from a known peak brightness to the range [0,1]. If ref_peak
+// is 0, we will use peak detection instead
static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
enum tone_mapping algo, float param, float desat)
{
@@ -531,8 +532,42 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSL(float luma = dot(src_luma, color.rgb);)
GLSL(float luma_orig = luma;)
+ if (!ref_peak) {
+ // For performance, we want to do as few atomic operations on global
+ // memory as possible, so use an atomic in shmem for the work group.
+ // We also want slightly more stable values, so use the group average
+ // instead of the group max
+ GLSLHF("shared uint group_sum = 0;\n");
+ GLSLF("atomicAdd(group_sum, uint(luma * %f));\n", MP_REF_WHITE);
+
+ // Have one thread in each work group update the frame maximum
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSL(if (gl_LocalInvocationIndex == 0))
+ GLSL(atomicMax(frame_max[index], group_sum /
+ (gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
+
+ // Finally, have a single thread per dispatch (global invocation 0) update
+ // the running sum of frame maxima and advance the index
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
+ GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
+ GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
+ GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
+ GLSL(index = next;)
+ GLSL(})
+
+ GLSL(memoryBarrierBuffer();)
+ GLSL(barrier();)
+ GLSLF("const float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
+ MP_REF_WHITE * PEAK_DETECT_FRAMES);
+ } else {
+ GLSLHF("const float sig_peak = %f;\n", ref_peak);
+ }
+
// Desaturate the color using a coefficient dependent on the brightness
- if (desat > 0 && ref_peak > desat) {
+ if (desat > 0) {
GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat);
GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);)
}
@@ -542,23 +577,23 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSLF("luma = clamp(%f * luma, 0.0, 1.0);\n", isnan(param) ? 1.0 : param);
break;
- case TONE_MAPPING_MOBIUS: {
- float j = isnan(param) ? 0.3 : param;
- // solve for M(j) = j; M(ref_peak) = 1.0; M'(j) = 1.0
+ case TONE_MAPPING_MOBIUS:
+ GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param);
+ // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
// where M(x) = scale * (x+a)/(x+b)
- float a = -j*j * (ref_peak - 1) / (j*j - 2*j + ref_peak),
- b = (j*j - 2*j*ref_peak + ref_peak) / (ref_peak - 1);
-
- GLSLF("luma = mix(%f * (luma + %f) / (luma + %f), luma, luma <= %f);\n",
- (b*b + 2*b*j + j*j) / (b - a), a, b, j);
+ GLSLF("const float a = -j*j * (sig_peak - 1) / (j*j - 2*j + sig_peak);\n");
+ GLSLF("const float b = (j*j - 2*j*sig_peak + sig_peak) / "
+ "max(1e-6, sig_peak - 1);\n");
+ GLSLF("const float scale = (b*b + 2*b*j + j*j) / (b-a);\n");
+ GLSL(luma = mix(luma, scale * (luma + a) / (luma + b), luma > j);)
break;
- }
case TONE_MAPPING_REINHARD: {
float contrast = isnan(param) ? 0.5 : param,
offset = (1.0 - contrast) / contrast;
GLSLF("luma = luma / (luma + %f);\n", offset);
- GLSLF("luma *= %f;\n", (ref_peak + offset) / ref_peak);
+ GLSLF("const float lumascale = (sig_peak + %f) / sig_peak;\n", offset);
+ GLSL(luma *= lumascale;)
break;
}
@@ -568,20 +603,19 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n",
A, C*B, D*E, A, B, D*F, E/F);
GLSLHF("}\n");
-
- GLSLF("luma = hable(luma) / hable(%f);\n", ref_peak);
+ GLSL(luma = hable(luma) / hable(sig_peak);)
break;
}
case TONE_MAPPING_GAMMA: {
float gamma = isnan(param) ? 1.8 : param;
- GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma);
+ GLSLF("luma = pow(luma / sig_peak, %f);\n", 1.0/gamma);
break;
}
case TONE_MAPPING_LINEAR: {
float coeff = isnan(param) ? 1.0 : param;
- GLSLF("luma = %f * luma;\n", coeff / ref_peak);
+ GLSLF("luma = %f / sig_peak * luma;\n", coeff);
break;
}
@@ -596,11 +630,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// Map colors from one source space to another. These source spaces must be
// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
// auto-guessing. If is_linear is true, we assume the input has already been
-// linearized (e.g. for linear-scaling)
+// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will
+// detect the peak instead of relying on metadata. Note that this requires
+// the caller to have already bound the appropriate SSBO and set up the
+// compute shader metadata
void pass_color_map(struct gl_shader_cache *sc,
struct mp_colorspace src, struct mp_colorspace dst,
enum tone_mapping algo, float tone_mapping_param,
- float tone_mapping_desat, bool is_linear)
+ float tone_mapping_desat, bool detect_peak,
+ bool is_linear)
{
GLSLF("// color mapping\n");
@@ -643,8 +681,8 @@ void pass_color_map(struct gl_shader_cache *sc,
// Tone map to prevent clipping when the source signal peak exceeds the
// encodable range
if (src.sig_peak > dst_range) {
- pass_tone_map(sc, src.sig_peak / dst_range, algo, tone_mapping_param,
- tone_mapping_desat);
+ float ref_peak = detect_peak ? 0 : src.sig_peak / dst_range;
+ pass_tone_map(sc, ref_peak, algo, tone_mapping_param, tone_mapping_desat);
}
// Adapt to the right colorspace if necessary
diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h
index b0b8b4214e..e0594f28f3 100644
--- a/video/out/opengl/video_shaders.h
+++ b/video/out/opengl/video_shaders.h
@@ -44,7 +44,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
void pass_color_map(struct gl_shader_cache *sc,
struct mp_colorspace src, struct mp_colorspace dst,
enum tone_mapping algo, float tone_mapping_param,
- float tone_mapping_desat, bool is_linear);
+ float tone_mapping_desat, bool use_detected_peak,
+ bool is_linear);
void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
AVLFG *lfg);