From 51014e1c03dc4b3dc8b038d29bdc4d0c1998651b Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 17 Jul 2017 05:23:55 +0200 Subject: vo_opengl: avoid constant divisions These are apparently expensive on some drivers which are not smart enough to turn x/42 into x*1.0/42. So, do it for them. My great test framework says it's okay --- video/out/opengl/video.c | 19 ++++++------ video/out/opengl/video_shaders.c | 63 ++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 40 deletions(-) (limited to 'video') diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 50e70ce08f..742c36850a 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -2074,12 +2074,13 @@ static void pass_convert_yuv(struct gl_video *p) // assumes everything uses the BT.2020 12-bit gamma function, since the // difference between 10 and 12-bit is negligible for anything other // than 12-bit content. - GLSL(color.rgb = mix(color.rgb / vec3(4.5), - pow((color.rgb + vec3(0.0993))/vec3(1.0993), vec3(1.0/0.45)), + GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), + pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), + vec3(1.0/0.45)), lessThanEqual(vec3(0.08145), color.rgb));) // Calculate the green channel from the expanded RYcB // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B - GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)/0.6780;) + GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;) // Recompress to receive the R'G'B' result, same as other systems GLSL(color.rgb = mix(color.rgb * vec3(4.5), vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), @@ -2178,7 +2179,7 @@ static void pass_scale_main(struct gl_video *p) // values at 1 and 0, and then scale/shift them, respectively. sig_offset = 1.0/(1+expf(sig_slope * sig_center)); sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; - GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0)/%f;\n", + GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n", sig_center, sig_scale, sig_offset, sig_slope); pass_opt_hook_point(p, "SIGMOID", NULL); } @@ -2205,7 +2206,7 @@ static void pass_scale_main(struct gl_video *p) GLSLF("// scaler post-conversion\n"); if (use_sigmoid) { // Inverse of the transformation above - GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) / %f;\n", + GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n", sig_slope, sig_center, sig_offset, sig_scale); } } @@ -2377,7 +2378,7 @@ static void pass_dither(struct gl_video *p) gl_sc_uniform_tex(p->sc, "dither", GL_TEXTURE_2D, p->dither_texture); - GLSLF("vec2 dither_pos = gl_FragCoord.xy / %d.0;\n", p->dither_size); + GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", p->dither_size); if (p->opts.temporal_dither) { int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u; @@ -2392,7 +2393,7 @@ static void pass_dither(struct gl_video *p) } GLSL(float dither_value = texture(dither, dither_pos).r;) - GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) / %d.0;\n", + GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n", dither_quantization, p->dither_size * p->dither_size, dither_quantization); } @@ -2590,7 +2591,7 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo) if (p->opts.alpha_mode == ALPHA_BLEND_TILES) { // Draw checkerboard pattern to indicate transparency GLSLF("// transparency checkerboard\n"); - GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy / 32.0), vec2(0.5));) + GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));) GLSL(vec3 background = vec3(tile.x == tile.y ? 1.0 : 0.75);) GLSL(color.rgb = mix(background, color.rgb, color.a);) } else if (p->opts.alpha_mode == ALPHA_BLEND) { @@ -3020,7 +3021,7 @@ static void reinterleave_vdpau(struct gl_video *p, struct gl_hwdec_frame *frame) }); } - GLSLF("color = fract(gl_FragCoord.y / 2) < 0.5\n"); + GLSLF("color = fract(gl_FragCoord.y * 0.5) < 0.5\n"); GLSLF(" ? texture(texture%d, texcoord%d)\n", ids[0], ids[0]); GLSLF(" : texture(texture%d, texcoord%d);", ids[1], ids[1]); diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c index 83b9f83143..e83973b4b8 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/opengl/video_shaders.c @@ -121,19 +121,19 @@ static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, // Skip samples definitely outside the radius if (dmax >= radius_cutoff) return; - GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius); + GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); // Check for samples that might be skippable bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2; if (maybe_skippable) - GLSLF("if (d < %f) {\n", radius_cutoff / radius); + GLSLF("if (d < %f) {\n", radius_cutoff); // get the weight for this pixel if (scaler->gl_target == GL_TEXTURE_1D) { - GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n", - scaler->lut_size); + GLSLF("w = texture1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n", + radius, scaler->lut_size); } else { - GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n", - scaler->lut_size); + GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n", + radius, scaler->lut_size); } GLSL(wsum += w;) @@ -258,7 +258,7 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, GLSL(vec2 coeff = fcoord * output_size/size;) float threshold = scaler->conf.kernel.params[0]; threshold = isnan(threshold) ? 0.0 : threshold; - GLSLF("coeff = (coeff - %f) / %f;\n", threshold, 1.0 - 2 * threshold); + GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold); GLSL(coeff = clamp(coeff, 0.0, 1.0);) // Compute the right blend of colors GLSL(color = texture(tex, pos + pt * (coeff - fcoord));) @@ -309,7 +309,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) switch (trc) { case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb / vec3(12.92), + GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92), pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), lessThan(vec3(0.04045), color.rgb));) break; @@ -326,7 +326,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) GLSL(color.rgb = pow(color.rgb, vec3(2.8));) break; case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb / vec3(16.0), + GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0), pow(color.rgb, vec3(1.8)), lessThan(vec3(0.03125), color.rgb));) break; @@ -342,27 +342,27 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) break; case MP_CSP_TRC_HLG: GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n" - " exp((color.rgb - vec3(%f)) / vec3(%f)) + vec3(%f),\n" + " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n" " lessThan(vec3(0.5), color.rgb));\n", HLG_C, HLG_A, HLG_B); break; case MP_CSP_TRC_V_LOG: - GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) / vec3(5.6), \n" - " pow(vec3(10.0), (color.rgb - vec3(%f)) / vec3(%f)) \n" - " - vec3(%f), \n" - " lessThanEqual(vec3(0.181), color.rgb)); \n", + GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" + " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" + " - vec3(%f), \n" + " lessThanEqual(vec3(0.181), color.rgb)); \n", VLOG_D, VLOG_C, VLOG_B); break; case MP_CSP_TRC_S_LOG1: - GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) / vec3(%f))\n" + GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n" " - vec3(%f);\n", SLOG_C, SLOG_A, SLOG_B); break; case MP_CSP_TRC_S_LOG2: - GLSLF("color.rgb = mix((color.rgb - vec3(%f)) / vec3(%f), \n" - " (pow(vec3(10.0), (color.rgb - vec3(%f)) / vec3(%f)) \n" - " - vec3(%f)) / vec3(%f), \n" - " lessThanEqual(vec3(%f), color.rgb)); \n", + GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" + " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" + " - vec3(%f)) * vec3(1.0/%f), \n" + " lessThanEqual(vec3(%f), color.rgb)); \n", SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); break; default: @@ -370,7 +370,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) } // Rescale to prevent clipping on non-float textures - GLSLF("color.rgb /= vec3(%f);\n", mp_trc_nom_peak(trc)); + GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc)); } // Delinearize (compress), given a TRC as output. This corresponds to the @@ -410,7 +410,7 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) lessThanEqual(vec3(0.001953), color.rgb));) break; case MP_CSP_TRC_PQ: - GLSLF("color.rgb /= vec3(%f);\n", 10000 / MP_REF_WHITE); + GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE); GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1); GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb);\n", @@ -481,7 +481,7 @@ void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) abort(); } - GLSLF("color.rgb /= vec3(%f);\n", peak); + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); } // Inverse of the function pass_ootf, for completeness' sake. Note that the @@ -505,19 +505,20 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa abort(); break; case MP_CSP_LIGHT_SCENE_709_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1/2.4));) - GLSL(color.rgb = mix(color.rgb / vec3(4.5), - pow((color.rgb + vec3(0.0993)) / vec3(1.0993), vec3(1/0.45)), + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) + GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), + pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), + vec3(1/0.45)), lessThan(vec3(0.08145), color.rgb));) break; case MP_CSP_LIGHT_SCENE_1_2: - GLSL(color.rgb = pow(color.rgb, vec3(1/1.2));) + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) break; default: abort(); } - GLSLF("color.rgb /= vec3(%f);\n", peak); + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); } // Tone map from a known peak brightness to the range [0,1] @@ -574,7 +575,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, case TONE_MAPPING_GAMMA: { float gamma = isnan(param) ? 1.8 : param; - GLSLF("luma = pow(luma / %f, %f);\n", ref_peak, 1.0/gamma); + GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma); break; } @@ -668,9 +669,9 @@ void pass_color_map(struct gl_shader_cache *sc, // update the state. Assumes the texture was hooked. static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) { - GLSLH(float mod289(float x) { return x - floor(x / 289.0) * 289.0; }) + GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); }) - GLSLH(float rand(float x) { return fract(x / 41.0); }) + GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) // Initialize the PRNG by hashing the position + a random uniform GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) @@ -730,7 +731,7 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) // Return the (normalized) average - GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])/4.0;) + GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;) GLSLHF("}\n"); // Sample the source pixel -- cgit v1.2.3