summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-07-17 05:23:55 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-17 05:29:16 +0200
commit51014e1c03dc4b3dc8b038d29bdc4d0c1998651b (patch)
tree69b5ba98951014471d9791503daee4676b0cfa9d /video
parentc1dcf74458b6453dfe5ec9096101773c7a4ceadf (diff)
downloadmpv-51014e1c03dc4b3dc8b038d29bdc4d0c1998651b.tar.bz2
mpv-51014e1c03dc4b3dc8b038d29bdc4d0c1998651b.tar.xz
vo_opengl: avoid constant divisions
These are apparently expensive on some drivers which are not smart enough to turn x/42 into x*1.0/42. So, do it for them. My great test framework says it's okay
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/video.c19
-rw-r--r--video/out/opengl/video_shaders.c63
2 files changed, 42 insertions, 40 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 50e70ce08f..742c36850a 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -2074,12 +2074,13 @@ static void pass_convert_yuv(struct gl_video *p)
// assumes everything uses the BT.2020 12-bit gamma function, since the
// difference between 10 and 12-bit is negligible for anything other
// than 12-bit content.
- GLSL(color.rgb = mix(color.rgb / vec3(4.5),
- pow((color.rgb + vec3(0.0993))/vec3(1.0993), vec3(1.0/0.45)),
+ GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5),
+ pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993),
+ vec3(1.0/0.45)),
lessThanEqual(vec3(0.08145), color.rgb));)
// Calculate the green channel from the expanded RYcB
// The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
- GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)/0.6780;)
+ GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;)
// Recompress to receive the R'G'B' result, same as other systems
GLSL(color.rgb = mix(color.rgb * vec3(4.5),
vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993),
@@ -2178,7 +2179,7 @@ static void pass_scale_main(struct gl_video *p)
// values at 1 and 0, and then scale/shift them, respectively.
sig_offset = 1.0/(1+expf(sig_slope * sig_center));
sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset;
- GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0)/%f;\n",
+ GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n",
sig_center, sig_scale, sig_offset, sig_slope);
pass_opt_hook_point(p, "SIGMOID", NULL);
}
@@ -2205,7 +2206,7 @@ static void pass_scale_main(struct gl_video *p)
GLSLF("// scaler post-conversion\n");
if (use_sigmoid) {
// Inverse of the transformation above
- GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) / %f;\n",
+ GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
sig_slope, sig_center, sig_offset, sig_scale);
}
}
@@ -2377,7 +2378,7 @@ static void pass_dither(struct gl_video *p)
gl_sc_uniform_tex(p->sc, "dither", GL_TEXTURE_2D, p->dither_texture);
- GLSLF("vec2 dither_pos = gl_FragCoord.xy / %d.0;\n", p->dither_size);
+ GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", p->dither_size);
if (p->opts.temporal_dither) {
int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u;
@@ -2392,7 +2393,7 @@ static void pass_dither(struct gl_video *p)
}
GLSL(float dither_value = texture(dither, dither_pos).r;)
- GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) / %d.0;\n",
+ GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n",
dither_quantization, p->dither_size * p->dither_size,
dither_quantization);
}
@@ -2590,7 +2591,7 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
if (p->opts.alpha_mode == ALPHA_BLEND_TILES) {
// Draw checkerboard pattern to indicate transparency
GLSLF("// transparency checkerboard\n");
- GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy / 32.0), vec2(0.5));)
+ GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));)
GLSL(vec3 background = vec3(tile.x == tile.y ? 1.0 : 0.75);)
GLSL(color.rgb = mix(background, color.rgb, color.a);)
} else if (p->opts.alpha_mode == ALPHA_BLEND) {
@@ -3020,7 +3021,7 @@ static void reinterleave_vdpau(struct gl_video *p, struct gl_hwdec_frame *frame)
});
}
- GLSLF("color = fract(gl_FragCoord.y / 2) < 0.5\n");
+ GLSLF("color = fract(gl_FragCoord.y * 0.5) < 0.5\n");
GLSLF(" ? texture(texture%d, texcoord%d)\n", ids[0], ids[0]);
GLSLF(" : texture(texture%d, texcoord%d);", ids[1], ids[1]);
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 83b9f83143..e83973b4b8 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -121,19 +121,19 @@ static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
// Skip samples definitely outside the radius
if (dmax >= radius_cutoff)
return;
- GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
+ GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y);
// Check for samples that might be skippable
bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
if (maybe_skippable)
- GLSLF("if (d < %f) {\n", radius_cutoff / radius);
+ GLSLF("if (d < %f) {\n", radius_cutoff);
// get the weight for this pixel
if (scaler->gl_target == GL_TEXTURE_1D) {
- GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
- scaler->lut_size);
+ GLSLF("w = texture1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n",
+ radius, scaler->lut_size);
} else {
- GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
- scaler->lut_size);
+ GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n",
+ radius, scaler->lut_size);
}
GLSL(wsum += w;)
@@ -258,7 +258,7 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
GLSL(vec2 coeff = fcoord * output_size/size;)
float threshold = scaler->conf.kernel.params[0];
threshold = isnan(threshold) ? 0.0 : threshold;
- GLSLF("coeff = (coeff - %f) / %f;\n", threshold, 1.0 - 2 * threshold);
+ GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold);
GLSL(coeff = clamp(coeff, 0.0, 1.0);)
// Compute the right blend of colors
GLSL(color = texture(tex, pos + pt * (coeff - fcoord));)
@@ -309,7 +309,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
switch (trc) {
case MP_CSP_TRC_SRGB:
- GLSL(color.rgb = mix(color.rgb / vec3(12.92),
+ GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92),
pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)),
lessThan(vec3(0.04045), color.rgb));)
break;
@@ -326,7 +326,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
break;
case MP_CSP_TRC_PRO_PHOTO:
- GLSL(color.rgb = mix(color.rgb / vec3(16.0),
+ GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0),
pow(color.rgb, vec3(1.8)),
lessThan(vec3(0.03125), color.rgb));)
break;
@@ -342,27 +342,27 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
break;
case MP_CSP_TRC_HLG:
GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
- " exp((color.rgb - vec3(%f)) / vec3(%f)) + vec3(%f),\n"
+ " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n"
" lessThan(vec3(0.5), color.rgb));\n",
HLG_C, HLG_A, HLG_B);
break;
case MP_CSP_TRC_V_LOG:
- GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) / vec3(5.6), \n"
- " pow(vec3(10.0), (color.rgb - vec3(%f)) / vec3(%f)) \n"
- " - vec3(%f), \n"
- " lessThanEqual(vec3(0.181), color.rgb)); \n",
+ GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
+ " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
+ " - vec3(%f), \n"
+ " lessThanEqual(vec3(0.181), color.rgb)); \n",
VLOG_D, VLOG_C, VLOG_B);
break;
case MP_CSP_TRC_S_LOG1:
- GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) / vec3(%f))\n"
+ GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n"
" - vec3(%f);\n",
SLOG_C, SLOG_A, SLOG_B);
break;
case MP_CSP_TRC_S_LOG2:
- GLSLF("color.rgb = mix((color.rgb - vec3(%f)) / vec3(%f), \n"
- " (pow(vec3(10.0), (color.rgb - vec3(%f)) / vec3(%f)) \n"
- " - vec3(%f)) / vec3(%f), \n"
- " lessThanEqual(vec3(%f), color.rgb)); \n",
+ GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n"
+ " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
+ " - vec3(%f)) * vec3(1.0/%f), \n"
+ " lessThanEqual(vec3(%f), color.rgb)); \n",
SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q);
break;
default:
@@ -370,7 +370,7 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
}
// Rescale to prevent clipping on non-float textures
- GLSLF("color.rgb /= vec3(%f);\n", mp_trc_nom_peak(trc));
+ GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc));
}
// Delinearize (compress), given a TRC as output. This corresponds to the
@@ -410,7 +410,7 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
lessThanEqual(vec3(0.001953), color.rgb));)
break;
case MP_CSP_TRC_PQ:
- GLSLF("color.rgb /= vec3(%f);\n", 10000 / MP_REF_WHITE);
+ GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE);
GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1);
GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
" / (vec3(1.0) + vec3(%f) * color.rgb);\n",
@@ -481,7 +481,7 @@ void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak)
abort();
}
- GLSLF("color.rgb /= vec3(%f);\n", peak);
+ GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
}
// Inverse of the function pass_ootf, for completeness' sake. Note that the
@@ -505,19 +505,20 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
abort();
break;
case MP_CSP_LIGHT_SCENE_709_1886:
- GLSL(color.rgb = pow(color.rgb, vec3(1/2.4));)
- GLSL(color.rgb = mix(color.rgb / vec3(4.5),
- pow((color.rgb + vec3(0.0993)) / vec3(1.0993), vec3(1/0.45)),
+ GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
+ GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5),
+ pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993),
+ vec3(1/0.45)),
lessThan(vec3(0.08145), color.rgb));)
break;
case MP_CSP_LIGHT_SCENE_1_2:
- GLSL(color.rgb = pow(color.rgb, vec3(1/1.2));)
+ GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));)
break;
default:
abort();
}
- GLSLF("color.rgb /= vec3(%f);\n", peak);
+ GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
}
// Tone map from a known peak brightness to the range [0,1]
@@ -574,7 +575,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
case TONE_MAPPING_GAMMA: {
float gamma = isnan(param) ? 1.8 : param;
- GLSLF("luma = pow(luma / %f, %f);\n", ref_peak, 1.0/gamma);
+ GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma);
break;
}
@@ -668,9 +669,9 @@ void pass_color_map(struct gl_shader_cache *sc,
// update the state. Assumes the texture was hooked.
static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
{
- GLSLH(float mod289(float x) { return x - floor(x / 289.0) * 289.0; })
+ GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; })
GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); })
- GLSLH(float rand(float x) { return fract(x / 41.0); })
+ GLSLH(float rand(float x) { return fract(x * 1.0/41.0); })
// Initialize the PRNG by hashing the position + a random uniform
GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);)
@@ -730,7 +731,7 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));)
// Return the (normalized) average
- GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])/4.0;)
+ GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;)
GLSLHF("}\n");
// Sample the source pixel