From 584ab29c88d6c5ffa03666bffbbc93e4f0740f67 Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Sat, 11 Dec 2021 19:28:46 -0800 Subject: vo_gpu: opengl: some fixes to make compute shaders work with GLES It's supposed to work with GLES >= 3.1 but we had all sorts of bad assumptions in our version handling, and then our compute shaders turn out not to be GLSL-ES compliant. This change contains some necessary, but insufficient, tweaks to the shaders. Perhaps we'll make it actually work some day. --- video/out/gpu/error_diffusion.c | 4 ++-- video/out/gpu/shader_cache.c | 2 ++ video/out/gpu/video.c | 9 ++++++--- video/out/gpu/video_shaders.c | 10 +++++----- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/video/out/gpu/error_diffusion.c b/video/out/gpu/error_diffusion.c index 88c0903d35..c1ea542c86 100644 --- a/video/out/gpu/error_diffusion.c +++ b/video/out/gpu/error_diffusion.c @@ -110,7 +110,7 @@ void pass_error_diffusion(struct gl_shader_cache *sc, // Initialize the ring buffer. GLSL("for (int i = int(gl_LocalInvocationIndex); i < %d; i += %d) ", ring_buffer_size, block_size); - GLSL("err_rgb8[i] = 0;\n"); + GLSL("err_rgb8[i] = 0u;\n"); GLSL("for (int block_id = 0; block_id < %d; ++block_id) {\n", blocks); @@ -170,7 +170,7 @@ void pass_error_diffusion(struct gl_shader_cache *sc, "int((err_u32 >> %d) & 255u) - 128," "int( err_u32 & 255u) - 128" ") / %d.0;\n", dither_quant, bitshift_r, bitshift_g, uint8_mul); - GLSL("err_rgb8[idx] = 0;\n"); + GLSL("err_rgb8[idx] = 0u;\n"); // Write the dithered pixel. GLSL("vec3 dithered = round(pix);\n"); diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c index c19a7e8323..c2b701382c 100644 --- a/video/out/gpu/shader_cache.c +++ b/video/out/gpu/shader_cache.c @@ -783,8 +783,10 @@ static void gl_sc_generate(struct gl_shader_cache *sc, if (glsl_es) { ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n"); ADD(header, "precision highp float;\n"); + ADD(header, "precision highp image2D;\n"); ADD(header, "#else\n"); ADD(header, "precision mediump float;\n"); + ADD(header, "precision mediump image2D;\n"); ADD(header, "#endif\n"); ADD(header, "precision mediump sampler2D;\n"); diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index add7985f78..035619891b 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -2698,9 +2698,12 @@ static void pass_dither(struct gl_video *p) struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components); - // 1024 is minimal required number of invocation allowed in single - // work group in OpenGL. Use it for maximal performance. - int block_size = MPMIN(1024, o_h); + // Ensure the block size doesn't exceed the minimum defined by the + // specification (1024 in desktop GL, 128 in GLES). + // TODO: Look up the actual maximum block size for the + // implementation using: + // glGetIntegerv(MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &value); + int block_size = MPMIN(p->ra->glsl_es ? 128 : 1024, o_h); pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)", kernel->name, dst_depth); diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index d5bc678145..df379778fc 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -609,7 +609,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // pixel using shared memory first GLSLH(shared int wg_sum;) GLSLH(shared uint wg_max;) - GLSL(wg_sum = 0; wg_max = 0;) + GLSL(wg_sum = 0; wg_max = 0u;) GLSL(barrier();) GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min); GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale); @@ -618,7 +618,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // Have one thread per work group update the global atomics GLSL(memoryBarrierShared();) GLSL(barrier();) - GLSL(if (gl_LocalInvocationIndex == 0) {) + GLSL(if (gl_LocalInvocationIndex == 0u) {) GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);) GLSL( atomicAdd(frame_sum, wg_avg);) GLSL( atomicMax(frame_max, wg_max);) @@ -628,8 +628,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc, // Finally, to update the global state, we increment a counter per dispatch GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) - GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) - GLSL( counter = 0;) + GLSL(if (gl_LocalInvocationIndex == 0u && atomicAdd(counter, 1u) == num_wg - 1u) {) + GLSL( counter = 0u;) GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); GLSL( cur.x = exp(cur.x);) @@ -650,7 +650,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc, GLSL( average = mix(average, cur, weight);) // Reset SSBO state for the next frame - GLSL( frame_sum = 0; frame_max = 0;) + GLSL( frame_sum = 0; frame_max = 0u;) GLSL( memoryBarrierBuffer();) GLSL(}) } -- cgit v1.2.3