Diffstat (limited to 'video/out/gpu/error_diffusion.c')
-rw-r--r--  video/out/gpu/error_diffusion.c  288
1 file changed, 288 insertions, 0 deletions
diff --git a/video/out/gpu/error_diffusion.c b/video/out/gpu/error_diffusion.c
new file mode 100644
index 0000000000..2bcd2084dd
--- /dev/null
+++ b/video/out/gpu/error_diffusion.c
@@ -0,0 +1,288 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "error_diffusion.h"
+
+#include "common/common.h"
+
+#define GLSL(...) gl_sc_addf(sc, __VA_ARGS__)
+#define GLSLH(...) gl_sc_haddf(sc, __VA_ARGS__)
+
+// After the (y, x) -> (y, x + y * shift) mapping, find the rightmost column
+// that can still be affected by the current column.
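+// For example, with the floyd-steinberg kernel below (shift=2), the non-zero
+// coefficients sit at (y=0, x=+1), (y=1, x=-1), (y=1, x=0) and (y=1, x=+1);
+// after the mapping they land in columns +1, +1, +2 and +3, so this function
+// returns 3.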
+static int compute_rightmost_shifted_column(const struct error_diffusion_kernel *k)
+{
+ int ret = 0;
+ for (int y = 0; y <= EF_MAX_DELTA_Y; y++) {
+ for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) {
+ if (k->pattern[y][x - EF_MIN_DELTA_X] != 0) {
+ int shifted_x = x + y * k->shift;
+
+ // The shift mapping guarantees that the current column (and the columns
+ // to its left) won't be affected by error diffusion.
+ assert(shifted_x > 0);
+
+ ret = MPMAX(ret, shifted_x);
+ }
+ }
+ }
+ return ret;
+}
+
+const struct error_diffusion_kernel *mp_find_error_diffusion_kernel(const char *name)
+{
+ if (!name)
+ return NULL;
+ for (const struct error_diffusion_kernel *k = mp_error_diffusion_kernels;
+ k->name;
+ k++) {
+ if (strcmp(k->name, name) == 0)
+ return k;
+ }
+ return NULL;
+}
+
+int mp_ef_compute_shared_memory_size(const struct error_diffusion_kernel *k,
+ int height)
+{
+ // We add EF_MAX_DELTA_Y empty lines at the bottom to catch errors
+ // propagated out past the bottom edge.
+ int rows = height + EF_MAX_DELTA_Y;
+ int shifted_columns = compute_rightmost_shifted_column(k) + 1;
+
+ // The shared memory is an array of size rows*shifted_columns. Each element
+ // holds three ints, one per RGB component.
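+ // For example, with the floyd-steinberg kernel (shifted_columns = 4) and an
+ // illustrative height of 1080, this is (1080 + EF_MAX_DELTA_Y) * 4 * 3 * 4
+ // bytes, i.e. 51936 bytes with EF_MAX_DELTA_Y = 2.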
+ return rows * shifted_columns * 3 * 4;
+}
+
+void pass_error_diffusion(struct gl_shader_cache *sc,
+ const struct error_diffusion_kernel *k,
+ int tex, int width, int height, int depth, int block_size)
+{
+ assert(block_size <= height);
+
+ // The parallel error diffusion works by applying the shift mapping first.
+ // Take the Floyd-Steinberg algorithm as an example: after applying the
+ // (y, x) -> (y, x + y * shift) mapping (with shift=2), all errors are
+ // propagated into the next few columns, which makes parallel processing
+ // of the same column possible.
+ //
+ // X 7/16 X 7/16
+ // 3/16 5/16 1/16 ==> 0 0 3/16 5/16 1/16
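+ //
+ // Concretely, the 7/16 coefficient at (y=0, x=+1) stays in column +1, while
+ // the 3/16, 5/16 and 1/16 coefficients at (y=1, x=-1..+1) move to columns
+ // +1, +2 and +3, so no weight ever lands in the current column or to its
+ // left.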
+
+ // Figure out the size of the rectangle containing all shifted pixels.
+ // The rectangle height is unchanged.
+ int shifted_width = width + (height - 1) * k->shift;
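+ // For example, for an illustrative 1920x1080 frame and the floyd-steinberg
+ // kernel (shift=2), shifted_width = 1920 + 1079 * 2 = 4078.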
+
+ // We process all pixels of the shifted rectangle column by column, with
+ // a single global work group of size |block_size|.
+ // Figure out how many blocks are required to process all pixels. We need
+ // this explicitly so that the number of barrier() calls matches across
+ // all invocations.
+ int blocks = (height * shifted_width + block_size - 1) / block_size;
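+ // Continuing the illustrative 1920x1080 example with block_size = 256:
+ // blocks = (1080 * 4078 + 255) / 256 = 17205.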
+
+ // If we know how many of the following columns can be affected while the
+ // current column is being processed, we only need to store the errors of
+ // those few columns in shared memory. Using a ring buffer further saves
+ // cost when iterating to the next column.
+ int ring_buffer_rows = height + EF_MAX_DELTA_Y;
+ int ring_buffer_columns = compute_rightmost_shifted_column(k) + 1;
+ int ring_buffer_size = ring_buffer_rows * ring_buffer_columns;
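+ // With the floyd-steinberg kernel, for example, errors reach at most 3
+ // shifted columns ahead, so ring_buffer_columns is 4 and the whole error
+ // state fits in 4 * (height + EF_MAX_DELTA_Y) ints per component.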
+
+ const char *rgb = "rgb";
+
+ // Define the ring buffer in shared memory.
+ for (int comp = 0; comp < 3; comp++)
+ GLSLH("shared int err_%c[%d];\n", rgb[comp], ring_buffer_size);
+
+ // Initialize the ring buffer.
+ GLSL("for (int i = int(gl_LocalInvocationIndex); i < %d; i += %d) {\n",
+ ring_buffer_size, block_size);
+ for (int comp = 0; comp < 3; comp++)
+ GLSL("err_%c[i] = 0;\n", rgb[comp]);
+ GLSL("}\n");
+
+ GLSL("for (int block_id = 0; block_id < %d; ++block_id) {\n", blocks);
+
+ // Add a barrier here so that the previous block is fully processed before
+ // processing of the next one starts.
+ GLSL("groupMemoryBarrier();\n");
+ GLSL("barrier();\n");
+
+ // Compute the coordinate of the pixel we are currently processing, both
+ // before and after the shift mapping.
+ GLSL("int id = int(gl_LocalInvocationIndex) + block_id * %d;\n", block_size);
+ GLSL("int y = id %% %d, x_shifted = id / %d;\n", height, height);
+ GLSL("int x = x_shifted - y * %d;\n", k->shift);
+
+ // Proceed only if we are processing a valid pixel.
+ GLSL("if (0 <= x && x < %d) {\n", width);
+
+ // The index of the current pixel in the ring buffer.
+ GLSL("int idx = (x_shifted * %d + y) %% %d;\n", ring_buffer_rows, ring_buffer_size);
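+ // The errors are stored column-major over shifted columns: each shifted
+ // column occupies ring_buffer_rows consecutive slots, and the modulo makes
+ // a slot get reused ring_buffer_columns columns later, by which point any
+ // error it held for a valid pixel has already been consumed and cleared.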
+
+ // Fetch the current pixel.
+ GLSL("vec3 pix = texelFetch(texture%d, ivec2(x, y), 0).rgb;\n", tex);
+
+ // The dithering will quantize pixel values into multiples of 1/dither_quant.
+ int dither_quant = (1 << depth) - 1;
+ // The absolute value of the errors to propagate is less than 1/dither_quant,
+ // so multiplying by dither_quant24 lets them be handled as ints with 24 bit
+ // precision.
+ double dither_quant24 = (double)(1 << 24) * dither_quant;
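+ // As an illustrative example, at depth = 8: dither_quant = 255, and since
+ // |pix - dithered| <= 0.5 / dither_quant after rounding, the scaled error
+ // |err| stays within +/- 2^23, comfortably inside int range.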
+
+ // Add the error previously propagated into the current pixel, and clear
+ // its slot in the buffer.
+ GLSL("pix += vec3(err_r[idx], err_g[idx], err_b[idx]) / %f;\n", dither_quant24);
+ for (int comp = 0; comp < 3; comp++)
+ GLSL("err_%c[idx] = 0;\n", rgb[comp]);
+
+ // Dithering to depth.
+ GLSL("vec3 dithered = floor(pix * %d.0 + 0.5) / %d.0;\n", dither_quant, dither_quant);
+ GLSL("ivec3 err = ivec3((pix - dithered) * %f + 0.5);\n", dither_quant24);
+
+ // Write the dithered pixel.
+ GLSL("imageStore(out_image, ivec2(x, y), vec4(dithered, 0.0));\n");
+
+ GLSL("int nidx;\n");
+ for (int y = 0; y <= EF_MAX_DELTA_Y; y++) {
+ for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) {
+ if (k->pattern[y][x - EF_MIN_DELTA_X] != 0) {
+ int shifted_x = x + y * k->shift;
+
+ // Unlike errors propagated out past the right border, errors
+ // propagated out past the left border would remain in the ring
+ // buffer and produce visible artifacts near the left border,
+ // especially for shift=3 kernels, so they have to be dropped
+ // explicitly.
+ bool left_border_check = x < 0;
+
+ if (left_border_check)
+ GLSL("if (x >= %d) {\n", -x);
+
+ // Calculate the new position in the ring buffer to propagate
+ // the error into.
+ int ring_buffer_delta = shifted_x * ring_buffer_rows + y;
+ GLSL("nidx = (idx + %d) %% %d;\n", ring_buffer_delta, ring_buffer_size);
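+ // E.g. for the floyd-steinberg 5/16 coefficient at (y=1, x=0),
+ // shifted_x is 2 and the delta is 2 * ring_buffer_rows + 1.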
+
+ // Propagate the error with atomic operations.
+ for (int comp = 0; comp < 3; comp++) {
+ GLSL("atomicAdd(err_%c[nidx], err.%c * %d / %d);\n",
+ rgb[comp], rgb[comp],
+ k->pattern[y][x - EF_MIN_DELTA_X],
+ k->divisor);
+ }
+
+ if (left_border_check)
+ GLSL("}\n");
+ }
+ }
+ }
+
+ GLSL("}\n"); // if (0 <= x && x < width)
+
+ GLSL("}\n"); // block_id
+}
+
+// Different kernels for error diffusion.
+// Patterns are from http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
+const struct error_diffusion_kernel mp_error_diffusion_kernels[] = {
+ {
+ .name = "simple",
+ .shift = 1,
+ .pattern = {{0, 0, 0, 1, 0},
+ {0, 0, 1, 0, 0},
+ {0, 0, 0, 0, 0}},
+ .divisor = 2
+ },
+ {
+ // The "false" Floyd-Steinberg kernel
+ .name = "false-fs",
+ .shift = 1,
+ .pattern = {{0, 0, 0, 3, 0},
+ {0, 0, 3, 2, 0},
+ {0, 0, 0, 0, 0}},
+ .divisor = 8
+ },
+ {
+ .name = "sierra-lite",
+ .shift = 2,
+ .pattern = {{0, 0, 0, 2, 0},
+ {0, 1, 1, 0, 0},
+ {0, 0, 0, 0, 0}},
+ .divisor = 4
+ },
+ {
+ .name = "floyd-steinberg",
+ .shift = 2,
+ .pattern = {{0, 0, 0, 7, 0},
+ {0, 3, 5, 1, 0},
+ {0, 0, 0, 0, 0}},
+ .divisor = 16
+ },
+ {
+ .name = "atkinson",
+ .shift = 2,
+ .pattern = {{0, 0, 0, 1, 1},
+ {0, 1, 1, 1, 0},
+ {0, 0, 1, 0, 0}},
+ .divisor = 8
+ },
+ // All kernels below have a shift value of 3, and are probably too heavy
+ // for low-end GPUs.
+ {
+ .name = "jarvis-judice-ninke",
+ .shift = 3,
+ .pattern = {{0, 0, 0, 7, 5},
+ {3, 5, 7, 5, 3},
+ {1, 3, 5, 3, 1}},
+ .divisor = 48
+ },
+ {
+ .name = "stucki",
+ .shift = 3,
+ .pattern = {{0, 0, 0, 8, 4},
+ {2, 4, 8, 4, 2},
+ {1, 2, 4, 2, 1}},
+ .divisor = 42
+ },
+ {
+ .name = "burkes",
+ .shift = 3,
+ .pattern = {{0, 0, 0, 8, 4},
+ {2, 4, 8, 4, 2},
+ {0, 0, 0, 0, 0}},
+ .divisor = 32
+ },
+ {
+ .name = "sierra-3",
+ .shift = 3,
+ .pattern = {{0, 0, 0, 5, 3},
+ {2, 4, 5, 4, 2},
+ {0, 2, 3, 2, 0}},
+ .divisor = 32
+ },
+ {
+ .name = "sierra-2",
+ .shift = 3,
+ .pattern = {{0, 0, 0, 4, 3},
+ {1, 2, 3, 2, 1},
+ {0, 0, 0, 0, 0}},
+ .divisor = 16
+ },
+ {0}
+};