summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2013-05-26 01:48:39 +0200
committerwm4 <wm4@nowhere>2013-05-26 16:44:20 +0200
commit58a7d81dc55835fb0f5cc6a3f14288d722f83c91 (patch)
tree2f7cce21b79b6685348aca207938512cb2917814 /video
parent39225ed19676aa054aa36eb1e09b72ec712ae368 (diff)
downloadmpv-58a7d81dc55835fb0f5cc6a3f14288d722f83c91.tar.bz2
mpv-58a7d81dc55835fb0f5cc6a3f14288d722f83c91.tar.xz
gl_video: improve dithering
Use a different algorithm to generate the dithering matrix. This looks much better than the previous ordered dither matrix with its cross-hatch artifacts. The matrix generation algorithm as well as its implementation was contributed by Wessel Dankers aka Fruit. The code in dither.c is his implementation, reformatted and with static global variables removed by me. The new matrix is uploaded as a float texture - before this commit, it was a normal integer fixed point matrix. This means dithering will be disabled on systems without float textures. The size of the dithering matrix can be configured, as the matrix is generated at runtime. The generation of the matrix can take rather long, and is already unacceptable with size 8. The default is 6, which takes about 100 ms on a Core2 Duo system with dither.c compiled at -O2, which I consider just about acceptable. The old ordered dithering is still available and can be selected by passing the dither=ordered sub-option. The ordered dither matrix generation code was moved to dither.c. This function was originally written by Uoti Urpala.
Diffstat (limited to 'video')
-rw-r--r--video/out/dither.c239
-rw-r--r--video/out/dither.h2
-rw-r--r--video/out/gl_common.c1
-rw-r--r--video/out/gl_common.h2
-rw-r--r--video/out/gl_video.c99
-rw-r--r--video/out/gl_video.h3
-rw-r--r--video/out/gl_video_shaders.glsl7
7 files changed, 330 insertions, 23 deletions
diff --git a/video/out/dither.c b/video/out/dither.c
new file mode 100644
index 0000000000..1919010e74
--- /dev/null
+++ b/video/out/dither.c
@@ -0,0 +1,239 @@
+/******************************************************************************
+
+ dither.c - generate a dithering matrix for downsampling images
+ Copyright © 2013 Wessel Dankers <wsl@fruit.je>
+ This file is part of mpv.
+
+ mpv is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ mpv is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with mpv. If not, see <http://www.gnu.org/licenses/>.
+
+ You can alternatively redistribute this file and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+******************************************************************************/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include <libavutil/lfg.h>
+
+#include "talloc.h"
+#include "dither.h"
+
+#define MAX_SIZEB 8 // largest accepted sizeb (log2 of the matrix side length)
+#define MAX_SIZE (1 << MAX_SIZEB) // largest matrix side length
+#define MAX_SIZE2 (MAX_SIZE * MAX_SIZE) // largest number of matrix cells; sizes the arrays in struct ctx
+
+typedef uint_fast32_t index_t; // type used for cell indices and counters
+
+#define WRAP_SIZE2(k, x) ((index_t)((index_t)(x) & ((k)->size2 - 1))) // x modulo size2 by masking; size2 is a power of two
+#define XY(k, x, y) ((index_t)(((x) | ((y) << (k)->sizeb)))) // linear cell index: x in the low sizeb bits, y above them
+
+struct ctx { // working state for generating one dither matrix
+ unsigned int sizeb, size, size2; // size = 1 << sizeb, size2 = size * size
+ unsigned int gauss_radius; // size / 2 - 1
+ unsigned int gauss_middle; // XY(gauss_radius, gauss_radius): index of the kernel center
+ uint64_t gauss[MAX_SIZE2]; // kernel weights, filled by makegauss()
+ index_t randomat[MAX_SIZE2]; // scratch list of candidate cells collected by getmin()
+ bool calcmat[MAX_SIZE2]; // true once setbit() has processed the cell
+ uint64_t gaussmat[MAX_SIZE2]; // per-cell sum of the kernel weights added by setbit()
+ index_t unimat[MAX_SIZE2]; // result: value assigned to each cell by makeuniform()
+ AVLFG avlfg; // random generator state, seeded in makegauss()
+};
+
+// Reset *k for a matrix with side length (1 << sizeb) and fill k->gauss with
+// the weighting kernel that setbit() accumulates into k->gaussmat.
+// The kernel covers a square of (2 * gauss_radius + 1) cells per side whose
+// center is at index gauss_middle; each weight is
+// exp(-distance * sigma) / gauss_size2, scaled to the uint64_t range.
+static void makegauss(struct ctx *k, unsigned int sizeb)
+{
+    // sizeb == 1 gives gauss_radius == 0, which would make the sigma
+    // computation below divide by zero (and then convert NaN to an integer),
+    // so 2 is the smallest usable value.
+    assert(sizeb >= 2 && sizeb <= MAX_SIZEB);
+
+    // This also clears k->gauss, so cells outside the kernel stay 0.
+    memset(k, 0, sizeof(*k));
+    av_lfg_init(&k->avlfg, 123);
+
+    k->sizeb = sizeb;
+    k->size = 1 << k->sizeb;
+    k->size2 = k->size * k->size;
+
+    k->gauss_radius = k->size / 2 - 1;
+    k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius);
+
+    unsigned int gauss_size = k->gauss_radius * 2 + 1;
+    unsigned int gauss_size2 = gauss_size * gauss_size;
+
+    // With this sigma the scaled weight at distance gauss_radius from the
+    // center evaluates to 1.5, i.e. 1 after truncation to uint64_t.
+    long double sigma = -logl(1.5 / UINT64_MAX * gauss_size2) / k->gauss_radius;
+
+    // Only one octant (gx <= gy <= gauss_radius) is evaluated; the other seven
+    // are filled with mirrored copies of the same value.
+    for (index_t gy = 0; gy <= k->gauss_radius; gy++) {
+        for (index_t gx = 0; gx <= gy; gx++) {
+            int cx = (int)gx - k->gauss_radius;
+            int cy = (int)gy - k->gauss_radius;
+            int sq = cx * cx + cy * cy;
+            long double e = expl(-sqrtl(sq) * sigma);
+            uint64_t v = e / gauss_size2 * UINT64_MAX;
+            k->gauss[XY(k, gx, gy)] =
+                k->gauss[XY(k, gy, gx)] =
+                k->gauss[XY(k, gx, gauss_size - 1 - gy)] =
+                k->gauss[XY(k, gy, gauss_size - 1 - gx)] =
+                k->gauss[XY(k, gauss_size - 1 - gx, gy)] =
+                k->gauss[XY(k, gauss_size - 1 - gy, gx)] =
+                k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] =
+                k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v;
+        }
+    }
+
+    // Sanity check: the sum of all kernel weights must fit into a uint64_t.
+    uint64_t total = 0;
+    for (index_t c = 0; c < k->size2; c++) {
+        uint64_t oldtotal = total;
+        total += k->gauss[c];
+        assert(total >= oldtotal);
+    }
+}
+
+// Mark cell c as placed and add the kernel to k->gaussmat, rotated cyclically
+// in linear index space so that the kernel center (gauss_middle) lands on c.
+// Does nothing if c was already marked.
+static void setbit(struct ctx *k, index_t c)
+{
+    if (k->calcmat[c])
+        return;
+    k->calcmat[c] = true;
+
+    // gaussmat[i] receives gauss[(gauss_middle - c + i) mod size2]
+    index_t start = WRAP_SIZE2(k, k->gauss_middle + k->size2 - c);
+    for (index_t i = 0; i < k->size2; i++)
+        k->gaussmat[i] += k->gauss[WRAP_SIZE2(k, start + i)];
+}
+
+// Return an unmarked cell whose accumulated weight in k->gaussmat is minimal.
+// Ties are collected in k->randomat and broken with the random generator,
+// except when every cell ties (nothing marked yet): then size2 / 2 is returned.
+static index_t getmin(struct ctx *k)
+{
+    const unsigned int cells = k->size2;
+    uint64_t lowest = UINT64_MAX;
+    index_t ncand = 0;
+
+    for (index_t i = 0; i < cells; i++) {
+        if (k->calcmat[i])
+            continue;
+        uint64_t weight = k->gaussmat[i];
+        if (weight > lowest)
+            continue;
+        if (weight < lowest) {
+            // New minimum: restart the candidate list
+            lowest = weight;
+            ncand = 0;
+        }
+        k->randomat[ncand++] = i;
+    }
+
+    if (ncand == 1)
+        return k->randomat[0];
+    if (ncand == cells)
+        return cells / 2;
+    return k->randomat[av_lfg_get(&k->avlfg) % ncand];
+}
+
+// Assign the values 0 .. size2-1 to the cells of k->unimat, in the order in
+// which getmin() selects them; each selected cell is marked with setbit().
+static void makeuniform(struct ctx *k)
+{
+    const unsigned int cells = k->size2;
+    index_t rank = 0;
+    while (rank < cells) {
+        index_t cell = getmin(k);
+        setbit(k, cell);
+        k->unimat[cell] = rank;
+        rank++;
+    }
+}
+
+// out_matrix is a rectangular tsize * tsize array, where tsize = (1 << size).
+// It is filled row by row with the value assigned to each cell divided by the
+// total number of cells.
+void mp_make_fruit_dither_matrix(float *out_matrix, int size)
+{
+    struct ctx *k = talloc(NULL, struct ctx);
+    makegauss(k, size);
+    makeuniform(k);
+
+    const float cells = k->size2;
+    float *dst = out_matrix;
+    for (index_t row = 0; row < k->size; row++) {
+        for (index_t col = 0; col < k->size; col++)
+            *dst++ = k->unimat[XY(k, col, row)] / cells;
+    }
+
+    talloc_free(k);
+}
+
+// Fill m (size * size bytes, rows of length size) with an ordered dither
+// matrix, built by repeatedly expanding the top-left block into four quadrants.
+// NOTE(review): the doubling loop presumably expects size to be a power of
+// two; the only caller visible here passes 8.
+void mp_make_ordered_dither_matrix(unsigned char *m, int size)
+{
+    m[0] = 0;
+    for (int block = 1; block < size; block *= 2) {
+        // Quadrant origins. The source quadrant (origin 0) must come last,
+        // because the other three are derived from its unscaled values.
+        const int origin[4] = {block * size, block, block * (size + 1), 0};
+        for (int q = 0; q < 4; q++) {
+            int bias = (3 - q) * 256 / size / size;
+            for (int row = 0; row < block; row++) {
+                for (int col = 0; col < block; col++) {
+                    int src = row * size + col;
+                    m[src + origin[q]] = m[src] * 4 + bias;
+                }
+            }
+        }
+    }
+}
+
+#if 0
+
+// qsort() comparator ordering index_t values ascending.
+static int index_cmp(const void *a, const void *b)
+{
+    const index_t lhs = *(const index_t *)a;
+    const index_t rhs = *(const index_t *)b;
+    if (lhs < rhs)
+        return -1;
+    return lhs > rhs ? 1 : 0;
+}
+
+// Consistency check: after sorting, k->unimat must contain exactly the values
+// 0 .. size2-1. Sorts k->unimat in place, so the matrix is destroyed.
+static void fsck(struct ctx *k)
+{
+    qsort(k->unimat, k->size2, sizeof(k->unimat[0]), index_cmp);
+    index_t expected = 0;
+    while (expected < k->size2) {
+        assert(k->unimat[expected] == expected);
+        expected++;
+    }
+}
+
+uint16_t r[MAX_SIZE2]; // receives the copy of the matrix made by the active branch of print()
+static void print(struct ctx *k) // debug helper: dump k->unimat as C source, or copy it into r
+{
+#if 0 // disabled variant: write the matrix to stdout as a C array definition
+ puts("#include <stdint.h>");
+ printf("static const int mp_dither_size = %d;\n", k->size);
+ printf("static const int mp_dither_size2 = %d;\n", k->size2);
+ printf("static const uint16_t mp_dither_matrix[] = {\n");
+ for(index_t y = 0; y < k->size; y++) {
+ printf("\t");
+ for(index_t x = 0; x < k->size; x++)
+ printf("%4"PRIuFAST32", ", k->unimat[XY(k, x, y)]);
+ printf("\n");
+ }
+ puts("};");
+#else // active variant: copy every cell of k->unimat into r at the same index
+ for(index_t y = 0; y < k->size; y++) {
+ for(index_t x = 0; x < k->size; x++)
+ r[XY(k, x, y)] = k->unimat[XY(k, x, y)];
+ }
+#endif
+}
+
+#include "osdep/timer.h"
+// Stand-alone benchmark (only built when the surrounding #if is enabled):
+// generates a matrix with sizeb 6, runs print() and fsck() on it and reports
+// the elapsed time. Returns 1 if the context cannot be allocated.
+int main(void)
+{
+    mp_time_init();
+    struct ctx *k = malloc(sizeof(*k));
+    if (!k) {
+        fprintf(stderr, "out of memory\n");
+        return 1;
+    }
+    int64_t start = mp_time_us();
+    makegauss(k, 6);
+    makeuniform(k);
+    print(k);
+    fsck(k); // sorts k->unimat in place, so it must run after print()
+    int64_t elapsed = mp_time_us() - start;
+    printf("time: %f ms\n", elapsed / 1000.0);
+    free(k);
+    return 0;
+}
+
+#endif
diff --git a/video/out/dither.h b/video/out/dither.h
new file mode 100644
index 0000000000..ca804e37c9
--- /dev/null
+++ b/video/out/dither.h
@@ -0,0 +1,2 @@
+void mp_make_fruit_dither_matrix(float *out_matrix, int size);
+void mp_make_ordered_dither_matrix(unsigned char *m, int size);
diff --git a/video/out/gl_common.c b/video/out/gl_common.c
index 39fb592df4..cc3eabb712 100644
--- a/video/out/gl_common.c
+++ b/video/out/gl_common.c
@@ -259,6 +259,7 @@ struct gl_functions gl_functions[] = {
DEF_FN(Uniform2f),
DEF_FN(Uniform3f),
DEF_FN(Uniform1i),
+ DEF_FN(UniformMatrix2fv),
DEF_FN(UniformMatrix3fv),
DEF_FN(TexImage3D),
{0},
diff --git a/video/out/gl_common.h b/video/out/gl_common.h
index 8a627963ed..a5e3125fba 100644
--- a/video/out/gl_common.h
+++ b/video/out/gl_common.h
@@ -305,6 +305,8 @@ struct GL {
void (GLAPIENTRY *Uniform3f)(GLint, GLfloat, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform4f)(GLint, GLfloat, GLfloat, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform1i)(GLint, GLint);
+ void (GLAPIENTRY *UniformMatrix2fv)(GLint, GLsizei, GLboolean,
+ const GLfloat *);
void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean,
const GLfloat *);
void (GLAPIENTRY *UniformMatrix4x3fv)(GLint, GLsizei, GLboolean,
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index 791f302941..cc2656c763 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -37,6 +37,7 @@
#include "aspect.h"
#include "video/memcpy_pic.h"
#include "bitmap_packer.h"
+#include "dither.h"
static const char vo_opengl_shaders[] =
// Generated from gl_video_shaders.glsl
@@ -192,6 +193,10 @@ struct gl_video {
int frames_rendered;
+ // Cached because computing it can take relatively long
+ int last_dither_matrix_size;
+ float *last_dither_matrix;
+
void *scratch;
};
@@ -229,6 +234,7 @@ static const char *osd_shaders[SUBBITMAP_COUNT] = {
static const struct gl_video_opts gl_video_opts_def = {
.npot = 1,
.dither_depth = -1,
+ .dither_size = 6,
.fbo_format = GL_RGB,
.scale_sep = 1,
.scalers = { "bilinear", "bilinear" },
@@ -269,6 +275,10 @@ const struct m_sub_options gl_video_conf = {
{"rgba32f", GL_RGBA32F})),
OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
({"no", -1}, {"auto", 0})),
+ OPT_CHOICE("dither", dither_algo, 0,
+ ({"fruit", 0}, {"ordered", 1}, {"no", -1})),
+ OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
+ OPT_FLAG("temporal-dither", temporal_dither, 0),
OPT_FLAG("alpha", enable_alpha, 0),
{0}
},
@@ -754,6 +764,7 @@ static void compile_shaders(struct gl_video *p)
shader_def_opt(&header_final, "USE_3DLUT", p->use_lut_3d);
shader_def_opt(&header_final, "USE_SRGB", p->opts.srgb);
shader_def_opt(&header_final, "USE_DITHER", p->dither_texture != 0);
+ shader_def_opt(&header_final, "USE_TEMPORAL_DITHER", p->opts.temporal_dither);
if (p->opts.scale_sep && p->scalers[0].kernel) {
header_sep = talloc_strdup(tmp, "");
@@ -926,18 +937,6 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler)
debug_check_gl(p, "after initializing scaler");
}
-static void make_dither_matrix(unsigned char *m, int size)
-{
- m[0] = 0;
- for (int sz = 1; sz < size; sz *= 2) {
- int offset[] = {sz*size, sz, sz * (size+1), 0};
- for (int i = 0; i < 4; i++)
- for (int y = 0; y < sz * size; y += size)
- for (int x = 0; x < sz; x++)
- m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size;
- }
-}
-
static void init_dither(struct gl_video *p)
{
GL *gl = p->gl;
@@ -947,30 +946,54 @@ static void init_dither(struct gl_video *p)
if (p->opts.dither_depth > 0)
dst_depth = p->opts.dither_depth;
- if (p->opts.dither_depth < 0)
+ if (p->opts.dither_depth < 0 || p->opts.dither_algo < 0)
return;
mp_msg(MSGT_VO, MSGL_V, "[gl] Dither to %d.\n", dst_depth);
+ int tex_size;
+ void *tex_data;
+ GLenum tex_type;
+ unsigned char temp[256];
+
+ if (p->opts.dither_algo == 0) {
+ int sizeb = p->opts.dither_size;
+ int size = 1 << sizeb;
+
+ if (p->last_dither_matrix_size != size) {
+ p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix,
+ float, size * size);
+ mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb);
+ p->last_dither_matrix_size = size;
+ }
+
+ tex_size = size;
+ tex_type = GL_FLOAT;
+ tex_data = p->last_dither_matrix;
+ } else {
+ assert(sizeof(temp) >= 8 * 8);
+ mp_make_ordered_dither_matrix(temp, 8);
+
+ tex_size = 8;
+ tex_type = GL_UNSIGNED_BYTE;
+ tex_data = temp;
+ }
+
// This defines how many bits are considered significant for output on
- // screen. The superfluous bits will be used for rounded according to the
+ // screen. The superfluous bits will be used for rounding according to the
// dither matrix. The precision of the source implicitly decides how many
// dither patterns can be visible.
p->dither_quantization = (1 << dst_depth) - 1;
- int size = 8;
- p->dither_multiply = p->dither_quantization + 1.0 / (size*size);
- unsigned char dither[256];
- make_dither_matrix(dither, size);
-
- p->dither_size = size;
+ p->dither_multiply = p->dither_quantization + 1.0 / (tex_size * tex_size);
+ p->dither_size = tex_size;
gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_DITHER);
gl->GenTextures(1, &p->dither_texture);
gl->BindTexture(GL_TEXTURE_2D, p->dither_texture);
gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, size, size, 0, GL_RED,
- GL_UNSIGNED_BYTE, dither);
+ gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, tex_size, tex_size, 0, GL_RED,
+ tex_type, tex_data);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
@@ -1164,6 +1187,25 @@ static void uninit_video(struct gl_video *p)
fbotex_uninit(p, &p->scale_sep_fbo);
}
+// Upload the "dither_trafo" uniform of the final program for the current
+// frame. The transform cycles through 8 phases derived from frames_rendered:
+// four rotations in quarter-turn steps, then the same four with the second
+// matrix row negated.
+static void change_dither_trafo(struct gl_video *p)
+{
+    GL *gl = p->gl;
+    int prog = p->final_program;
+
+    int phase = p->frames_rendered % 8u;
+    float angle = phase * (M_PI / 2); // rotate
+    float flip = phase < 4 ? 1 : -1; // mirror
+
+    float trafo[2][2] = {
+        {cos(angle), -sin(angle)},
+        {sin(angle) * flip, cos(angle) * flip},
+    };
+
+    gl->UseProgram(prog);
+    // GL_TRUE: the array is passed in row-major order
+    gl->UniformMatrix2fv(gl->GetUniformLocation(prog, "dither_trafo"),
+                         1, GL_TRUE, &trafo[0][0]);
+    gl->UseProgram(0);
+}
+
static void render_to_fbo(struct gl_video *p, struct fbotex *fbo, int w, int h,
int tex_w, int tex_h)
{
@@ -1206,6 +1248,9 @@ void gl_video_render_frame(struct gl_video *p)
struct video_image *vimg = &p->image;
bool is_flipped = vimg->image_flipped;
+ if (p->opts.temporal_dither)
+ change_dither_trafo(p);
+
if (p->dst_rect.x0 > p->vp_x || p->dst_rect.y0 > p->vp_y
|| p->dst_rect.x1 < p->vp_x + p->vp_w
|| p->dst_rect.y1 < p->vp_y + p->vp_h)
@@ -1274,6 +1319,8 @@ void gl_video_render_frame(struct gl_video *p)
gl->UseProgram(0);
+ p->frames_rendered++;
+
debug_check_gl(p, "after video rendering");
}
@@ -1542,6 +1589,14 @@ static void check_gl_features(struct gl_video *p)
}
}
+ if (!have_float_tex && p->opts.dither_depth >= 0) {
+ // only fruit dithering uses float textures
+ if (p->opts.dither_algo == 0) {
+ p->opts.dither_depth = -1;
+ disabled[n_disabled++] = "dithering (float tex.)";
+ }
+ }
+
if (!have_srgb && p->opts.srgb) {
p->opts.srgb = false;
disabled[n_disabled++] = "sRGB";
diff --git a/video/out/gl_video.h b/video/out/gl_video.h
index 72cfcf51a9..0d796502bb 100644
--- a/video/out/gl_video.h
+++ b/video/out/gl_video.h
@@ -39,6 +39,9 @@ struct gl_video_opts {
int npot;
int pbo;
int dither_depth;
+ int dither_algo;
+ int dither_size;
+ int temporal_dither;
int fbo_format;
int stereo_mode;
int enable_alpha;
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index b968cb2c87..c19a19fbee 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -121,6 +121,7 @@ uniform sampler2D lut_l_2d;
uniform sampler3D lut_3d;
uniform sampler2D dither;
uniform mat4x3 colormatrix;
+uniform mat2 dither_trafo;
uniform vec3 inv_gamma;
uniform float input_gamma;
uniform float conv_gamma;
@@ -376,7 +377,11 @@ void main() {
color.rgb = srgb_compand(color.rgb);
#endif
#ifdef USE_DITHER
- float dither_value = texture(dither, gl_FragCoord.xy / dither_size).r;
+ vec2 dither_pos = gl_FragCoord.xy / dither_size;
+#ifdef USE_TEMPORAL_DITHER
+ dither_pos = dither_trafo * dither_pos;
+#endif
+ float dither_value = texture(dither, dither_pos).r;
color = floor(color * dither_multiply + dither_value ) / dither_quantization;
#endif
#ifdef USE_ALPHA