vo_opengl: split utils.c/h

Actually GL-specific parts go into gl_utils.c/h, the shader cache (gl_sc*) into shader_cache.c/h. No semantic changes of any kind, except that the VAO helper is made public again as part of gl_utils.c (all while the goal for gl_utils.c itself is to be included by GL-specific code).
author: wm4 <wm4@nowhere> 2017-08-04 19:09:46 +0200
committer: wm4 <wm4@nowhere> 2017-08-05 13:09:05 +0200
commit: aac04c0d6496d8847499a94376e85f1711bf31d6 (patch)
tree: bf77bcd2d12729a3706804b29ba5951d173a3226
parent: fa4a1c46759136334646e47c627ddb75e532a658 (diff)
download: mpv-aac04c0d6496d8847499a94376e85f1711bf31d6.tar.bz2
mpv-aac04c0d6496d8847499a94376e85f1711bf31d6.tar.xz
13 files changed, 1647 insertions, 1602 deletions
diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c
new file mode 100644
index 0000000000..c870756b1e
--- /dev/null
+++ b/video/out/opengl/gl_utils.c
@@ -0,0 +1,502 @@
+/*
+ * This file is part of mpv.
+ * Parts based on MPlayer code by Reimar Döffinger.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <libavutil/sha.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/mem.h>
+
+#include "osdep/io.h"
+
+#include "common/common.h"
+#include "options/path.h"
+#include "stream/stream.h"
+#include "formats.h"
+#include "ra_gl.h"
+#include "gl_utils.h"
+
+// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL)
+static const char *gl_error_to_string(GLenum error)
+{
+    switch (error) {
+    case GL_INVALID_ENUM: return "INVALID_ENUM";
+    case GL_INVALID_VALUE: return "INVALID_VALUE";
+    case GL_INVALID_OPERATION: return "INVALID_OPERATION";
+    case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION";
+    case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY";
+    default: return "unknown";
+    }
+}
+
+void gl_check_error(GL *gl, struct mp_log *log, const char *info)
+{
+    for (;;) {
+        GLenum error = gl->GetError();
+        if (error == GL_NO_ERROR)
+            break;
+        mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info,
+               gl_error_to_string(error));
+    }
+}
+
+static int get_alignment(int stride)
+{
+    if (stride % 8 == 0)
+        return 8;
+    if (stride % 4 == 0)
+        return 4;
+    if (stride % 2 == 0)
+        return 2;
+    return 1;
+}
+
+// upload a texture, handling things like stride and slices
+//  target: texture target, usually GL_TEXTURE_2D
+//  format, type: texture parameters
+//  dataptr, stride: image data
+//  x, y, w, h: part of the image to upload
+void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
+                   const void *dataptr, int stride,
+                   int x, int y, int w, int h)
+{
+    int bpp = gl_bytes_per_pixel(format, type);
+    const uint8_t *data = dataptr;
+    int y_max = y + h;
+    if (w <= 0 || h <= 0 || !bpp)
+        return;
+    if (stride < 0) {
+        data += (h - 1) * stride;
+        stride = -stride;
+    }
+    gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride));
+    int slice = h;
+    if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) {
+        // this is not always correct, but should work for MPlayer
+        gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp);
+    } else {
+        if (stride != bpp * w)
+            slice = 1; // very inefficient, but at least it works
+    }
+    for (; y + slice <= y_max; y += slice) {
+        gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data);
+        data += stride * slice;
+    }
+    if (y < y_max)
+        gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data);
+    if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH)
+        gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);
+}
+
+mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h)
+{
+    if (gl->es)
+        return NULL; // ES can't read from front buffer
+    mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h);
+    if (!image)
+        return NULL;
+    gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
+    GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT;
+    gl->PixelStorei(GL_PACK_ALIGNMENT, 1);
+    gl->ReadBuffer(obj);
+    //flip image while reading (and also avoid stride-related trouble)
+    for (int y = 0; y < h; y++) {
+        gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE,
+                       image->planes[0] + y * image->stride[0]);
+    }
+    gl->PixelStorei(GL_PACK_ALIGNMENT, 4);
+    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+    return image;
+}
+
+// Log src one line at a time, each prefixed with its 1-based line number.
+void mp_log_source(struct mp_log *log, int lev, const char *src)
+{
+    if (!src)
+        return;
+    for (int line = 1; *src; line++) {
+        const char *end = strchr(src, '\n');
+        // Check end before doing arithmetic on it: NULL + 1 is undefined.
+        const char *next = end ? end + 1 : src + strlen(src);
+        if (!end)
+            end = next; // last line has no newline; it ends at the terminator
+        mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src);
+        src = next;
+    }
+}
+
+static void gl_vao_enable_attribs(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    for (int n = 0; vao->entries[n].name; n++) {
+        const struct gl_vao_entry *e = &vao->entries[n];
+
+        gl->EnableVertexAttribArray(n);
+        gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized,
+                                vao->stride, (void *)(intptr_t)e->offset);
+    }
+}
+
+void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
+                 const struct gl_vao_entry *entries)
+{
+    assert(!vao->vao);
+    assert(!vao->buffer);
+
+    *vao = (struct gl_vao){
+        .gl = gl,
+        .stride = stride,
+        .entries = entries,
+    };
+
+    gl->GenBuffers(1, &vao->buffer);
+
+    if (gl->BindVertexArray) {
+        gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+
+        gl->GenVertexArrays(1, &vao->vao);
+        gl->BindVertexArray(vao->vao);
+        gl_vao_enable_attribs(vao);
+        gl->BindVertexArray(0);
+
+        gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+    }
+}
+
+void gl_vao_uninit(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+    if (!gl)
+        return;
+
+    if (gl->DeleteVertexArrays)
+        gl->DeleteVertexArrays(1, &vao->vao);
+    gl->DeleteBuffers(1, &vao->buffer);
+
+    *vao = (struct gl_vao){0};
+}
+
+static void gl_vao_bind(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (gl->BindVertexArray) {
+        gl->BindVertexArray(vao->vao);
+    } else {
+        gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+        gl_vao_enable_attribs(vao);
+        gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+    }
+}
+
+static void gl_vao_unbind(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (gl->BindVertexArray) {
+        gl->BindVertexArray(0);
+    } else {
+        for (int n = 0; vao->entries[n].name; n++)
+            gl->DisableVertexAttribArray(n);
+    }
+}
+
+// Draw the vertex data (as described by the gl_vao_entry entries) in ptr
+// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES.
+// If ptr is NULL, then skip the upload, and use the data uploaded with the
+// previous call.
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num)
+{
+    GL *gl = vao->gl;
+
+    if (ptr) {
+        gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+        gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW);
+        gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+    }
+
+    gl_vao_bind(vao);
+
+    gl->DrawArrays(prim, 0, num);
+
+    gl_vao_unbind(vao);
+}
+
+static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id,
+                                   GLenum severity, GLsizei length,
+                                   const GLchar *message, const void *userParam)
+{
+    // keep in mind that the debug callback can be asynchronous
+    struct mp_log *log = (void *)userParam;
+    int level = MSGL_ERR;
+    switch (severity) {
+    case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break;
+    case GL_DEBUG_SEVERITY_LOW:         level = MSGL_INFO; break;
+    case GL_DEBUG_SEVERITY_MEDIUM:      level = MSGL_WARN; break;
+    case GL_DEBUG_SEVERITY_HIGH:        level = MSGL_ERR; break;
+    }
+    mp_msg(log, level, "GL: %s\n", message);
+}
+
+void gl_set_debug_logger(GL *gl, struct mp_log *log)
+{
+    if (gl->DebugMessageCallback)
+        gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log);
+}
+
+// Maximum number of simultaneous query objects to keep around. Reducing this
+// number might cause rendering to block until the result of a previous query is
+// available
+#define QUERY_OBJECT_NUM 8
+
+struct gl_timer {
+    GL *gl;
+    GLuint query[QUERY_OBJECT_NUM];
+    int query_idx;
+
+    // these numbers are all in nanoseconds
+    uint64_t samples[PERF_SAMPLE_COUNT];
+    int sample_idx;
+    int sample_count;
+
+    uint64_t avg_sum;
+    uint64_t peak;
+};
+
+// Snapshot the timer's stats; res.samples points into timer, not a copy.
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer)
+{
+    assert(timer);
+    // Add PERF_SAMPLE_COUNT so % never sees a negative value (OOB index).
+    int oldest = timer->sample_idx - timer->sample_count + PERF_SAMPLE_COUNT;
+    int newest = timer->sample_idx - 1 + PERF_SAMPLE_COUNT;
+    struct mp_pass_perf res = {
+        .count = timer->sample_count,
+        .index = oldest % PERF_SAMPLE_COUNT,
+        .peak = timer->peak,
+        .samples = timer->samples,
+        .last = timer->samples[newest % PERF_SAMPLE_COUNT],
+    };
+    if (timer->sample_count > 0)
+        res.avg = timer->avg_sum / timer->sample_count;
+    return res;
+}
+
+struct gl_timer *gl_timer_create(GL *gl)
+{
+    struct gl_timer *timer = talloc_ptrtype(NULL, timer);
+    *timer = (struct gl_timer){ .gl = gl };
+
+    if (gl->GenQueries)
+        gl->GenQueries(QUERY_OBJECT_NUM, timer->query);
+
+    return timer;
+}
+
+void gl_timer_free(struct gl_timer *timer)
+{
+    if (!timer)
+        return;
+
+    GL *gl = timer->gl;
+    if (gl && gl->DeleteQueries) {
+        // this is a no-op on already uninitialized queries
+        gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query);
+    }
+
+    talloc_free(timer);
+}
+
+static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
+{
+    // Store the new sample in the ring buffer and grab the value it replaces
+    uint64_t old = timer->samples[timer->sample_idx];
+    timer->samples[timer->sample_idx++] = new;
+    timer->sample_idx %= PERF_SAMPLE_COUNT;
+
+    // Update the running sum (used for the average) and the sample count
+    timer->avg_sum = timer->avg_sum + new - old;
+    timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT);
+
+    // Update peak if necessary
+    if (new >= timer->peak) {
+        timer->peak = new;
+    } else if (timer->peak == old) {
+        // It's possible that the last peak was the value we just removed,
+        // if so we need to scan for the new peak
+        uint64_t peak = new;
+        for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
+            peak = MPMAX(peak, timer->samples[i]);
+        timer->peak = peak;
+    }
+}
+
+// If no free query is available, this can block. Shouldn't ever happen in
+// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM)
+// IMPORTANT: only one gl_timer object may ever be active at a single time.
+// The calling code *MUST* ensure this
+void gl_timer_start(struct gl_timer *timer)
+{
+    assert(timer);
+    GL *gl = timer->gl;
+    if (!gl->BeginQuery)
+        return;
+
+    // Get the next query object
+    GLuint id = timer->query[timer->query_idx++];
+    timer->query_idx %= QUERY_OBJECT_NUM;
+
+    // If this query object already holds a result, we need to get and
+    // record it first
+    if (gl->IsQuery(id)) {
+        GLuint64 elapsed;
+        gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed);
+        gl_timer_record(timer, elapsed);
+    }
+
+    gl->BeginQuery(GL_TIME_ELAPSED, id);
+}
+
+void gl_timer_stop(GL *gl)
+{
+    if (gl->EndQuery)
+        gl->EndQuery(GL_TIME_ELAPSED);
+}
+
+// Upload a texture, going through a PBO. PBO supposedly can facilitate
+// asynchronous copy from CPU to GPU, so this is an optimization. Note that
+// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can
+// ruin performance.
+// This call is like gl_upload_tex(), plus PBO management/use.
+// target, format, type, dataptr, stride, x, y, w, h: texture upload params
+//                                                    (see gl_upload_tex())
+// tex_w, tex_h: maximum size of the used texture
+// use_pbo: for convenience, if false redirects the call to gl_upload_tex
+void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
+                       GLenum target, GLenum format, GLenum type,
+                       int tex_w, int tex_h, const void *dataptr, int stride,
+                       int x, int y, int w, int h)
+{
+    assert(x >= 0 && y >= 0 && w >= 0 && h >= 0);
+    assert(x + w <= tex_w && y + h <= tex_h);
+
+    if (!use_pbo) {
+        gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
+        return;
+    }
+
+    // We align the buffer size to 4096 to avoid possible subregion
+    // dependencies. This is not a strict requirement (the spec requires no
+    // alignment), but a good precaution for performance reasons
+    size_t needed_size = stride * h;
+    size_t buffer_size = MP_ALIGN_UP(needed_size, 4096);
+
+    if (buffer_size != pbo->buffer_size)
+        gl_pbo_upload_uninit(pbo);
+
+    if (!pbo->buffer) {
+        pbo->gl = gl;
+        pbo->buffer_size = buffer_size;
+        gl->GenBuffers(1, &pbo->buffer);
+        gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+        // Magic time: Because we memcpy once from RAM to the buffer, and then
+        // the GPU needs to read from this anyway, we actually *don't* want
+        // this buffer to be allocated in RAM. If we allocate it in VRAM
+        // instead, we can reduce this to a single copy: from RAM into VRAM.
+        // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best
+        // allocated on host memory instead of device memory, so we lie about
+        // the usage to fool the driver into giving us a buffer in VRAM instead
+        // of RAM, which can be significantly faster for our use case.
+        // Seriously, fuck OpenGL.
+        gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
+                       NULL, GL_STREAM_COPY);
+    }
+
+    uintptr_t offset = buffer_size * pbo->index;
+    pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
+
+    gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+    gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr);
+    gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h);
+    gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+}
+
+void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)
+{
+    if (pbo->gl)
+        pbo->gl->DeleteBuffers(1, &pbo->buffer);
+
+    *pbo = (struct gl_pbo_upload){0};
+}
+
+// The intention is to return the actual depth of any fixed point 16 bit
+// textures. (Actually tests only 1 format - hope that is good enough.)
+int gl_determine_16bit_tex_depth(GL *gl)
+{
+    const struct gl_format *fmt = gl_find_unorm_format(gl, 2, 1);
+    if (!gl->GetTexLevelParameteriv || !fmt) {
+        // ANGLE supports ES 3.0 and the extension, but lacks the function above.
+        if (gl->mpgl_caps & MPGL_CAP_EXT16)
+            return 16;
+        return -1;
+    }
+
+    GLuint tex;
+    gl->GenTextures(1, &tex);
+    gl->BindTexture(GL_TEXTURE_2D, tex);
+    gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, 64, 64, 0,
+                   fmt->format, fmt->type, NULL);
+    GLenum pname = 0;
+    switch (fmt->format) {
+    case GL_RED:        pname = GL_TEXTURE_RED_SIZE; break;
+    case GL_LUMINANCE:  pname = GL_TEXTURE_LUMINANCE_SIZE; break;
+    }
+    GLint param = -1;
+    if (pname)
+        gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, pname, &param);
+    gl->DeleteTextures(1, &tex);
+    return param;
+}
+
+int gl_get_fb_depth(GL *gl, int fbo)
+{
+    if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB))
+        return -1;
+
+    gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
+
+    GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK;
+    if (fbo)
+        obj = GL_COLOR_ATTACHMENT0;
+
+    GLint depth_g = -1;
+
+    gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj,
+                            GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g);
+
+    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+
+    return depth_g > 0 ? depth_g : -1;
+}
diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h
new file mode 100644
index 0000000000..5ae8d1590b
--- /dev/null
+++ b/video/out/opengl/gl_utils.h
@@ -0,0 +1,94 @@
+/*
+ * This file is part of mpv.
+ * Parts based on MPlayer code by Reimar Döffinger.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_UTILS_
+#define MP_GL_UTILS_
+
+#include <math.h>
+
+#include "common.h"
+#include "ra.h"
+
+struct mp_log;
+
+void gl_check_error(GL *gl, struct mp_log *log, const char *info);
+
+void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
+                   const void *dataptr, int stride,
+                   int x, int y, int w, int h);
+
+mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h);
+
+const char* mp_sampler_type(GLenum texture_target);
+
+// print a multi line string with line numbers (e.g. for shader sources)
+// log, lev: module and log level, as in mp_msg()
+void mp_log_source(struct mp_log *log, int lev, const char *src);
+
+struct gl_vao_entry {
+    // used for shader / glBindAttribLocation
+    const char *name;
+    // glVertexAttribPointer() arguments
+    int num_elems;      // size (number of elements)
+    GLenum type;
+    bool normalized;
+    int offset;
+};
+
+struct gl_vao {
+    GL *gl;
+    GLuint vao;     // the VAO object, or 0 if unsupported by driver
+    GLuint buffer;  // GL_ARRAY_BUFFER used for the data
+    int stride;     // size of each element (interleaved elements are assumed)
+    const struct gl_vao_entry *entries;
+};
+
+void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
+                 const struct gl_vao_entry *entries);
+void gl_vao_uninit(struct gl_vao *vao);
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
+
+void gl_set_debug_logger(GL *gl, struct mp_log *log);
+
+struct gl_timer;
+
+struct gl_timer *gl_timer_create(GL *gl);
+void gl_timer_free(struct gl_timer *timer);
+void gl_timer_start(struct gl_timer *timer);
+void gl_timer_stop(GL *gl);
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer);
+
+#define NUM_PBO_BUFFERS 3
+
+struct gl_pbo_upload {
+    GL *gl;
+    int index;
+    GLuint buffer;
+    size_t buffer_size;
+};
+
+void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
+                       GLenum target, GLenum format,  GLenum type,
+                       int tex_w, int tex_h, const void *dataptr, int stride,
+                       int x, int y, int w, int h);
+void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo);
+
+int gl_determine_16bit_tex_depth(GL *gl);
+int gl_get_fb_depth(GL *gl, int fbo);
+
+#endif
diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c
index 712997ed7a..9ddec18e06 100644
--- a/video/out/opengl/hwdec_vdpau.c
+++ b/video/out/opengl/hwdec_vdpau.c
@@ -21,7 +21,7 @@
 #include <GL/glx.h>
 
 #include "hwdec.h"
-#include "utils.h"
+#include "gl_utils.h"
 #include "video/vdpau.h"
 #include "video/vdpau_mixer.h"
 
diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c
index 18eb36694f..aa0791139d 100644
--- a/video/out/opengl/osd.c
+++ b/video/out/opengl/osd.c
@@ -22,7 +22,6 @@
 #include <libavutil/common.h>
 
 #include "formats.h"
-#include "utils.h"
 #include "osd.h"
 
 #define GLSL(x) gl_sc_add(sc, #x "\n");
diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h
index a09c891c0b..36926f95f0 100644
--- a/video/out/opengl/osd.h
+++ b/video/out/opengl/osd.h
@@ -5,6 +5,8 @@
 #include <inttypes.h>
 
 #include "utils.h"
+#include "gl_utils.h"
+#include "shader_cache.h"
 #include "sub/osd.h"
 
 struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd);
diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h
index 23e3199aeb..016ce13419 100644
--- a/video/out/opengl/ra_gl.h
+++ b/video/out/opengl/ra_gl.h
@@ -2,7 +2,7 @@
 
 #include "common.h"
 #include "ra.h"
-#include "utils.h"
+#include "gl_utils.h"
 
 // For ra.priv
 struct ra_gl {
diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c
new file mode 100644
index 0000000000..7f8b37be64
--- /dev/null
+++ b/video/out/opengl/shader_cache.c
@@ -0,0 +1,952 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <libavutil/sha.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/mem.h>
+
+#include "osdep/io.h"
+
+#include "common/common.h"
+#include "options/path.h"
+#include "stream/stream.h"
+#include "shader_cache.h"
+#include "formats.h"
+#include "ra_gl.h"
+#include "gl_utils.h"
+
+// Force cache flush if more than this number of shaders is created.
+#define SC_MAX_ENTRIES 48
+
+enum uniform_type {
+    UT_invalid,
+    UT_i,
+    UT_f,
+    UT_m,
+};
+
+union uniform_val {
+    GLfloat f[9];
+    GLint i[4];
+};
+
+struct sc_uniform {
+    char *name;
+    enum uniform_type type;
+    const char *glsl_type;
+    int size;
+    GLint loc;
+    union uniform_val v;
+    // Set for sampler uniforms.
+    GLenum tex_target;
+    GLuint tex_handle;
+    // Set for image uniforms
+    GLuint img_handle;
+    GLenum img_access;
+    GLenum img_iformat;
+};
+
+struct sc_buffer {
+    char *name;
+    char *format;
+    GLuint binding;
+    GLuint ssbo;
+};
+
+struct sc_cached_uniform {
+    GLint loc;
+    union uniform_val v;
+};
+
+struct sc_entry {
+    GLuint gl_shader;
+    struct sc_cached_uniform *uniforms;
+    int num_uniforms;
+    bstr frag;
+    bstr vert;
+    bstr comp;
+    struct gl_timer *timer;
+    struct gl_vao vao;
+};
+
+struct gl_shader_cache {
+    GL *gl;
+    struct mp_log *log;
+
+    // permanent
+    char **exts;
+    int num_exts;
+
+    // this is modified during use (gl_sc_add() etc.) and reset for each shader
+    bstr prelude_text;
+    bstr header_text;
+    bstr text;
+    int next_texture_unit;
+    int next_image_unit;
+    int next_buffer_binding;
+    struct gl_vao *vao; // deprecated
+
+    struct sc_entry *entries;
+    int num_entries;
+
+    struct sc_entry *current_shader; // set by gl_sc_generate()
+
+    struct sc_uniform *uniforms;
+    int num_uniforms;
+    struct sc_buffer *buffers;
+    int num_buffers;
+
+    const struct gl_vao_entry *vertex_entries;
+    size_t vertex_size;
+
+    // For checking that the user is calling gl_sc_reset() properly.
+    bool needs_reset;
+
+    bool error_state; // true if an error occurred
+
+    // temporary buffers (avoids frequent reallocations)
+    bstr tmp[5];
+
+    // For the disk-cache.
+    char *cache_dir;
+    struct mpv_global *global; // can be NULL
+};
+
+struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log)
+{
+    struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc);
+    *sc = (struct gl_shader_cache){
+        .gl = gl,
+        .log = log,
+    };
+    gl_sc_reset(sc);
+    return sc;
+}
+
+// Reset the previous pass: drop its shader text, uniforms and buffers, and
+// unbind all GL state managed by sc - the current program and texture units.
+void gl_sc_reset(struct gl_shader_cache *sc)
+{
+    GL *gl = sc->gl;
+
+    if (sc->needs_reset) {
+        gl_timer_stop(gl);
+        gl->UseProgram(0);
+
+        for (int n = 0; n < sc->num_uniforms; n++) {
+            struct sc_uniform *u = &sc->uniforms[n];
+            if (u->type == UT_i && u->tex_target) {
+                gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
+                gl->BindTexture(u->tex_target, 0);
+            }
+            if (u->type == UT_i && u->img_access) {
+                gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0,
+                                     u->img_access, u->img_iformat);
+            }
+        }
+        gl->ActiveTexture(GL_TEXTURE0);
+
+        for (int n = 0; n < sc->num_buffers; n++) {
+            struct sc_buffer *b = &sc->buffers[n];
+            gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0);
+        }
+    }
+
+    sc->prelude_text.len = 0;
+    sc->header_text.len = 0;
+    sc->text.len = 0;
+    for (int n = 0; n < sc->num_uniforms; n++)
+        talloc_free(sc->uniforms[n].name);
+    sc->num_uniforms = 0;
+    for (int n = 0; n < sc->num_buffers; n++) {
+        talloc_free(sc->buffers[n].name);
+        talloc_free(sc->buffers[n].format);
+    }
+    sc->num_buffers = 0;
+    sc->next_texture_unit = 1; // not 0, as 0 is "free for use"
+    sc->next_image_unit = 1;
+    sc->next_buffer_binding = 1;
+    sc->vertex_entries = NULL;
+    sc->vertex_size = 0;
+    sc->current_shader = NULL;
+    sc->needs_reset = false;
+}
+
+static void sc_flush_cache(struct gl_shader_cache *sc)
+{
+    MP_VERBOSE(sc, "flushing shader cache\n");
+
+    for (int n = 0; n < sc->num_entries; n++) {
+        struct sc_entry *e = &sc->entries[n];
+        sc->gl->DeleteProgram(e->gl_shader);
+        talloc_free(e->vert.start);
+        talloc_free(e->frag.start);
+        talloc_free(e->comp.start);
+        talloc_free(e->uniforms);
+        gl_timer_free(e->timer);
+        gl_vao_uninit(&e->vao);
+    }
+    sc->num_entries = 0;
+}
+
+void gl_sc_destroy(struct gl_shader_cache *sc)
+{
+    if (!sc)
+        return;
+    gl_sc_reset(sc);
+    sc_flush_cache(sc);
+    talloc_free(sc);
+}
+
+bool gl_sc_error_state(struct gl_shader_cache *sc)
+{
+    return sc->error_state;
+}
+
+void gl_sc_reset_error(struct gl_shader_cache *sc)
+{
+    sc->error_state = false;
+}
+
+void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name)
+{
+    for (int n = 0; n < sc->num_exts; n++) {
+        if (strcmp(sc->exts[n], name) == 0)
+            return;
+    }
+    MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name));
+}
+
+#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s))
+
+void gl_sc_add(struct gl_shader_cache *sc, const char *text)
+{
+    bstr_xappend0(sc, &sc->text, text);
+}
+
+void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list ap;
+    va_start(ap, textf);
+    bstr_xappend_vasprintf(sc, &sc->text, textf, ap);
+    va_end(ap);
+}
+
+void gl_sc_hadd(struct gl_shader_cache *sc, const char *text)
+{
+    bstr_xappend0(sc, &sc->header_text, text);
+}
+
+void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list ap;
+    va_start(ap, textf);
+    bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap);
+    va_end(ap);
+}
+
+void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
+{
+    bstr_xappend(sc, &sc->header_text, text);
+}
+
+void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list ap;
+    va_start(ap, textf);
+    bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap);
+    va_end(ap);
+}
+
+static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
+                                       const char *name)
+{
+    for (int n = 0; n < sc->num_uniforms; n++) {
+        if (strcmp(sc->uniforms[n].name, name) == 0)
+            return &sc->uniforms[n];
+    }
+    // not found -> add it
+    struct sc_uniform new = {
+        .loc = -1,
+        .name = talloc_strdup(NULL, name),
+    };
+    MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new);
+    return &sc->uniforms[sc->num_uniforms - 1];
+}
+
+static struct sc_buffer *find_buffer(struct gl_shader_cache *sc,
+                                     const char *name)
+{
+    for (int n = 0; n < sc->num_buffers; n++) {
+        if (strcmp(sc->buffers[n].name, name) == 0)
+            return &sc->buffers[n];
+    }
+    // not found -> add it
+    struct sc_buffer new = {
+        .name = talloc_strdup(NULL, name),
+    };
+    MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, new);
+    return &sc->buffers[sc->num_buffers - 1];
+}
+
+const char *mp_sampler_type(GLenum texture_target)
+{
+    switch (texture_target) {
+    case GL_TEXTURE_1D:         return "sampler1D";
+    case GL_TEXTURE_2D:         return "sampler2D";
+    case GL_TEXTURE_RECTANGLE:  return "sample
author	wm4 <wm4@nowhere>	2017-08-04 19:09:46 +0200
committer	wm4 <wm4@nowhere>	2017-08-05 13:09:05 +0200
commit	aac04c0d6496d8847499a94376e85f1711bf31d6 (patch)
tree	bf77bcd2d12729a3706804b29ba5951d173a3226
parent	fa4a1c46759136334646e47c627ddb75e532a658 (diff)
download	mpv-aac04c0d6496d8847499a94376e85f1711bf31d6.tar.bz2 mpv-aac04c0d6496d8847499a94376e85f1711bf31d6.tar.xz