From aac04c0d6496d8847499a94376e85f1711bf31d6 Mon Sep 17 00:00:00 2001 From: wm4 Date: Fri, 4 Aug 2017 19:09:46 +0200 Subject: vo_opengl: split utils.c/h Actually GL-specific parts go into gl_utils.c/h, the shader cache (gl_sc*) into shader_cache.c/h. No semantic changes of any kind, except that the VAO helper is made public again as part of gl_utils.c (all while the goal for gl_utils.c itself is to be included by GL-specific code). --- video/out/opengl/gl_utils.c | 502 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 502 insertions(+) create mode 100644 video/out/opengl/gl_utils.c (limited to 'video/out/opengl/gl_utils.c') diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c new file mode 100644 index 0000000000..c870756b1e --- /dev/null +++ b/video/out/opengl/gl_utils.c @@ -0,0 +1,502 @@ +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "formats.h" +#include "ra_gl.h" +#include "gl_utils.h" + +// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) +static const char *gl_error_to_string(GLenum error) +{ + switch (error) { + case GL_INVALID_ENUM: return "INVALID_ENUM"; + case GL_INVALID_VALUE: return "INVALID_VALUE"; + case GL_INVALID_OPERATION: return "INVALID_OPERATION"; + case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; + case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; + default: return "unknown"; + } +} + +void gl_check_error(GL *gl, struct mp_log *log, const char *info) +{ + for (;;) { + GLenum error = gl->GetError(); + if (error == GL_NO_ERROR) + break; + mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, + gl_error_to_string(error)); + } +} + +static int get_alignment(int stride) +{ + if (stride % 8 == 0) + return 8; + if (stride % 4 == 0) + return 4; + if (stride % 2 == 0) + return 2; + return 1; +} + +// upload a texture, handling things like stride and slices +// target: texture target, usually GL_TEXTURE_2D +// format, type: texture parameters +// dataptr, stride: image data +// x, y, width, height: part of the image to upload +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h) +{ + int bpp = gl_bytes_per_pixel(format, type); + const uint8_t *data = dataptr; + int y_max = y + h; + if (w <= 0 || h <= 0 || !bpp) + return; + if (stride < 0) { + data += (h - 1) * stride; + stride = -stride; + } + gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); + int slice = h; + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { + // this is not always correct, but should work for MPlayer + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); + } else { 
+ if (stride != bpp * w) + slice = 1; // very inefficient, but at least it works + } + for (; y + slice <= y_max; y += slice) { + gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); + data += stride * slice; + } + if (y < y_max) + gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); +} + +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) +{ + if (gl->es) + return NULL; // ES can't read from front buffer + mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); + if (!image) + return NULL; + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT; + gl->PixelStorei(GL_PACK_ALIGNMENT, 1); + gl->ReadBuffer(obj); + //flip image while reading (and also avoid stride-related trouble) + for (int y = 0; y < h; y++) { + gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, + image->planes[0] + y * image->stride[0]); + } + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + return image; +} + +void mp_log_source(struct mp_log *log, int lev, const char *src) +{ + int line = 1; + if (!src) + return; + while (*src) { + const char *end = strchr(src, '\n'); + const char *next = end + 1; + if (!end) + next = end = src + strlen(src); + mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); + line++; + src = next; + } +} + +static void gl_vao_enable_attribs(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + for (int n = 0; vao->entries[n].name; n++) { + const struct gl_vao_entry *e = &vao->entries[n]; + + gl->EnableVertexAttribArray(n); + gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized, + vao->stride, (void *)(intptr_t)e->offset); + } +} + +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct gl_vao_entry *entries) +{ + assert(!vao->vao); + assert(!vao->buffer); + + *vao = (struct gl_vao){ + .gl = 
gl, + .stride = stride, + .entries = entries, + }; + + gl->GenBuffers(1, &vao->buffer); + + if (gl->BindVertexArray) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + + gl->GenVertexArrays(1, &vao->vao); + gl->BindVertexArray(vao->vao); + gl_vao_enable_attribs(vao); + gl->BindVertexArray(0); + + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } +} + +void gl_vao_uninit(struct gl_vao *vao) +{ + GL *gl = vao->gl; + if (!gl) + return; + + if (gl->DeleteVertexArrays) + gl->DeleteVertexArrays(1, &vao->vao); + gl->DeleteBuffers(1, &vao->buffer); + + *vao = (struct gl_vao){0}; +} + +static void gl_vao_bind(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(vao->vao); + } else { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl_vao_enable_attribs(vao); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } +} + +static void gl_vao_unbind(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(0); + } else { + for (int n = 0; vao->entries[n].name; n++) + gl->DisableVertexAttribArray(n); + } +} + +// Draw the vertex data (as described by the gl_vao_entry entries) in ptr +// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. +// If ptr is NULL, then skip the upload, and use the data uploaded with the +// previous call. 
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) +{ + GL *gl = vao->gl; + + if (ptr) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } + + gl_vao_bind(vao); + + gl->DrawArrays(prim, 0, num); + + gl_vao_unbind(vao); +} + +static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar *message, const void *userParam) +{ + // keep in mind that the debug callback can be asynchronous + struct mp_log *log = (void *)userParam; + int level = MSGL_ERR; + switch (severity) { + case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; + case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; + case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; + case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; + } + mp_msg(log, level, "GL: %s\n", message); +} + +void gl_set_debug_logger(GL *gl, struct mp_log *log) +{ + if (gl->DebugMessageCallback) + gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); +} + +// Maximum number of simultaneous query objects to keep around. 
Reducing this +// number might cause rendering to block until the result of a previous query is +// available +#define QUERY_OBJECT_NUM 8 + +struct gl_timer { + GL *gl; + GLuint query[QUERY_OBJECT_NUM]; + int query_idx; + + // these numbers are all in nanoseconds + uint64_t samples[PERF_SAMPLE_COUNT]; + int sample_idx; + int sample_count; + + uint64_t avg_sum; + uint64_t peak; +}; + +struct mp_pass_perf gl_timer_measure(struct gl_timer *timer) +{ + assert(timer); + struct mp_pass_perf res = { + .count = timer->sample_count, + .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT, + .peak = timer->peak, + .samples = timer->samples, + }; + + res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT]; + + if (timer->sample_count > 0) { + res.avg = timer->avg_sum / timer->sample_count; + } + + return res; +} + +struct gl_timer *gl_timer_create(GL *gl) +{ + struct gl_timer *timer = talloc_ptrtype(NULL, timer); + *timer = (struct gl_timer){ .gl = gl }; + + if (gl->GenQueries) + gl->GenQueries(QUERY_OBJECT_NUM, timer->query); + + return timer; +} + +void gl_timer_free(struct gl_timer *timer) +{ + if (!timer) + return; + + GL *gl = timer->gl; + if (gl && gl->DeleteQueries) { + // this is a no-op on already uninitialized queries + gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query); + } + + talloc_free(timer); +} + +static void gl_timer_record(struct gl_timer *timer, GLuint64 new) +{ + // Input res into the buffer and grab the previous value + uint64_t old = timer->samples[timer->sample_idx]; + timer->samples[timer->sample_idx++] = new; + timer->sample_idx %= PERF_SAMPLE_COUNT; + + // Update average and sum + timer->avg_sum = timer->avg_sum + new - old; + timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT); + + // Update peak if necessary + if (new >= timer->peak) { + timer->peak = new; + } else if (timer->peak == old) { + // It's possible that the last peak was the value we just removed, + // if so we need to scan for the new 
peak + uint64_t peak = new; + for (int i = 0; i < PERF_SAMPLE_COUNT; i++) + peak = MPMAX(peak, timer->samples[i]); + timer->peak = peak; + } +} + +// If no free query is available, this can block. Shouldn't ever happen in +// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM) +// IMPORTANT: only one gl_timer object may ever be active at a single time. +// The caling code *MUST* ensure this +void gl_timer_start(struct gl_timer *timer) +{ + assert(timer); + GL *gl = timer->gl; + if (!gl->BeginQuery) + return; + + // Get the next query object + GLuint id = timer->query[timer->query_idx++]; + timer->query_idx %= QUERY_OBJECT_NUM; + + // If this query object already holds a result, we need to get and + // record it first + if (gl->IsQuery(id)) { + GLuint64 elapsed; + gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed); + gl_timer_record(timer, elapsed); + } + + gl->BeginQuery(GL_TIME_ELAPSED, id); +} + +void gl_timer_stop(GL *gl) +{ + if (gl->EndQuery) + gl->EndQuery(GL_TIME_ELAPSED); +} + +// Upload a texture, going through a PBO. PBO supposedly can facilitate +// asynchronous copy from CPU to GPU, so this is an optimization. Note that +// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can +// ruin performance. +// This call is like gl_upload_tex(), plus PBO management/use. 
+// target, format, type, dataptr, stride, x, y, w, h: texture upload params +// (see gl_upload_tex()) +// tex_w, tex_h: maximum size of the used texture +// use_pbo: for convenience, if false redirects the call to gl_upload_tex +void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo, + GLenum target, GLenum format, GLenum type, + int tex_w, int tex_h, const void *dataptr, int stride, + int x, int y, int w, int h) +{ + assert(x >= 0 && y >= 0 && w >= 0 && h >= 0); + assert(x + w <= tex_w && y + h <= tex_h); + + if (!use_pbo) { + gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h); + return; + } + + // We align the buffer size to 4096 to avoid possible subregion + // dependencies. This is not a strict requirement (the spec requires no + // alignment), but a good precaution for performance reasons + size_t needed_size = stride * h; + size_t buffer_size = MP_ALIGN_UP(needed_size, 4096); + + if (buffer_size != pbo->buffer_size) + gl_pbo_upload_uninit(pbo); + + if (!pbo->buffer) { + pbo->gl = gl; + pbo->buffer_size = buffer_size; + gl->GenBuffers(1, &pbo->buffer); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer); + // Magic time: Because we memcpy once from RAM to the buffer, and then + // the GPU needs to read from this anyway, we actually *don't* want + // this buffer to be allocated in RAM. If we allocate it in VRAM + // instead, we can reduce this to a single copy: from RAM into VRAM. + // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best + // allocated on host memory instead of device memory, so we lie about + // the usage to fool the driver into giving us a buffer in VRAM instead + // of RAM, which can be significantly faster for our use case. + // Seriously, fuck OpenGL. 
+ gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size, + NULL, GL_STREAM_COPY); + } + + uintptr_t offset = buffer_size * pbo->index; + pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS; + + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer); + gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr); + gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +} + +void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo) +{ + if (pbo->gl) + pbo->gl->DeleteBuffers(1, &pbo->buffer); + + *pbo = (struct gl_pbo_upload){0}; +} + +// The intention is to return the actual depth of any fixed point 16 bit +// textures. (Actually tests only 1 format - hope that is good enough.) +int gl_determine_16bit_tex_depth(GL *gl) +{ + const struct gl_format *fmt = gl_find_unorm_format(gl, 2, 1); + if (!gl->GetTexLevelParameteriv || !fmt) { + // ANGLE supports ES 3.0 and the extension, but lacks the function above. + if (gl->mpgl_caps & MPGL_CAP_EXT16) + return 16; + return -1; + } + + GLuint tex; + gl->GenTextures(1, &tex); + gl->BindTexture(GL_TEXTURE_2D, tex); + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, 64, 64, 0, + fmt->format, fmt->type, NULL); + GLenum pname = 0; + switch (fmt->format) { + case GL_RED: pname = GL_TEXTURE_RED_SIZE; break; + case GL_LUMINANCE: pname = GL_TEXTURE_LUMINANCE_SIZE; break; + } + GLint param = -1; + if (pname) + gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, pname, ¶m); + gl->DeleteTextures(1, &tex); + return param; +} + +int gl_get_fb_depth(GL *gl, int fbo) +{ + if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB)) + return -1; + + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + + GLenum obj = gl->version ? 
GL_BACK_LEFT : GL_BACK; + if (fbo) + obj = GL_COLOR_ATTACHMENT0; + + GLint depth_g = -1; + + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + return depth_g > 0 ? depth_g : -1; +} -- cgit v1.2.3