summaryrefslogtreecommitdiffstats
path: root/video/out/vo_opengl_old.c
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2013-01-13 23:26:26 +0100
committerwm4 <wm4@nowhere>2013-01-13 23:30:13 +0100
commitf0a08a9fdf3ac9b30089820e61606ace493f4fcd (patch)
treefbea9bb25568b3bcfa254099ad3e11d29777bcee /video/out/vo_opengl_old.c
parent40567656435a8d967ee9b55e7b1ba12d32d77734 (diff)
downloadmpv-f0a08a9fdf3ac9b30089820e61606ace493f4fcd.tar.bz2
mpv-f0a08a9fdf3ac9b30089820e61606ace493f4fcd.tar.xz
gl_common: move things used by vo_opengl_old.c only to vo_opengl_old.c
Having this in gl_common is confusing.
Diffstat (limited to 'video/out/vo_opengl_old.c')
-rw-r--r--video/out/vo_opengl_old.c1241
1 files changed, 1241 insertions, 0 deletions
diff --git a/video/out/vo_opengl_old.c b/video/out/vo_opengl_old.c
index d010daacb9..8c32676e1d 100644
--- a/video/out/vo_opengl_old.c
+++ b/video/out/vo_opengl_old.c
@@ -42,6 +42,7 @@
#include "gl_osd.h"
#include "aspect.h"
#include "video/memcpy_pic.h"
+#include "pnm_loader.h"
//for gl_priv.use_yuv
#define MASK_ALL_YUV (~(1 << YUV_CONVERSION_NONE))
@@ -110,6 +111,1246 @@ struct gl_priv {
unsigned int slice_height;
};
+// Forward declarations of file-local helpers, so they can be referenced
+// before the point where they are defined.
+static int glFindFormat(uint32_t format, int have_texture_rg, int *bpp,
+ GLint *gl_texfmt, GLenum *gl_format, GLenum *gl_type);
+static void glCreateClearTex(GL *gl, GLenum target, GLenum fmt, GLenum format,
+ GLenum type, GLint filter, int w, int h,
+ unsigned char val);
+static int glCreatePPMTex(GL *gl, GLenum target, GLenum fmt, GLint filter,
+ FILE *f, int *width, int *height, int *maxval);
+static void glDrawTex(GL *gl, GLfloat x, GLfloat y, GLfloat w, GLfloat h,
+ GLfloat tx, GLfloat ty, GLfloat tw, GLfloat th,
+ int sx, int sy, int rect_tex, int is_yv12, int flip);
+// Called by glSetupYUVFragmentATI(), which precedes the definition.
+static int loadGPUProgram(GL *gl, GLenum target, char *prog);
+//! do not use YUV conversion, this should always stay 0
+#define YUV_CONVERSION_NONE 0
+//! use nVidia specific register combiners for YUV conversion
+//! implementation has been removed
+#define YUV_CONVERSION_COMBINERS 1
+//! use a fragment program for YUV conversion
+#define YUV_CONVERSION_FRAGMENT 2
+//! use a fragment program for YUV conversion with gamma using POW
+#define YUV_CONVERSION_FRAGMENT_POW 3
+//! use a fragment program with additional table lookup for YUV conversion
+#define YUV_CONVERSION_FRAGMENT_LOOKUP 4
+//! use ATI specific register combiners ("fragment program")
+#define YUV_CONVERSION_COMBINERS_ATI 5
+//! use a fragment program with 3D table lookup for YUV conversion
+#define YUV_CONVERSION_FRAGMENT_LOOKUP3D 6
+//! use ATI specific "text" register combiners ("fragment program")
+#define YUV_CONVERSION_TEXT_FRAGMENT 7
+//! use normal bilinear scaling for textures
+#define YUV_SCALER_BILIN 0
+//! use higher quality bicubic scaling for textures
+#define YUV_SCALER_BICUB 1
+//! use cubic scaling in X and normal linear scaling in Y direction
+#define YUV_SCALER_BICUB_X 2
+//! use cubic scaling without additional lookup texture
+#define YUV_SCALER_BICUB_NOTEX 3
+//! use the unsharp_filt_template fragment program snippet (see add_scaler)
+#define YUV_SCALER_UNSHARP 4
+//! use the unsharp_filt_template2 fragment program snippet (see add_scaler)
+#define YUV_SCALER_UNSHARP2 5
+//! mask for conversion type
+#define YUV_CONVERSION_MASK 0xF
+//! mask for scaler type
+#define YUV_SCALER_MASK 0xF
+//! shift value for luminance scaler type
+#define YUV_LUM_SCALER_SHIFT 8
+//! shift value for chrominance scaler type
+#define YUV_CHROM_SCALER_SHIFT 12
+//! extract conversion out of type
+#define YUV_CONVERSION(t) ((t) & YUV_CONVERSION_MASK)
+//! extract luminance scaler out of type
+#define YUV_LUM_SCALER(t) (((t) >> YUV_LUM_SCALER_SHIFT) & YUV_SCALER_MASK)
+//! extract chrominance scaler out of type
+#define YUV_CHROM_SCALER(t) (((t) >> YUV_CHROM_SCALER_SHIFT) & YUV_SCALER_MASK)
+//! build the conversion bits of a type value (counterpart of YUV_CONVERSION)
+#define SET_YUV_CONVERSION(c) ((c) & YUV_CONVERSION_MASK)
+//! build the luminance scaler bits of a type value (counterpart of YUV_LUM_SCALER)
+#define SET_YUV_LUM_SCALER(s) (((s) & YUV_SCALER_MASK) << YUV_LUM_SCALER_SHIFT)
+//! build the chrominance scaler bits of a type value (counterpart of YUV_CHROM_SCALER)
+#define SET_YUV_CHROM_SCALER(s) (((s) & YUV_SCALER_MASK) << YUV_CHROM_SCALER_SHIFT)
+/**
+ * \brief tells whether a yuv conversion supports large brightness range etc.
+ * \param conv conversion type, one of the YUV_CONVERSION_* values
+ * \return 0 for YUV_CONVERSION_NONE, YUV_CONVERSION_COMBINERS_ATI,
+ *         YUV_CONVERSION_FRAGMENT_LOOKUP3D and YUV_CONVERSION_TEXT_FRAGMENT,
+ *         1 for every other value
+ */
+static inline int glYUVLargeRange(int conv)
+{
+    const int limited_range = conv == YUV_CONVERSION_NONE ||
+                              conv == YUV_CONVERSION_COMBINERS_ATI ||
+                              conv == YUV_CONVERSION_FRAGMENT_LOOKUP3D ||
+                              conv == YUV_CONVERSION_TEXT_FRAGMENT;
+    return !limited_range;
+}
+// Parameter bundle handed to the YUV conversion setup code
+// (see glSetupYUVConversion and create_conv_textures).
+typedef struct {
+ // GL texture target -- presumably that of the video textures; TODO confirm
+ GLenum target;
+ // packed selection value; YUV_CONVERSION() is applied to it to obtain the
+ // conversion type
+ int type;
+ // colorspace parameters; the r/g/b gamma members feed the gamma lookup
+ // table and the whole struct feeds the 3D yuv->rgb lookup table
+ struct mp_csp_params csp_params;
+ // NOTE(review): presumably luma texture size in texels -- confirm at caller
+ int texw;
+ int texh;
+ // NOTE(review): presumably chroma texture size in texels -- confirm at caller
+ int chrom_texw;
+ int chrom_texh;
+ // NOTE(review): presumably the strength for the unsharp scalers -- confirm
+ float filter_strength;
+ // NOTE(review): presumably the strength for the noise filter -- confirm
+ float noise_strength;
+} gl_conversion_params_t;
+
+// Forward declarations of the YUV conversion entry points.
+static int glAutodetectYUVConversion(GL *gl);
+static void glSetupYUVConversion(GL *gl, gl_conversion_params_t *params);
+static void glEnableYUVConversion(GL *gl, GLenum target, int type);
+static void glDisableYUVConversion(GL *gl, GLenum target, int type);
+
+//! always return this format as internal texture format in glFindFormat
+// The macro is undefined again on the very next line, so the
+// "#ifdef TEXTUREFORMAT_ALWAYS" override in glFindFormat is compiled out
+// unless the #undef is removed.
+#define TEXTUREFORMAT_ALWAYS GL_RGB8
+#undef TEXTUREFORMAT_ALWAYS
+
+/**
+ * \brief find the OpenGL settings corresponding to format.
+ *
+ * All pointer parameters may be NULL.
+ * \param fmt MPlayer format to analyze.
+ * \param have_texture_rg if non-zero, 16 bit planar YUV is reported as
+ *                        GL_R16/GL_RED instead of GL_LUMINANCE16/GL_LUMINANCE.
+ * \param dummy reserved, neither read nor written.
+ * \param gl_texfmt [OUT] internal texture format that fits the
+ * image format, not necessarily the best for performance.
+ * \param gl_format [OUT] OpenGL format for this image format.
+ * \param gl_type [OUT] OpenGL type for this image format.
+ * \return 1 if format is supported by OpenGL, 0 if not.
+ * \ingroup gltexture
+ */
+static int glFindFormat(uint32_t fmt, int have_texture_rg, int *dummy,
+                        GLint *gl_texfmt, GLenum *gl_format, GLenum *gl_type)
+{
+    int supported = 1;
+    // scratch targets, so the code below can store results unconditionally
+    // even when the caller passed NULL for an output
+    GLenum dummy2;
+    GLint dummy3;
+    if (!gl_texfmt)
+        gl_texfmt = &dummy3;
+    if (!gl_format)
+        gl_format = &dummy2;
+    if (!gl_type)
+        gl_type = &dummy2;
+
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(fmt);
+    if (desc.flags & MP_IMGFLAG_YUV_P) {
+        // reduce the possible cases a bit
+        if (desc.plane_bits > 8)
+            fmt = IMGFMT_420P16;
+        else
+            fmt = IMGFMT_420P;
+    }
+
+    *gl_texfmt = 3;
+    switch (fmt) {
+    case IMGFMT_RGB48:
+        *gl_format = GL_RGB;
+        *gl_type = GL_UNSIGNED_SHORT;
+        break;
+    case IMGFMT_RGB24:
+        *gl_format = GL_RGB;
+        *gl_type = GL_UNSIGNED_BYTE;
+        break;
+    case IMGFMT_RGBA:
+        *gl_texfmt = 4;
+        *gl_format = GL_RGBA;
+        *gl_type = GL_UNSIGNED_BYTE;
+        break;
+    case IMGFMT_420P16:
+        supported = 0; // no native YUV support
+        *gl_texfmt = have_texture_rg ? GL_R16 : GL_LUMINANCE16;
+        *gl_format = have_texture_rg ? GL_RED : GL_LUMINANCE;
+        *gl_type = GL_UNSIGNED_SHORT;
+        break;
+    case IMGFMT_420P:
+        supported = 0; // no native YV12 support
+        // fallthrough: a single plane is described like 8 bit gray
+    case IMGFMT_Y8:
+        *gl_texfmt = 1;
+        *gl_format = GL_LUMINANCE;
+        *gl_type = GL_UNSIGNED_BYTE;
+        break;
+    case IMGFMT_UYVY:
+        *gl_texfmt = GL_YCBCR_MESA;
+        *gl_format = GL_YCBCR_MESA;
+        // Only UYVY reaches this case, so the type is always the non-REV
+        // variant (the former "fmt == IMGFMT_UYVY ? ... : ..._REV" test
+        // could never pick the other branch).
+        *gl_type = GL_UNSIGNED_SHORT_8_8;
+        break;
+#if 0
+    // we do not support palettized formats, although the format the
+    // swscale produces works
+    case IMGFMT_RGB8:
+        *gl_format = GL_RGB;
+        *gl_type = GL_UNSIGNED_BYTE_2_3_3_REV;
+        break;
+#endif
+    case IMGFMT_RGB15:
+        *gl_format = GL_RGBA;
+        *gl_type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
+        break;
+    case IMGFMT_RGB16:
+        *gl_format = GL_RGB;
+        *gl_type = GL_UNSIGNED_SHORT_5_6_5_REV;
+        break;
+#if 0
+    case IMGFMT_BGR8:
+        // special case as red and blue have a different number of bits.
+        // GL_BGR and GL_UNSIGNED_BYTE_3_3_2 isn't supported at least
+        // by nVidia drivers, and in addition would give more bits to
+        // blue than to red, which isn't wanted
+        *gl_format = GL_RGB;
+        *gl_type = GL_UNSIGNED_BYTE_3_3_2;
+        break;
+#endif
+    case IMGFMT_BGR15:
+        *gl_format = GL_BGRA;
+        *gl_type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
+        break;
+    case IMGFMT_BGR16:
+        *gl_format = GL_RGB;
+        *gl_type = GL_UNSIGNED_SHORT_5_6_5;
+        break;
+    case IMGFMT_BGR24:
+        *gl_format = GL_BGR;
+        *gl_type = GL_UNSIGNED_BYTE;
+        break;
+    case IMGFMT_BGRA:
+        *gl_texfmt = 4;
+        *gl_format = GL_BGRA;
+        *gl_type = GL_UNSIGNED_BYTE;
+        break;
+    default:
+        // unknown format: report RGBA defaults but flag it as unsupported
+        *gl_texfmt = 4;
+        *gl_format = GL_RGBA;
+        *gl_type = GL_UNSIGNED_BYTE;
+        supported = 0;
+    }
+#ifdef TEXTUREFORMAT_ALWAYS
+    *gl_texfmt = TEXTUREFORMAT_ALWAYS;
+#endif
+    return supported;
+}
+
+/**
+ * \brief create a texture and set some defaults
+ * \param target texture target, usually GL_TEXTURE_2D
+ * \param fmt internal texture format
+ * \param format texture host data format
+ * \param type texture host data type
+ * \param filter filter used for scaling, e.g. GL_LINEAR
+ * \param w texture width
+ * \param h texture height
+ * \param val luminance value to fill texture with
+ * \ingroup gltexture
+ *
+ * A width or height of 0 is treated as 1. Nothing is done if glFmt2bpp()
+ * reports 0 bytes per pixel for format/type. If the temporary clear buffer
+ * cannot be allocated, the texture is still created, but with undefined
+ * contents.
+ */
+static void glCreateClearTex(GL *gl, GLenum target, GLenum fmt, GLenum format,
+                             GLenum type, GLint filter, int w, int h,
+                             unsigned char val)
+{
+    GLfloat fval = (GLfloat)val / 255.0;
+    GLfloat border[4] = {
+        fval, fval, fval, fval
+    };
+    int stride;
+    size_t size;
+    char *init;
+    if (w == 0)
+        w = 1;
+    if (h == 0)
+        h = 1;
+    stride = w * glFmt2bpp(format, type);
+    if (!stride)
+        return;
+    // compute the byte count in size_t so that large textures do not
+    // overflow int
+    size = (size_t)stride * h;
+    init = malloc(size);
+    if (init) {
+        memset(init, val, size);
+    } else {
+        // A NULL data pointer makes TexImage2D allocate the texture storage
+        // without initializing it; that beats crashing in memset().
+        mp_msg(MSGT_VO, MSGL_ERR,
+               "[gl] Out of memory, texture is created uninitialized\n");
+    }
+    glAdjustAlignment(gl, stride);
+    gl->PixelStorei(GL_UNPACK_ROW_LENGTH, w);
+    gl->TexImage2D(target, 0, fmt, w, h, 0, format, type, init);
+    gl->TexParameterf(target, GL_TEXTURE_PRIORITY, 1.0);
+    gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, filter);
+    gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, filter);
+    gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    // Border texels should not be used with CLAMP_TO_EDGE
+    // We set a sane default anyway.
+    gl->TexParameterfv(target, GL_TEXTURE_BORDER_COLOR, border);
+    free(init);
+}
+
+/**
+ * \brief pick an internal texture format for high bit depth RGB data
+ * \return GL_RGB32F if the extension string contains "_texture_float",
+ *         GL_FLOAT_RGB32_NV if it contains "NV_float_buffer",
+ *         GL_RGB16 otherwise (also when no extension string is available)
+ */
+static GLint detect_hqtexfmt(GL *gl)
+{
+    const char *extensions = (const char *)gl->GetString(GL_EXTENSIONS);
+    // GetString may return NULL (e.g. on error); strstr() on NULL is
+    // undefined, so fall back to the format that needs no extension.
+    if (!extensions)
+        return GL_RGB16;
+    if (strstr(extensions, "_texture_float"))
+        return GL_RGB32F;
+    else if (strstr(extensions, "NV_float_buffer"))
+        return GL_FLOAT_RGB32_NV;
+    return GL_RGB16;
+}
+
+/**
+ * \brief creates a texture from a PPM file
+ * \param target texture target, usually GL_TEXTURE_2D
+ * \param fmt internal texture format, 0 for default
+ * \param filter filter used for scaling, e.g. GL_LINEAR
+ * \param f file to read PPM from
+ * \param width [out] width of texture, may be NULL
+ * \param height [out] height of texture, may be NULL
+ * \param maxval [out] maxval value from PPM file, may be NULL
+ * \return 0 on error, 1 otherwise
+ * \ingroup gltexture
+ *
+ * Only images with 3 or 6 bytes per pixel are accepted.
+ */
+static int glCreatePPMTex(GL *gl, GLenum target, GLenum fmt, GLint filter,
+                          FILE *f, int *width, int *height, int *maxval)
+{
+    int img_w, img_h, img_max, bytes_pp;
+    uint8_t *pixels = read_pnm(f, &img_w, &img_h, &bytes_pp, &img_max);
+    GLint hq_fmt = detect_hqtexfmt(gl);
+    GLenum data_type;
+    int wide;
+
+    if (!pixels || (bytes_pp != 3 && bytes_pp != 6)) {
+        free(pixels);
+        return 0;
+    }
+
+    // 6 bytes per pixel means 16 bit components
+    wide = bytes_pp == 6;
+    if (!fmt) {
+        if (wide)
+            fmt = hq_fmt;
+        else
+            fmt = 3;
+        // the NV float format is only used with rectangle textures here
+        if (fmt == GL_FLOAT_RGB32_NV && target != GL_TEXTURE_RECTANGLE)
+            fmt = GL_RGB16;
+    }
+    if (wide)
+        data_type = GL_UNSIGNED_SHORT;
+    else
+        data_type = GL_UNSIGNED_BYTE;
+
+    glCreateClearTex(gl, target, fmt, GL_RGB, data_type, filter,
+                     img_w, img_h, 0);
+    glUploadTex(gl, target, GL_RGB, data_type,
+                pixels, img_w * bytes_pp, 0, 0, img_w, img_h, 0);
+    free(pixels);
+
+    if (width)
+        *width = img_w;
+    if (height)
+        *height = img_h;
+    if (maxval)
+        *maxval = img_max;
+    return 1;
+}
+
+
+/**
+ * \brief Setup ATI version of register combiners for YUV to RGB conversion.
+ * \param csp_params parameters used for colorspace conversion
+ * \param text if set use the GL_ATI_text_fragment_shader API as
+ *             used on OS X.
+ *
+ * The conversion coefficients are scaled by 0.125, shifted by 0.5 and
+ * clamped to [0, 1] before they are handed to the GL.
+ */
+static void glSetupYUVFragmentATI(GL *gl, struct mp_csp_params *csp_params,
+                                  int text)
+{
+    GLint avail;
+    float yuv2rgb[3][4];
+    int row, col;
+
+    gl->GetIntegerv(GL_MAX_TEXTURE_UNITS, &avail);
+    if (avail < 3)
+        mp_msg(MSGT_VO, MSGL_ERR,
+               "[gl] 3 texture units needed for YUV combiner (ATI) support (found %i)\n", avail);
+
+    mp_get_yuv2rgb_coeffs(csp_params, yuv2rgb);
+    for (row = 0; row < 3; row++) {
+        // adjust the offset column by half the sum of both chroma columns
+        yuv2rgb[row][3] -= -0.5 * (yuv2rgb[row][1] + yuv2rgb[row][2]);
+        for (col = 0; col < 4; col++) {
+            float v = yuv2rgb[row][col];
+            v *= 0.125;
+            v += 0.5;
+            if (v > 1)
+                v = 1;
+            if (v < 0)
+                v = 0;
+            yuv2rgb[row][col] = v;
+        }
+    }
+
+    if (text != 0) {
+        // text variant: print the constants into a program string and load it
+        static const char template[] =
+            "!!ATIfs1.0\n"
+            "StartConstants;\n"
+            " CONSTANT c0 = {%e, %e, %e};\n"
+            " CONSTANT c1 = {%e, %e, %e};\n"
+            " CONSTANT c2 = {%e, %e, %e};\n"
+            " CONSTANT c3 = {%e, %e, %e};\n"
+            "EndConstants;\n"
+            "StartOutputPass;\n"
+            " SampleMap r0, t0.str;\n"
+            " SampleMap r1, t1.str;\n"
+            " SampleMap r2, t2.str;\n"
+            " MUL r1.rgb, r1.bias, c1.bias;\n"
+            " MAD r2.rgb, r2.bias, c2.bias, r1;\n"
+            " MAD r0.rgb, r0, c0.bias, r2;\n"
+            " ADD r0.rgb.8x, r0, c3.bias;\n"
+            "EndPass;\n";
+        char buffer[512];
+        snprintf(buffer, sizeof(buffer), template,
+                 yuv2rgb[0][0], yuv2rgb[1][0], yuv2rgb[2][0],
+                 yuv2rgb[0][1], yuv2rgb[1][1], yuv2rgb[2][1],
+                 yuv2rgb[0][2], yuv2rgb[1][2], yuv2rgb[2][2],
+                 yuv2rgb[0][3], yuv2rgb[1][3], yuv2rgb[2][3]);
+        mp_msg(MSGT_VO, MSGL_DBG2, "[gl] generated fragment program:\n%s\n",
+               buffer);
+        loadGPUProgram(gl, GL_TEXT_FRAGMENT_SHADER_ATI, buffer);
+        return;
+    }
+
+    // function call variant: one constant per matrix column, the fourth
+    // element of each constant is left zero-initialized
+    GLfloat c0[4] = { yuv2rgb[0][0], yuv2rgb[1][0], yuv2rgb[2][0] };
+    GLfloat c1[4] = { yuv2rgb[0][1], yuv2rgb[1][1], yuv2rgb[2][1] };
+    GLfloat c2[4] = { yuv2rgb[0][2], yuv2rgb[1][2], yuv2rgb[2][2] };
+    GLfloat c3[4] = { yuv2rgb[0][3], yuv2rgb[1][3], yuv2rgb[2][3] };
+    if (!gl->BeginFragmentShader || !gl->EndFragmentShader ||
+        !gl->SetFragmentShaderConstant || !gl->SampleMap ||
+        !gl->ColorFragmentOp2 || !gl->ColorFragmentOp3) {
+        mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Combiner (ATI) functions missing!\n");
+        return;
+    }
+    gl->GetIntegerv(GL_NUM_FRAGMENT_REGISTERS_ATI, &avail);
+    if (avail < 3)
+        mp_msg(MSGT_VO, MSGL_ERR,
+               "[gl] 3 registers needed for YUV combiner (ATI) support (found %i)\n", avail);
+    gl->BeginFragmentShader();
+    gl->SetFragmentShaderConstant(GL_CON_0_ATI, c0);
+    gl->SetFragmentShaderConstant(GL_CON_1_ATI, c1);
+    gl->SetFragmentShaderConstant(GL_CON_2_ATI, c2);
+    gl->SetFragmentShaderConstant(GL_CON_3_ATI, c3);
+    gl->SampleMap(GL_REG_0_ATI, GL_TEXTURE0, GL_SWIZZLE_STR_ATI);
+    gl->SampleMap(GL_REG_1_ATI, GL_TEXTURE1, GL_SWIZZLE_STR_ATI);
+    gl->SampleMap(GL_REG_2_ATI, GL_TEXTURE2, GL_SWIZZLE_STR_ATI);
+    gl->ColorFragmentOp2(GL_MUL_ATI, GL_REG_1_ATI, GL_NONE, GL_NONE,
+                         GL_REG_1_ATI, GL_NONE, GL_BIAS_BIT_ATI,
+                         GL_CON_1_ATI, GL_NONE, GL_BIAS_BIT_ATI);
+    gl->ColorFragmentOp3(GL_MAD_ATI, GL_REG_2_ATI, GL_NONE, GL_NONE,
+                         GL_REG_2_ATI, GL_NONE, GL_BIAS_BIT_ATI,
+                         GL_CON_2_ATI, GL_NONE, GL_BIAS_BIT_ATI,
+                         GL_REG_1_ATI, GL_NONE, GL_NONE);
+    gl->ColorFragmentOp3(GL_MAD_ATI, GL_REG_0_ATI, GL_NONE, GL_NONE,
+                         GL_REG_0_ATI, GL_NONE, GL_NONE,
+                         GL_CON_0_ATI, GL_NONE, GL_BIAS_BIT_ATI,
+                         GL_REG_2_ATI, GL_NONE, GL_NONE);
+    gl->ColorFragmentOp2(GL_ADD_ATI, GL_REG_0_ATI, GL_NONE, GL_8X_BIT_ATI,
+                         GL_REG_0_ATI, GL_NONE, GL_NONE,
+                         GL_CON_3_ATI, GL_NONE, GL_BIAS_BIT_ATI);
+    gl->EndFragmentShader();
+}
+
+// Replace all occurrences of variables named "$"+name (e.g. $foo) in *text
+// with replace, and store the result back into *text. *text must have been
+// allocated with talloc; the old string is freed on every replacement.
+// A match is skipped when it is followed by an alphanumeric character or '_',
+// so that $foo does not clobber the prefix of $foo_bar.
+// The inserted replacement text is not scanned again. If allocating the new
+// string fails, *text is left as it was at that point.
+static void replace_var_str(char **text, const char *name, const char *replace)
+{
+    size_t namelen = strlen(name);
+    size_t replacelen = strlen(replace);
+    char *nextvar = *text;
+    void *parent = talloc_parent(*text);
+    for (;;) {
+        nextvar = strchr(nextvar, '$');
+        if (!nextvar)
+            break;
+        char *until = nextvar;
+        nextvar++;
+        if (strncmp(nextvar, name, namelen) != 0)
+            continue;
+        nextvar += namelen;
+        // try not to replace prefixes of other vars (e.g. $foo vs. $foo_bar)
+        // (unsigned char: isalnum() is undefined for negative char values)
+        unsigned char term = nextvar[0];
+        if (isalnum(term) || term == '_')
+            continue;
+        int prelength = until - *text;
+        char *n = talloc_asprintf(parent, "%.*s%s%s", prelength, *text, replace,
+                                  nextvar);
+        if (!n)
+            return;
+        talloc_free(*text);
+        *text = n;
+        // Continue right behind the inserted text. The offset of the match
+        // end in the OLD string must not be used here: if the replacement is
+        // shorter than "$name" it would skip text, or even point past the
+        // end of the new string.
+        nextvar = *text + prelength + replacelen;
+    }
+}
+
+// Like replace_var_str(), but the replacement is the float value printed
+// with the "%e" format.
+static void replace_var_float(char **text, const char *name, float replace)
+{
+    char *formatted = talloc_asprintf(NULL, "%e", replace);
+
+    replace_var_str(text, name, formatted);
+    talloc_free(formatted);
+}
+
+// Like replace_var_str(), but the replacement is a single character.
+static void replace_var_char(char **text, const char *name, char replace)
+{
+    char one_char[2];
+
+    one_char[0] = replace;
+    one_char[1] = '\0';
+    replace_var_str(text, name, one_char);
+}
+
+// Append template to *text. Initializes *text with a fresh talloc string if
+// it is NULL. text itself must point to a valid char pointer.
+static void append_template(char **text, const char* template)
+{
+    // The check has to look at *text: testing text itself would only take
+    // the "initialize" branch when text is NULL, and then dereference it.
+    if (!*text)
+        *text = talloc_strdup(NULL, template);
+    else
+        *text = talloc_strdup_append(*text, template);
+}
+
+/**
+ * \brief helper function for gen_spline_lookup_tex
+ * \param x subpixel-position ((0,1) range) to calculate weights for
+ * \param dst where to store transformed weights, must provide space for 4 GLfloats
+ *
+ * Evaluates four cubic weight polynomials at x and writes two adjusted
+ * offsets, the sum of the first two weights and a 0 to dst[0..3], which is
+ * the layout the scaler fragment program reads.
+ */
+static void store_weights(float x, GLfloat *dst)
+{
+    const float w0 = (((-1 * x + 3) * x - 3) * x + 1) / 6;
+    const float w1 = (((3 * x - 6) * x + 0) * x + 4) / 6;
+    const float w2 = (((-3 * x + 3) * x + 3) * x + 1) / 6;
+    const float w3 = (((1 * x + 0) * x + 0) * x + 0) / 6;
+
+    dst[0] = 1 + x - w1 / (w0 + w1);
+    dst[1] = 1 - x + w3 / (w2 + w3);
+    dst[2] = w0 + w1;
+    dst[3] = 0;
+}
+
+//! to avoid artefacts this should be rather large
+#define LOOKUP_BSPLINE_RES (2 * 1024)
+/**
+ * \brief creates the 1D lookup texture needed for fast higher-order filtering
+ * \param unit texture unit to attach texture to
+ *
+ * The texture has LOOKUP_BSPLINE_RES RGBA entries, each filled by
+ * store_weights(). Texture unit GL_TEXTURE0 is active again on return.
+ * If the temporary table cannot be allocated, an error is logged and no
+ * texture is created.
+ */
+static void gen_spline_lookup_tex(GL *gl, GLenum unit)
+{
+    GLfloat *tex = calloc(4 * LOOKUP_BSPLINE_RES, sizeof(*tex));
+    GLfloat *tp = tex;
+    int i;
+    if (!tex) {
+        mp_msg(MSGT_VO, MSGL_ERR,
+               "[gl] Out of memory for spline lookup texture\n");
+        return;
+    }
+    for (i = 0; i < LOOKUP_BSPLINE_RES; i++) {
+        // sample at the center of each texel
+        float x = (float)(i + 0.5) / LOOKUP_BSPLINE_RES;
+        store_weights(x, tp);
+        tp += 4;
+    }
+    // the first and last entry get the weights for the exact end points
+    store_weights(0, tex);
+    store_weights(1, &tex[4 * (LOOKUP_BSPLINE_RES - 1)]);
+    gl->ActiveTexture(unit);
+    gl->TexImage1D(GL_TEXTURE_1D, 0, GL_RGBA16, LOOKUP_BSPLINE_RES, 0, GL_RGBA,
+                   GL_FLOAT, tex);
+    gl->TexParameterf(GL_TEXTURE_1D, GL_TEXTURE_PRIORITY, 1.0);
+    gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+    gl->ActiveTexture(GL_TEXTURE0);
+    free(tex);
+}
+
+//! number of entries in the noise lookup texture
+#define NOISE_RES 2048
+
+/**
+ * \brief creates the 1D lookup texture needed to generate pseudo-random numbers.
+ * \param unit texture unit to attach texture to
+ *
+ * The table holds the values i / (NOISE_RES - 1) in an order shuffled by a
+ * fixed-seed linear congruential generator, so the result is the same on
+ * every run. Texture unit GL_TEXTURE0 is active again on return.
+ * If the temporary table cannot be allocated, an error is logged and no
+ * texture is created.
+ */
+static void gen_noise_lookup_tex(GL *gl, GLenum unit) {
+    GLfloat *tex = calloc(NOISE_RES, sizeof(*tex));
+    uint32_t lcg = 0x79381c11;
+    int i;
+    if (!tex) {
+        mp_msg(MSGT_VO, MSGL_ERR,
+               "[gl] Out of memory for noise lookup texture\n");
+        return;
+    }
+    for (i = 0; i < NOISE_RES; i++)
+        tex[i] = (double)i / (NOISE_RES - 1);
+    // swap every entry with a pseudo-randomly chosen entry at or behind it
+    for (i = 0; i < NOISE_RES - 1; i++) {
+        int remain = NOISE_RES - i;
+        int idx = i + (lcg >> 16) % remain;
+        GLfloat tmp = tex[i];
+        tex[i] = tex[idx];
+        tex[idx] = tmp;
+        lcg = lcg * 1664525 + 1013904223;
+    }
+    gl->ActiveTexture(unit);
+    gl->TexImage1D(GL_TEXTURE_1D, 0, 1, NOISE_RES, 0, GL_RED, GL_FLOAT, tex);
+    gl->TexParameterf(GL_TEXTURE_1D, GL_TEXTURE_PRIORITY, 1.0);
+    gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+    gl->ActiveTexture(GL_TEXTURE0);
+    free(tex);
+}
+
+// Fragment program text that samples `texture` at `coord` into the temporary
+// "textemp" and then moves its red component into `dest`. All three
+// arguments are string literals that get pasted into the program text;
+// $tex_type is a placeholder substituted later by add_scaler().
+#define SAMPLE(dest, coord, texture) \
+ "TEX textemp, " coord ", " texture ", $tex_type;\n" \
+ "MOV " dest ", textemp.r;\n"
+
+// Scaler snippet for YUV_SCALER_BILIN: a single texture read at the
+// unmodified texture coordinate, stored in yuv.$out_comp.
+static const char bilin_filt_template[] =
+ SAMPLE("yuv.$out_comp","fragment.texcoord[$in_tex]","texture[$in_tex]");
+
+// Shared tail of the bicubic scaler snippets: takes four samples at the
+// coordinates offset by cdelta and blends them with three LRP instructions
+// using parmy.b and parmx.b. Expects cdelta, parmx and parmy to have been
+// set up by the text that precedes it.
+#define BICUB_FILT_MAIN \
+ /* first y-interpolation */ \
+ "ADD coord, fragment.texcoord[$in_tex].xyxy, cdelta.xyxw;\n" \
+ "ADD coord2, fragment.texcoord[$in_tex].xyxy, cdelta.zyzw;\n" \
+ SAMPLE("a.r","coord.xyxy","texture[$in_tex]") \
+ SAMPLE("a.g","coord.zwzw","texture[$in_tex]") \
+ /* second y-interpolation */ \
+ SAMPLE("b.r","coord2.xyxy","texture[$in_tex]") \
+ SAMPLE("b.g","coord2.zwzw","texture[$in_tex]") \
+ "LRP a.b, parmy.b, a.rrrr, a.gggg;\n" \
+ "LRP a.a, parmy.b, b.rrrr, b.gggg;\n" \
+ /* x-interpolation */ \
+ "LRP yuv.$out_comp, parmx.b, a.bbbb, a.aaaa;\n"
+
+// YUV_SCALER_BICUB snippet for 2D textures: the weights are read from the
+// 1D lookup texture $texs; $texw/$texh and $ptw/$pth are substituted by
+// add_scaler().
+static const char bicub_filt_template_2D[] =
+ "MAD coord.xy, fragment.texcoord[$in_tex], {$texw, $texh}, {0.5, 0.5};\n"
+ "TEX parmx, coord.x, texture[$texs], 1D;\n"
+ "MUL cdelta.xz, parmx.rrgg, {-$ptw, 0, $ptw, 0};\n"
+ "TEX parmy, coord.y, texture[$texs], 1D;\n"
+ "MUL cdelta.yw, parmy.rrgg, {0, -$pth, 0, $pth};\n"
+ BICUB_FILT_MAIN;
+
+// YUV_SCALER_BICUB snippet for rectangle textures: same as the 2D variant
+// but with constant offsets of one instead of $ptw/$pth.
+static const char bicub_filt_template_RECT[] =
+ "ADD coord, fragment.texcoord[$in_tex], {0.5, 0.5};\n"
+ "TEX parmx, coord.x, texture[$texs], 1D;\n"
+ "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};\n"
+ "TEX parmy, coord.y, texture[$texs], 1D;\n"
+ "MUL cdelta.yw, parmy.rrgg, {0, -1, 0, 1};\n"
+ BICUB_FILT_MAIN;
+
+// Fragment program text that computes the filter parameters into register
+// `t` from the fractional position `s` with arithmetic instructions only
+// (used by the YUV_SCALER_BICUB_NOTEX snippets in place of the TEX lookups
+// of the bicubic snippets). Uses a.x and a.y as scratch.
+// NOTE(review): presumably approximates what store_weights() computes --
+// confirm before relying on it.
+#define CALCWEIGHTS(t, s) \
+ "MAD "t ", {-0.5, 0.1666, 0.3333, -0.3333}, "s ", {1, 0, -0.5, 0.5};\n" \
+ "MAD "t ", "t ", "s ", {0, 0, -0.5, 0.5};\n" \
+ "MAD "t ", "t ", "s ", {-0.6666, 0, 0.8333, 0.1666};\n" \
+ "RCP a.x, "t ".z;\n" \
+ "RCP a.y, "t ".w;\n" \
+ "MAD "t ".xy, "t ".xyxy, a.xyxy, {1, 1, 0, 0};\n" \
+ "ADD "t ".x, "t ".xxxx, "s ";\n" \
+ "SUB "t ".y, "t ".yyyy, "s ";\n"
+
+// YUV_SCALER_BICUB_NOTEX snippet for 2D textures.
+static const char bicub_notex_filt_template_2D[] =
+ "MAD coord.xy, fragment.texcoord[$in_tex], {$texw, $texh}, {0.5, 0.5};\n"
+ "FRC coord.xy, coord.xyxy;\n"
+ CALCWEIGHTS("parmx", "coord.xxxx")
+ "MUL cdelta.xz, parmx.rrgg, {-$ptw, 0, $ptw, 0};\n"
+ CALCWEIGHTS("parmy", "coord.yyyy")
+ "MUL cdelta.yw, parmy.rrgg, {0, -$pth, 0, $pth};\n"
+ BICUB_FILT_MAIN;
+
+// YUV_SCALER_BICUB_NOTEX snippet for rectangle textures.
+static const char bicub_notex_filt_template_RECT[] =
+ "ADD coord, fragment.texcoord[$in_tex], {0.5, 0.5};\n"
+ "FRC coord.xy, coord.xyxy;\n"
+ CALCWEIGHTS("parmx", "coord.xxxx")
+ "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};\n"
+ CALCWEIGHTS("parmy", "coord.yyyy")
+ "MUL cdelta.yw, parmy.rrgg, {0, -1, 0, 1};\n"
+ BICUB_FILT_MAIN;
+
+// Shared tail of the YUV_SCALER_BICUB_X snippets: two samples offset by
+// cdelta, blended with a single LRP using parmx.b.
+#define BICUB_X_FILT_MAIN \
+ "ADD coord.xy, fragment.texcoord[$in_tex].xyxy, cdelta.xyxy;\n" \
+ "ADD coord2.xy, fragment.texcoord[$in_tex].xyxy, cdelta.zyzy;\n" \
+ SAMPLE("a.r","coord","texture[$in_tex]") \
+ SAMPLE("b.r","coord2","texture[$in_tex]") \
+ /* x-interpolation */ \
+ "LRP yuv.$out_comp, parmx.b, a.rrrr, b.rrrr;\n"
+
+// YUV_SCALER_BICUB_X snippet for 2D textures; parmx comes from the 1D
+// lookup texture $texs.
+static const char bicub_x_filt_template_2D[] =
+ "MAD coord.x, fragment.texcoord[$in_tex], {$texw}, {0.5};\n"
+ "TEX parmx, coord, texture[$texs], 1D;\n"
+ "MUL cdelta.xyz, parmx.rrgg, {-$ptw, 0, $ptw};\n"
+ BICUB_X_FILT_MAIN;
+
+// YUV_SCALER_BICUB_X snippet for rectangle textures.
+static const char bicub_x_filt_template_RECT[] =
+ "ADD coord.x, fragment.texcoord[$in_tex], {0.5};\n"
+ "TEX parmx, coord, texture[$texs], 1D;\n"
+ "MUL cdelta.xyz, parmx.rrgg, {-1, 0, 1};\n"
+ BICUB_X_FILT_MAIN;
+
+// YUV_SCALER_UNSHARP snippet: reads the center sample plus four samples
+// offset by ($ptw_05, $pth_05) in the diagonal directions, and writes
+// center + $strength * (center - weighted neighbour sum) to yuv.$out_comp.
+// The $... placeholders are substituted by add_scaler().
+static const char unsharp_filt_template[] =
+ "PARAM dcoord$out_comp = {$ptw_05, $pth_05, $ptw_05, -$pth_05};\n"
+ "ADD coord, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n"
+ "SUB coord2, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n"
+ SAMPLE("a.r","fragment.texcoord[$in_tex]","texture[$in_tex]")
+ SAMPLE("b.r","coord.xyxy","texture[$in_tex]")
+ SAMPLE("b.g","coord.zwzw","texture[$in_tex]")
+ "ADD b.r, b.r, b.g;\n"
+ SAMPLE("b.b","coord2.xyxy","texture[$in_tex]")
+ SAMPLE("b.g","coord2.zwzw","texture[$in_tex]")
+ "DP3 b, b, {0.25, 0.25, 0.25};\n"
+ "SUB b.r, a.r, b.r;\n"
+ "MAD textemp.r, b.r, {$strength}, a.r;\n"
+ "MOV yuv.$out_comp, textemp.r;\n";
+
+// YUV_SCALER_UNSHARP2 snippet: like unsharp_filt_template, but with two
+// rings of four neighbour samples each (offsets built from $ptw_12/$pth_12
+// and $ptw_15/$pth_15) that are combined with fixed weights before the
+// result is scaled by $strength and added to the center sample.
+// The $... placeholders are substituted by add_scaler().
+static const char unsharp_filt_template2[] =
+ "PARAM dcoord$out_comp = {$ptw_12, $pth_12, $ptw_12, -$pth_12};\n"
+ "PARAM dcoord2$out_comp = {$ptw_15, 0, 0, $pth_15};\n"
+ "ADD coord, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n"
+ "SUB coord2, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n"
+ SAMPLE("a.r","fragment.texcoord[$in_tex]","texture[$in_tex]")
+ SAMPLE("b.r","coord.xyxy","texture[$in_tex]")
+ SAMPLE("b.g","coord.zwzw","texture[$in_tex]")
+ "ADD b.r, b.r, b.g;\n"
+ SAMPLE("b.b","coord2.xyxy","texture[$in_tex]")
+ SAMPLE("b.g","coord2.zwzw","texture[$in_tex]")
+ "ADD b.r, b.r, b.b;\n"
+ "ADD b.a, b.r, b.g;\n"
+ "ADD coord, fragment.texcoord[$in_tex].xyxy, dcoord2$out_comp;\n"
+ "SUB coord2, fragment.texcoord[$in_tex].xyxy, dcoord2$out_comp;\n"
+ SAMPLE("b.r","coord.xyxy","texture[$in_tex]")
+ SAMPLE("b.g","coord.zwzw","texture[$in_tex]")
+ "ADD b.r, b.r, b.g;\n"
+ SAMPLE("b.b","coord2.xyxy","texture[$in_tex]")
+ SAMPLE("b.g","coord2.zwzw","texture[$in_tex]")
+ "DP4 b.r, b, {-0.1171875, -0.1171875, -0.1171875, -0.09765625};\n"
+ "MAD b.r, a.r, {0.859375}, b.r;\n"
+ "MAD textemp.r, b.r, {$strength}, a.r;\n"
+ "MOV yuv.$out_comp, textemp.r;\n";
+
+// Conversion snippet: res.rgb = offsets + y * ycoef + u * ucoef + v * vcoef,
+// with y/u/v taken from yuv.r/g/b.
+// NOTE(review): the $cmXY placeholders are presumably filled with the
+// yuv2rgb matrix by code outside this view -- confirm.
+static const char yuv_prog_template[] =
+ "PARAM ycoef = {$cm11, $cm21, $cm31};\n"
+ "PARAM ucoef = {$cm12, $cm22, $cm32};\n"
+ "PARAM vcoef = {$cm13, $cm23, $cm33};\n"
+ "PARAM offsets = {$cm14, $cm24, $cm34};\n"
+ "TEMP res;\n"
+ "MAD res.rgb, yuv.rrrr, ycoef, offsets;\n"
+ "MAD res.rgb, yuv.gggg, ucoef, res;\n"
+ "MAD res.rgb, yuv.bbbb, vcoef, res;\n";
+
+// Same conversion as yuv_prog_template, but the last MAD saturates and each
+// component is then raised to the power given by $gamma_r/$gamma_g/$gamma_b.
+static const char yuv_pow_prog_template[] =
+ "PARAM ycoef = {$cm11, $cm21, $cm31};\n"
+ "PARAM ucoef = {$cm12, $cm22, $cm32};\n"
+ "PARAM vcoef = {$cm13, $cm23, $cm33};\n"
+ "PARAM offsets = {$cm14, $cm24, $cm34};\n"
+ "PARAM gamma = {$gamma_r, $gamma_g, $gamma_b};\n"
+ "TEMP res;\n"
+ "MAD res.rgb, yuv.rrrr, ycoef, offsets;\n"
+ "MAD res.rgb, yuv.gggg, ucoef, res;\n"
+ "MAD_SAT res.rgb, yuv.bbbb, vcoef, res;\n"
+ "POW res.r, res.r, gamma.r;\n"
+ "POW res.g, res.g, gamma.g;\n"
+ "POW res.b, res.b, gamma.b;\n";
+
+// Same conversion as yuv_prog_template, followed by one lookup per
+// component in the 2D texture $conv_tex0. res.a selects the row: it starts
+// at 0.125 and is advanced by 0.25 before the green and the blue lookup.
+// The matching texture is created in create_conv_textures() with a height
+// of 4 rows.
+static const char yuv_lookup_prog_template[] =
+ "PARAM ycoef = {$cm11, $cm21, $cm31, 0};\n"
+ "PARAM ucoef = {$cm12, $cm22, $cm32, 0};\n"
+ "PARAM vcoef = {$cm13, $cm23, $cm33, 0};\n"
+ "PARAM offsets = {$cm14, $cm24, $cm34, 0.125};\n"
+ "TEMP res;\n"
+ "MAD res, yuv.rrrr, ycoef, offsets;\n"
+ "MAD res.rgb, yuv.gggg, ucoef, res;\n"
+ "MAD res.rgb, yuv.bbbb, vcoef, res;\n"
+ "TEX res.r, res.raaa, texture[$conv_tex0], 2D;\n"
+ "ADD res.a, res.a, 0.25;\n"
+ "TEX res.g, res.gaaa, texture[$conv_tex0], 2D;\n"
+ "ADD res.a, res.a, 0.25;\n"
+ "TEX res.b, res.baaa, texture[$conv_tex0], 2D;\n";
+
+// Conversion snippet that uses the yuv value directly as the coordinate for
+// a single lookup in the 3D texture $conv_tex0.
+static const char yuv_lookup3d_prog_template[] =
+ "TEMP res;\n"
+ "TEX res, yuv, texture[$conv_tex0], 3D;\n";
+
+// Noise snippet: scales texcoord[0] by ($noise_sx, $noise_sy), performs two
+// chained lookups in the 1D texture $noise_filt_tex (the second one at the
+// first result plus the y coordinate) and adds the value, scaled by
+// $noise_str, to res.rgb. Relies on "res" and "coord" being declared by
+// program text that precedes it.
+static const char noise_filt_template[] =
+ "MUL coord.xy, fragment.texcoord[0], {$noise_sx, $noise_sy};\n"
+ "TEMP rand;\n"
+ "TEX rand.r, coord.x, texture[$noise_filt_tex], 1D;\n"
+ "ADD rand.r, rand.r, coord.y;\n"
+ "TEX rand.r, rand.r, texture[$noise_filt_tex], 1D;\n"
+ "MAD res.rgb, rand.rrrr, {$noise_str, $noise_str, $noise_str}, res;\n";
+
+/**
+ * \brief creates and initializes helper textures needed for scaling texture read
+ * \param scaler scaler type to create texture for
+ * \param texu contains next free texture unit number, advanced by one when
+ *             a texture is created
+ * \param texs texture unit ids for the scaler are stored in this array,
+ *             as ASCII digit characters
+ *
+ * Only YUV_SCALER_BICUB and YUV_SCALER_BICUB_X need a helper texture;
+ * unknown scaler types are reported with an error message.
+ */
+static void create_scaler_textures(GL *gl, int scaler, int *texu, char *texs)
+{
+    // scalers that work without a helper texture
+    if (scaler == YUV_SCALER_BILIN || scaler == YUV_SCALER_BICUB_NOTEX ||
+        scaler == YUV_SCALER_UNSHARP || scaler == YUV_SCALER_UNSHARP2)
+        return;
+
+    if (scaler != YUV_SCALER_BICUB && scaler != YUV_SCALER_BICUB_X) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown scaler type %i\n", scaler);
+        return;
+    }
+
+    // claim the next free unit, attach the spline table to it and hand the
+    // unit number back as a character
+    texs[0] = (*texu)++;
+    gen_spline_lookup_tex(gl, GL_TEXTURE0 + texs[0]);
+    texs[0] += '0';
+}
+
+//! resolution of texture for gamma lookup table
+#define LOOKUP_RES 512
+//! resolution for 3D yuv->rgb conversion lookup table
+#define LOOKUP_3DRES 32
+/**
+ * \brief creates and initializes helper textures needed for yuv conversion
+ * \param params struct containing parameters like brightness, gamma, ...
+ * \param texu contains next free texture unit number, advanced by one when
+ *             a texture is created
+ * \param texs texture unit ids for the conversion are stored in this array,
+ *             as ASCII digit characters
+ *
+ * If a lookup table cannot be allocated (or the 3D texture function is
+ * missing), an error is logged, no texture unit is consumed and texs is
+ * left untouched. Texture unit GL_TEXTURE0 is active again after a texture
+ * has been created.
+ */
+static void create_conv_textures(GL *gl, gl_conversion_params_t *params,
+                                 int *texu, char *texs)
+{
+    unsigned char *lookup_data = NULL;
+    int conv = YUV_CONVERSION(params->type);
+    switch (conv) {
+    case YUV_CONVERSION_FRAGMENT:
+    case YUV_CONVERSION_FRAGMENT_POW:
+        // these conversions need no helper texture
+        break;
+    case YUV_CONVERSION_FRAGMENT_LOOKUP:
+        // 4 rows are uploaded but only 3 are filled below; calloc keeps the
+        // unused row defined instead of uploading uninitialized memory
+        lookup_data = calloc(4, LOOKUP_RES);
+        if (!lookup_data) {
+            mp_msg(MSGT_VO, MSGL_ERR,
+                   "[gl] Out of memory for gamma lookup table\n");
+            break;
+        }
+        texs[0] = (*texu)++;
+        gl->ActiveTexture(GL_TEXTURE0 + texs[0]);
+        mp_gen_gamma_map(lookup_data, LOOKUP_RES, params->csp_params.rgamma);
+        mp_gen_gamma_map(&lookup_data[LOOKUP_RES], LOOKUP_RES,
+                         params->csp_params.ggamma);
+        mp_gen_gamma_map(&lookup_data[2 * LOOKUP_RES], LOOKUP_RES,
+                         params->csp_params.bgamma);
+        glCreateClearTex(gl, GL_TEXTURE_2D, GL_LUMINANCE8, GL_LUMINANCE,
+                         GL_UNSIGNED_BYTE, GL_LINEAR, LOOKUP_RES, 4, 0);
+        glUploadTex(gl, GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE,
+                    lookup_data, LOOKUP_RES, 0, 0, LOOKUP_RES, 4, 0);
+        gl->ActiveTexture(GL_TEXTURE0);
+        texs[0] += '0';
+        break;
+    case YUV_CONVERSION_FRAGMENT_LOOKUP3D:
+    {
+        int sz = LOOKUP_3DRES + 2; // texture size including borders
+        if (!gl->TexImage3D) {
+            mp_msg(MSGT_VO, MSGL_ERR, "[gl] Missing 3D texture function!\n");
+            break;
+        }
+        lookup_data = malloc(3 * sz * sz * sz);
+        if (!lookup_data) {
+            mp_msg(MSGT_VO, MSGL_ERR,
+                   "[gl] Out of memory for 3D lookup table\n");
+            break;
+        }
+        texs[0] = (*texu)++;
+        gl->ActiveTexture(GL_TEXTURE0 + texs[0]);
+        mp_gen_yuv2rgb_map(&params->csp_params, lookup_data, LOOKUP_3DRES);
+        glAdjustAlignment(gl, sz);
+        gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+        gl->TexImage3D(GL_TEXTURE_3D, 0, 3, sz, sz, sz, 1,
+                       GL_RGB, GL_UNSIGNED_BYTE, lookup_data);
+        gl->TexParameterf(GL_TEXTURE_3D, GL_TEXTURE_PRIORITY, 1.0);
+        gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP);
+        gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP);
+        gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP);
+        gl->ActiveTexture(GL_TEXTURE0);
+        texs[0] += '0';
+    }
+    break;
+    default:
+        mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown conversion type %i\n", conv);
+    }
+    free(lookup_data);
+}
+
+/**
+ * \brief adds a scaling texture read at the current fragment program position
+ * \param scaler type of scaler to insert
+ * \param prog pointer to fragment program so far
+ * \param texs array containing the texture unit identifiers for this scaler
+ * \param in_tex texture unit the scaler should read from
+ * \param out_comp component of the yuv variable the scaler stores the result in
+ * \param rect if rectangular (pixel) addressing should be used for in_tex
+ * \param texw width of the in_tex texture
+ * \param texh height of the in_tex texture
+ * \param strength strength of filter effect if the scaler does some kind of filtering
+ *
+ * The snippet for the scaler is appended to *prog, then all placeholders
+ * known to the scaler snippets are substituted in the whole of *prog. An
+ * unknown scaler type appends nothing, the substitutions still run.
+ */
+static void add_scaler(int scaler, char **prog, char *texs,
+                       char in_tex, char out_comp, int rect, int texw, int texh,
+                       double strength)
+{
+    const char *ttype = rect ? "RECT" : "2D";
+    // size of one texel in texture coordinates
+    const float ptw = rect ? 1.0 : 1.0 / texw;
+    const float pth = rect ? 1.0 : 1.0 / texh;
+    const char *snippet = NULL;
+    size_t k;
+
+    switch (scaler) {
+    case YUV_SCALER_BILIN:
+        snippet = bilin_filt_template;
+        break;
+    case YUV_SCALER_BICUB:
+        snippet = rect ? bicub_filt_template_RECT : bicub_filt_template_2D;
+        break;
+    case YUV_SCALER_BICUB_X:
+        snippet = rect ? bicub_x_filt_template_RECT : bicub_x_filt_template_2D;
+        break;
+    case YUV_SCALER_BICUB_NOTEX:
+        snippet = rect ? bicub_notex_filt_template_RECT
+                       : bicub_notex_filt_template_2D;
+        break;
+    case YUV_SCALER_UNSHARP:
+        snippet = unsharp_filt_template;
+        break;
+    case YUV_SCALER_UNSHARP2:
+        snippet = unsharp_filt_template2;
+        break;
+    }
+    if (snippet)
+        append_template(prog, snippet);
+
+    replace_var_char(prog, "texs", texs[0]);
+    replace_var_char(prog, "in_tex", in_tex);
+    replace_var_char(prog, "out_comp", out_comp);
+    replace_var_str(prog, "tex_type", ttype);
+
+    // float placeholders, substituted in exactly this order
+    // (the scaled ptw/pth variants are silly, not sure if that couldn't be
+    // in the shader source instead)
+    const struct {
+        const char *name;
+        float value;
+    } float_vars[] = {
+        {"texw",     texw},
+        {"texh",     texh},
+        {"ptw",      ptw},
+        {"pth",      pth},
+        {"ptw_05",   ptw * 0.5},
+        {"pth_05",   pth * 0.5},
+        {"ptw_15",   ptw * 1.5},
+        {"pth_15",   pth * 1.5},
+        {"ptw_12",   ptw * 1.2},
+        {"pth_12",   pth * 1.2},
+        {"strength", strength},
+    };
+    for (k = 0; k < sizeof(float_vars) / sizeof(float_vars[0]); k++)
+        replace_var_float(prog, float_vars[k].name, float_vars[k].value);
+}
+
+// Table of GPU program statistics: a human readable name plus two GL enum
+// values per entry, one for the current and one for the maximum value.
+// The list is terminated by an entry with a NULL name.
+// NOTE(review): the raw hex values are presumably the ARB program query
+// enums (GL_PROGRAM_*_ARB / GL_MAX_PROGRAM_*_ARB) -- confirm against the
+// extension headers.
+static const struct {
+ const char *name;
+ GLenum cur;
+ GLenum max;
+} progstats[] = {
+ {"instructions", 0x88A0, 0x88A1},
+ {"native instructions", 0x88A2, 0x88A3},
+ {"temporaries", 0x88A4, 0x88A5},
+ {"native temporaries", 0x88A6, 0x88A7},
+ {"parameters", 0x88A8, 0x88A9},
+ {"native parameters", 0x88AA, 0x88AB},
+ {"attribs", 0x88AC, 0x88AD},
+ {"native attribs", 0x88AE, 0x88AF},
+ {"ALU instructions", 0x8805, 0x880B},
+ {"TEX instructions", 0x8806, 0x880C},
+ {"TEX indirections", 0x8807, 0x880D},
+ {"native ALU instructions", 0x8808, 0x880E},
+ {"native TEX instructions", 0x8809, 0x880F},
+ {"native TEX indirections", 0x880A, 0x8810},
+ {NULL, 0, 0}
+};
+
+/**
+ * \brief load the specified GPU Program
+ * \param target program target to load into, only GL_FRAGMENT_PROGRAM is tested
+ * \param prog program string
+ * \return 1 on success, 0 otherwise
+ */
+static int loadGPUProgram(GL *gl, GLenum target, char *prog)
+{
+ int i;
+ GLint cur = 0, max = 0, err = 0;
+ if (!gl->ProgramString) {
+ mp_msg(MSGT_VO, MSGL_ERR, "[gl] Missing GPU program function\n");
+ return 0;