diff options
author | wm4 <wm4@nowhere> | 2013-01-13 23:26:26 +0100 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2013-01-13 23:30:13 +0100 |
commit | f0a08a9fdf3ac9b30089820e61606ace493f4fcd (patch) | |
tree | fbea9bb25568b3bcfa254099ad3e11d29777bcee /video/out/vo_opengl_old.c | |
parent | 40567656435a8d967ee9b55e7b1ba12d32d77734 (diff) | |
download | mpv-f0a08a9fdf3ac9b30089820e61606ace493f4fcd.tar.bz2 mpv-f0a08a9fdf3ac9b30089820e61606ace493f4fcd.tar.xz |
gl_common: move things used by vo_opengl_old.c only to vo_opengl_old.c
Having this in gl_common is confusing.
Diffstat (limited to 'video/out/vo_opengl_old.c')
-rw-r--r-- | video/out/vo_opengl_old.c | 1241 |
1 files changed, 1241 insertions, 0 deletions
diff --git a/video/out/vo_opengl_old.c b/video/out/vo_opengl_old.c index d010daacb9..8c32676e1d 100644 --- a/video/out/vo_opengl_old.c +++ b/video/out/vo_opengl_old.c @@ -42,6 +42,7 @@ #include "gl_osd.h" #include "aspect.h" #include "video/memcpy_pic.h" +#include "pnm_loader.h" //for gl_priv.use_yuv #define MASK_ALL_YUV (~(1 << YUV_CONVERSION_NONE)) @@ -110,6 +111,1246 @@ struct gl_priv { unsigned int slice_height; }; +static int glFindFormat(uint32_t format, int have_texture_rg, int *bpp, + GLint *gl_texfmt, GLenum *gl_format, GLenum *gl_type); +static void glCreateClearTex(GL *gl, GLenum target, GLenum fmt, GLenum format, + GLenum type, GLint filter, int w, int h, + unsigned char val); +static int glCreatePPMTex(GL *gl, GLenum target, GLenum fmt, GLint filter, + FILE *f, int *width, int *height, int *maxval); +static void glDrawTex(GL *gl, GLfloat x, GLfloat y, GLfloat w, GLfloat h, + GLfloat tx, GLfloat ty, GLfloat tw, GLfloat th, + int sx, int sy, int rect_tex, int is_yv12, int flip); +static int loadGPUProgram(GL *gl, GLenum target, char *prog); +//! do not use YUV conversion, this should always stay 0 +#define YUV_CONVERSION_NONE 0 +//! use nVidia specific register combiners for YUV conversion +//! implementation has been removed +#define YUV_CONVERSION_COMBINERS 1 +//! use a fragment program for YUV conversion +#define YUV_CONVERSION_FRAGMENT 2 +//! use a fragment program for YUV conversion with gamma using POW +#define YUV_CONVERSION_FRAGMENT_POW 3 +//! use a fragment program with additional table lookup for YUV conversion +#define YUV_CONVERSION_FRAGMENT_LOOKUP 4 +//! use ATI specific register combiners ("fragment program") +#define YUV_CONVERSION_COMBINERS_ATI 5 +//! use a fragment program with 3D table lookup for YUV conversion +#define YUV_CONVERSION_FRAGMENT_LOOKUP3D 6 +//! use ATI specific "text" register combiners ("fragment program") +#define YUV_CONVERSION_TEXT_FRAGMENT 7 +//! use normal bilinear scaling for textures +#define YUV_SCALER_BILIN 0 +//! use higher quality bicubic scaling for textures +#define YUV_SCALER_BICUB 1 +//! use cubic scaling in X and normal linear scaling in Y direction +#define YUV_SCALER_BICUB_X 2 +//! use cubic scaling without additional lookup texture +#define YUV_SCALER_BICUB_NOTEX 3 +#define YUV_SCALER_UNSHARP 4 +#define YUV_SCALER_UNSHARP2 5 +//! mask for conversion type +#define YUV_CONVERSION_MASK 0xF +//! mask for scaler type +#define YUV_SCALER_MASK 0xF +//! shift value for luminance scaler type +#define YUV_LUM_SCALER_SHIFT 8 +//! shift value for chrominance scaler type +#define YUV_CHROM_SCALER_SHIFT 12 +//! extract conversion out of type +#define YUV_CONVERSION(t) ((t) & YUV_CONVERSION_MASK) +//! extract luminance scaler out of type +#define YUV_LUM_SCALER(t) (((t) >> YUV_LUM_SCALER_SHIFT) & YUV_SCALER_MASK) +//! extract chrominance scaler out of type +#define YUV_CHROM_SCALER(t) (((t) >> YUV_CHROM_SCALER_SHIFT) & YUV_SCALER_MASK) +#define SET_YUV_CONVERSION(c) ((c) & YUV_CONVERSION_MASK) +#define SET_YUV_LUM_SCALER(s) (((s) & YUV_SCALER_MASK) << YUV_LUM_SCALER_SHIFT) +#define SET_YUV_CHROM_SCALER(s) (((s) & YUV_SCALER_MASK) << YUV_CHROM_SCALER_SHIFT) +//! returns whether the yuv conversion supports large brightness range etc. +static inline int glYUVLargeRange(int conv) +{ + switch (conv) { + case YUV_CONVERSION_NONE: + case YUV_CONVERSION_COMBINERS_ATI: + case YUV_CONVERSION_FRAGMENT_LOOKUP3D: + case YUV_CONVERSION_TEXT_FRAGMENT: + return 0; + } + return 1; +} +typedef struct { + GLenum target; + int type; + struct mp_csp_params csp_params; + int texw; + int texh; + int chrom_texw; + int chrom_texh; + float filter_strength; + float noise_strength; +} gl_conversion_params_t; + +static int glAutodetectYUVConversion(GL *gl); +static void glSetupYUVConversion(GL *gl, gl_conversion_params_t *params); +static void glEnableYUVConversion(GL *gl, GLenum target, int type); +static void glDisableYUVConversion(GL *gl, GLenum target, int type); + +//! always return this format as internal texture format in glFindFormat +#define TEXTUREFORMAT_ALWAYS GL_RGB8 +#undef TEXTUREFORMAT_ALWAYS + +/** + * \brief find the OpenGL settings coresponding to format. + * + * All parameters may be NULL. + * \param fmt MPlayer format to analyze. + * \param dummy reserved + * \param gl_texfmt [OUT] internal texture format that fits the + * image format, not necessarily the best for performance. + * \param gl_format [OUT] OpenGL format for this image format. + * \param gl_type [OUT] OpenGL type for this image format. + * \return 1 if format is supported by OpenGL, 0 if not. + * \ingroup gltexture + */ +static int glFindFormat(uint32_t fmt, int have_texture_rg, int *dummy, + GLint *gl_texfmt, GLenum *gl_format, GLenum *gl_type) +{ + int supported = 1; + GLenum dummy2; + GLint dummy3; + if (!gl_texfmt) + gl_texfmt = &dummy3; + if (!gl_format) + gl_format = &dummy2; + if (!gl_type) + gl_type = &dummy2; + + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(fmt); + if (desc.flags & MP_IMGFLAG_YUV_P) { + // reduce the possible cases a bit + if (desc.plane_bits > 8) + fmt = IMGFMT_420P16; + else + fmt = IMGFMT_420P; + } + + *gl_texfmt = 3; + switch (fmt) { + case IMGFMT_RGB48: + *gl_format = GL_RGB; + *gl_type = GL_UNSIGNED_SHORT; + break; + case IMGFMT_RGB24: + *gl_format = GL_RGB; + *gl_type = GL_UNSIGNED_BYTE; + break; + case IMGFMT_RGBA: + *gl_texfmt = 4; + *gl_format = GL_RGBA; + *gl_type = GL_UNSIGNED_BYTE; + break; + case IMGFMT_420P16: + supported = 0; // no native YUV support + *gl_texfmt = have_texture_rg ? GL_R16 : GL_LUMINANCE16; + *gl_format = have_texture_rg ? GL_RED : GL_LUMINANCE; + *gl_type = GL_UNSIGNED_SHORT; + break; + case IMGFMT_420P: + supported = 0; // no native YV12 support + case IMGFMT_Y8: + *gl_texfmt = 1; + *gl_format = GL_LUMINANCE; + *gl_type = GL_UNSIGNED_BYTE; + break; + case IMGFMT_UYVY: + *gl_texfmt = GL_YCBCR_MESA; + *gl_format = GL_YCBCR_MESA; + *gl_type = fmt == IMGFMT_UYVY ? GL_UNSIGNED_SHORT_8_8 : GL_UNSIGNED_SHORT_8_8_REV; + break; +#if 0 + // we do not support palettized formats, although the format the + // swscale produces works + case IMGFMT_RGB8: + *gl_format = GL_RGB; + *gl_type = GL_UNSIGNED_BYTE_2_3_3_REV; + break; +#endif + case IMGFMT_RGB15: + *gl_format = GL_RGBA; + *gl_type = GL_UNSIGNED_SHORT_1_5_5_5_REV; + break; + case IMGFMT_RGB16: + *gl_format = GL_RGB; + *gl_type = GL_UNSIGNED_SHORT_5_6_5_REV; + break; +#if 0 + case IMGFMT_BGR8: + // special case as red and blue have a different number of bits. + // GL_BGR and GL_UNSIGNED_BYTE_3_3_2 isn't supported at least + // by nVidia drivers, and in addition would give more bits to + // blue than to red, which isn't wanted + *gl_format = GL_RGB; + *gl_type = GL_UNSIGNED_BYTE_3_3_2; + break; +#endif + case IMGFMT_BGR15: + *gl_format = GL_BGRA; + *gl_type = GL_UNSIGNED_SHORT_1_5_5_5_REV; + break; + case IMGFMT_BGR16: + *gl_format = GL_RGB; + *gl_type = GL_UNSIGNED_SHORT_5_6_5; + break; + case IMGFMT_BGR24: + *gl_format = GL_BGR; + *gl_type = GL_UNSIGNED_BYTE; + break; + case IMGFMT_BGRA: + *gl_texfmt = 4; + *gl_format = GL_BGRA; + *gl_type = GL_UNSIGNED_BYTE; + break; + default: + *gl_texfmt = 4; + *gl_format = GL_RGBA; + *gl_type = GL_UNSIGNED_BYTE; + supported = 0; + } +#ifdef TEXTUREFORMAT_ALWAYS + *gl_texfmt = TEXTUREFORMAT_ALWAYS; +#endif + return supported; +} + +/** + * \brief create a texture and set some defaults + * \param target texture taget, usually GL_TEXTURE_2D + * \param fmt internal texture format + * \param format texture host data format + * \param type texture host data type + * \param filter filter used for scaling, e.g. GL_LINEAR + * \param w texture width + * \param h texture height + * \param val luminance value to fill texture with + * \ingroup gltexture + */ +static void glCreateClearTex(GL *gl, GLenum target, GLenum fmt, GLenum format, + GLenum type, GLint filter, int w, int h, + unsigned char val) +{ + GLfloat fval = (GLfloat)val / 255.0; + GLfloat border[4] = { + fval, fval, fval, fval + }; + int stride; + char *init; + if (w == 0) + w = 1; + if (h == 0) + h = 1; + stride = w * glFmt2bpp(format, type); + if (!stride) + return; + init = malloc(stride * h); + memset(init, val, stride * h); + glAdjustAlignment(gl, stride); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, w); + gl->TexImage2D(target, 0, fmt, w, h, 0, format, type, init); + gl->TexParameterf(target, GL_TEXTURE_PRIORITY, 1.0); + gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, filter); + gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + // Border texels should not be used with CLAMP_TO_EDGE + // We set a sane default anyway. + gl->TexParameterfv(target, GL_TEXTURE_BORDER_COLOR, border); + free(init); +} + +static GLint detect_hqtexfmt(GL *gl) +{ + const char *extensions = (const char *)gl->GetString(GL_EXTENSIONS); + if (strstr(extensions, "_texture_float")) + return GL_RGB32F; + else if (strstr(extensions, "NV_float_buffer")) + return GL_FLOAT_RGB32_NV; + return GL_RGB16; +} + +/** + * \brief creates a texture from a PPM file + * \param target texture taget, usually GL_TEXTURE_2D + * \param fmt internal texture format, 0 for default + * \param filter filter used for scaling, e.g. GL_LINEAR + * \param f file to read PPM from + * \param width [out] width of texture + * \param height [out] height of texture + * \param maxval [out] maxval value from PPM file + * \return 0 on error, 1 otherwise + * \ingroup gltexture + */ +static int glCreatePPMTex(GL *gl, GLenum target, GLenum fmt, GLint filter, + FILE *f, int *width, int *height, int *maxval) +{ + int w, h, m, bpp; + GLenum type; + uint8_t *data = read_pnm(f, &w, &h, &bpp, &m); + GLint hqtexfmt = detect_hqtexfmt(gl); + if (!data || (bpp != 3 && bpp != 6)) { + free(data); + return 0; + } + if (!fmt) { + fmt = bpp == 6 ? hqtexfmt : 3; + if (fmt == GL_FLOAT_RGB32_NV && target != GL_TEXTURE_RECTANGLE) + fmt = GL_RGB16; + } + type = bpp == 6 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE; + glCreateClearTex(gl, target, fmt, GL_RGB, type, filter, w, h, 0); + glUploadTex(gl, target, GL_RGB, type, + data, w * bpp, 0, 0, w, h, 0); + free(data); + if (width) + *width = w; + if (height) + *height = h; + if (maxval) + *maxval = m; + return 1; +} + + +/** + * \brief Setup ATI version of register combiners for YUV to RGB conversion. + * \param csp_params parameters used for colorspace conversion + * \param text if set use the GL_ATI_text_fragment_shader API as + * used on OS X. + */ +static void glSetupYUVFragmentATI(GL *gl, struct mp_csp_params *csp_params, + int text) +{ + GLint i; + float yuv2rgb[3][4]; + + gl->GetIntegerv(GL_MAX_TEXTURE_UNITS, &i); + if (i < 3) + mp_msg(MSGT_VO, MSGL_ERR, + "[gl] 3 texture units needed for YUV combiner (ATI) support (found %i)\n", i); + + mp_get_yuv2rgb_coeffs(csp_params, yuv2rgb); + for (i = 0; i < 3; i++) { + int j; + yuv2rgb[i][3] -= -0.5 * (yuv2rgb[i][1] + yuv2rgb[i][2]); + for (j = 0; j < 4; j++) { + yuv2rgb[i][j] *= 0.125; + yuv2rgb[i][j] += 0.5; + if (yuv2rgb[i][j] > 1) + yuv2rgb[i][j] = 1; + if (yuv2rgb[i][j] < 0) + yuv2rgb[i][j] = 0; + } + } + if (text == 0) { + GLfloat c0[4] = { yuv2rgb[0][0], yuv2rgb[1][0], yuv2rgb[2][0] }; + GLfloat c1[4] = { yuv2rgb[0][1], yuv2rgb[1][1], yuv2rgb[2][1] }; + GLfloat c2[4] = { yuv2rgb[0][2], yuv2rgb[1][2], yuv2rgb[2][2] }; + GLfloat c3[4] = { yuv2rgb[0][3], yuv2rgb[1][3], yuv2rgb[2][3] }; + if (!gl->BeginFragmentShader || !gl->EndFragmentShader || + !gl->SetFragmentShaderConstant || !gl->SampleMap || + !gl->ColorFragmentOp2 || !gl->ColorFragmentOp3) { + mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Combiner (ATI) functions missing!\n"); + return; + } + gl->GetIntegerv(GL_NUM_FRAGMENT_REGISTERS_ATI, &i); + if (i < 3) + mp_msg(MSGT_VO, MSGL_ERR, + "[gl] 3 registers needed for YUV combiner (ATI) support (found %i)\n", i); + gl->BeginFragmentShader(); + gl->SetFragmentShaderConstant(GL_CON_0_ATI, c0); + gl->SetFragmentShaderConstant(GL_CON_1_ATI, c1); + gl->SetFragmentShaderConstant(GL_CON_2_ATI, c2); + gl->SetFragmentShaderConstant(GL_CON_3_ATI, c3); + gl->SampleMap(GL_REG_0_ATI, GL_TEXTURE0, GL_SWIZZLE_STR_ATI); + gl->SampleMap(GL_REG_1_ATI, GL_TEXTURE1, GL_SWIZZLE_STR_ATI); + gl->SampleMap(GL_REG_2_ATI, GL_TEXTURE2, GL_SWIZZLE_STR_ATI); + gl->ColorFragmentOp2(GL_MUL_ATI, GL_REG_1_ATI, GL_NONE, GL_NONE, + GL_REG_1_ATI, GL_NONE, GL_BIAS_BIT_ATI, + GL_CON_1_ATI, GL_NONE, GL_BIAS_BIT_ATI); + gl->ColorFragmentOp3(GL_MAD_ATI, GL_REG_2_ATI, GL_NONE, GL_NONE, + GL_REG_2_ATI, GL_NONE, GL_BIAS_BIT_ATI, + GL_CON_2_ATI, GL_NONE, GL_BIAS_BIT_ATI, + GL_REG_1_ATI, GL_NONE, GL_NONE); + gl->ColorFragmentOp3(GL_MAD_ATI, GL_REG_0_ATI, GL_NONE, GL_NONE, + GL_REG_0_ATI, GL_NONE, GL_NONE, + GL_CON_0_ATI, GL_NONE, GL_BIAS_BIT_ATI, + GL_REG_2_ATI, GL_NONE, GL_NONE); + gl->ColorFragmentOp2(GL_ADD_ATI, GL_REG_0_ATI, GL_NONE, GL_8X_BIT_ATI, + GL_REG_0_ATI, GL_NONE, GL_NONE, + GL_CON_3_ATI, GL_NONE, GL_BIAS_BIT_ATI); + gl->EndFragmentShader(); + } else { + static const char template[] = + "!!ATIfs1.0\n" + "StartConstants;\n" + " CONSTANT c0 = {%e, %e, %e};\n" + " CONSTANT c1 = {%e, %e, %e};\n" + " CONSTANT c2 = {%e, %e, %e};\n" + " CONSTANT c3 = {%e, %e, %e};\n" + "EndConstants;\n" + "StartOutputPass;\n" + " SampleMap r0, t0.str;\n" + " SampleMap r1, t1.str;\n" + " SampleMap r2, t2.str;\n" + " MUL r1.rgb, r1.bias, c1.bias;\n" + " MAD r2.rgb, r2.bias, c2.bias, r1;\n" + " MAD r0.rgb, r0, c0.bias, r2;\n" + " ADD r0.rgb.8x, r0, c3.bias;\n" + "EndPass;\n"; + char buffer[512]; + snprintf(buffer, sizeof(buffer), template, + yuv2rgb[0][0], yuv2rgb[1][0], yuv2rgb[2][0], + yuv2rgb[0][1], yuv2rgb[1][1], yuv2rgb[2][1], + yuv2rgb[0][2], yuv2rgb[1][2], yuv2rgb[2][2], + yuv2rgb[0][3], yuv2rgb[1][3], yuv2rgb[2][3]); + mp_msg(MSGT_VO, MSGL_DBG2, "[gl] generated fragment program:\n%s\n", + buffer); + loadGPUProgram(gl, GL_TEXT_FRAGMENT_SHADER_ATI, buffer); + } +} + +// Replace all occurances of variables named "$"+name (e.g. $foo) in *text with +// replace, and return the result. *text must have been allocated with talloc. +static void replace_var_str(char **text, const char *name, const char *replace) +{ + size_t namelen = strlen(name); + char *nextvar = *text; + void *parent = talloc_parent(*text); + for (;;) { + nextvar = strchr(nextvar, '$'); + if (!nextvar) + break; + char *until = nextvar; + nextvar++; + if (strncmp(nextvar, name, namelen) != 0) + continue; + nextvar += namelen; + // try not to replace prefixes of other vars (e.g. $foo vs. $foo_bar) + char term = nextvar[0]; + if (isalnum(term) || term == '_') + continue; + int prelength = until - *text; + int postlength = nextvar - *text; + char *n = talloc_asprintf(parent, "%.*s%s%s", prelength, *text, replace, + nextvar); + talloc_free(*text); + *text = n; + nextvar = *text + postlength; + } +} + +static void replace_var_float(char **text, const char *name, float replace) +{ + char *s = talloc_asprintf(NULL, "%e", replace); + replace_var_str(text, name, s); + talloc_free(s); +} + +static void replace_var_char(char **text, const char *name, char replace) +{ + char s[2] = { replace, '\0' }; + replace_var_str(text, name, s); +} + +// Append template to *text. Possibly initialize *text if it's NULL. +static void append_template(char **text, const char* template) +{ + if (!text) + *text = talloc_strdup(NULL, template); + else + *text = talloc_strdup_append(*text, template); +} + +/** + * \brief helper function for gen_spline_lookup_tex + * \param x subpixel-position ((0,1) range) to calculate weights for + * \param dst where to store transformed weights, must provide space for 4 GLfloats + * + * calculates the weights and stores them after appropriate transformation + * for the scaler fragment program. + */ +static void store_weights(float x, GLfloat *dst) +{ + float w0 = (((-1 * x + 3) * x - 3) * x + 1) / 6; + float w1 = (((3 * x - 6) * x + 0) * x + 4) / 6; + float w2 = (((-3 * x + 3) * x + 3) * x + 1) / 6; + float w3 = (((1 * x + 0) * x + 0) * x + 0) / 6; + *dst++ = 1 + x - w1 / (w0 + w1); + *dst++ = 1 - x + w3 / (w2 + w3); + *dst++ = w0 + w1; + *dst++ = 0; +} + +//! to avoid artefacts this should be rather large +#define LOOKUP_BSPLINE_RES (2 * 1024) +/** + * \brief creates the 1D lookup texture needed for fast higher-order filtering + * \param unit texture unit to attach texture to + */ +static void gen_spline_lookup_tex(GL *gl, GLenum unit) +{ + GLfloat *tex = calloc(4 * LOOKUP_BSPLINE_RES, sizeof(*tex)); + GLfloat *tp = tex; + int i; + for (i = 0; i < LOOKUP_BSPLINE_RES; i++) { + float x = (float)(i + 0.5) / LOOKUP_BSPLINE_RES; + store_weights(x, tp); + tp += 4; + } + store_weights(0, tex); + store_weights(1, &tex[4 * (LOOKUP_BSPLINE_RES - 1)]); + gl->ActiveTexture(unit); + gl->TexImage1D(GL_TEXTURE_1D, 0, GL_RGBA16, LOOKUP_BSPLINE_RES, 0, GL_RGBA, + GL_FLOAT, tex); + gl->TexParameterf(GL_TEXTURE_1D, GL_TEXTURE_PRIORITY, 1.0); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_REPEAT); + gl->ActiveTexture(GL_TEXTURE0); + free(tex); +} + +#define NOISE_RES 2048 + +/** + * \brief creates the 1D lookup texture needed to generate pseudo-random numbers. + * \param unit texture unit to attach texture to + */ +static void gen_noise_lookup_tex(GL *gl, GLenum unit) { + GLfloat *tex = calloc(NOISE_RES, sizeof(*tex)); + uint32_t lcg = 0x79381c11; + int i; + for (i = 0; i < NOISE_RES; i++) + tex[i] = (double)i / (NOISE_RES - 1); + for (i = 0; i < NOISE_RES - 1; i++) { + int remain = NOISE_RES - i; + int idx = i + (lcg >> 16) % remain; + GLfloat tmp = tex[i]; + tex[i] = tex[idx]; + tex[idx] = tmp; + lcg = lcg * 1664525 + 1013904223; + } + gl->ActiveTexture(unit); + gl->TexImage1D(GL_TEXTURE_1D, 0, 1, NOISE_RES, 0, GL_RED, GL_FLOAT, tex); + gl->TexParameterf(GL_TEXTURE_1D, GL_TEXTURE_PRIORITY, 1.0); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_REPEAT); + gl->ActiveTexture(GL_TEXTURE0); + free(tex); +} + +#define SAMPLE(dest, coord, texture) \ + "TEX textemp, " coord ", " texture ", $tex_type;\n" \ + "MOV " dest ", textemp.r;\n" + +static const char bilin_filt_template[] = + SAMPLE("yuv.$out_comp","fragment.texcoord[$in_tex]","texture[$in_tex]"); + +#define BICUB_FILT_MAIN \ + /* first y-interpolation */ \ + "ADD coord, fragment.texcoord[$in_tex].xyxy, cdelta.xyxw;\n" \ + "ADD coord2, fragment.texcoord[$in_tex].xyxy, cdelta.zyzw;\n" \ + SAMPLE("a.r","coord.xyxy","texture[$in_tex]") \ + SAMPLE("a.g","coord.zwzw","texture[$in_tex]") \ + /* second y-interpolation */ \ + SAMPLE("b.r","coord2.xyxy","texture[$in_tex]") \ + SAMPLE("b.g","coord2.zwzw","texture[$in_tex]") \ + "LRP a.b, parmy.b, a.rrrr, a.gggg;\n" \ + "LRP a.a, parmy.b, b.rrrr, b.gggg;\n" \ + /* x-interpolation */ \ + "LRP yuv.$out_comp, parmx.b, a.bbbb, a.aaaa;\n" + +static const char bicub_filt_template_2D[] = + "MAD coord.xy, fragment.texcoord[$in_tex], {$texw, $texh}, {0.5, 0.5};\n" + "TEX parmx, coord.x, texture[$texs], 1D;\n" + "MUL cdelta.xz, parmx.rrgg, {-$ptw, 0, $ptw, 0};\n" + "TEX parmy, coord.y, texture[$texs], 1D;\n" + "MUL cdelta.yw, parmy.rrgg, {0, -$pth, 0, $pth};\n" + BICUB_FILT_MAIN; + +static const char bicub_filt_template_RECT[] = + "ADD coord, fragment.texcoord[$in_tex], {0.5, 0.5};\n" + "TEX parmx, coord.x, texture[$texs], 1D;\n" + "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};\n" + "TEX parmy, coord.y, texture[$texs], 1D;\n" + "MUL cdelta.yw, parmy.rrgg, {0, -1, 0, 1};\n" + BICUB_FILT_MAIN; + +#define CALCWEIGHTS(t, s) \ + "MAD "t ", {-0.5, 0.1666, 0.3333, -0.3333}, "s ", {1, 0, -0.5, 0.5};\n" \ + "MAD "t ", "t ", "s ", {0, 0, -0.5, 0.5};\n" \ + "MAD "t ", "t ", "s ", {-0.6666, 0, 0.8333, 0.1666};\n" \ + "RCP a.x, "t ".z;\n" \ + "RCP a.y, "t ".w;\n" \ + "MAD "t ".xy, "t ".xyxy, a.xyxy, {1, 1, 0, 0};\n" \ + "ADD "t ".x, "t ".xxxx, "s ";\n" \ + "SUB "t ".y, "t ".yyyy, "s ";\n" + +static const char bicub_notex_filt_template_2D[] = + "MAD coord.xy, fragment.texcoord[$in_tex], {$texw, $texh}, {0.5, 0.5};\n" + "FRC coord.xy, coord.xyxy;\n" + CALCWEIGHTS("parmx", "coord.xxxx") + "MUL cdelta.xz, parmx.rrgg, {-$ptw, 0, $ptw, 0};\n" + CALCWEIGHTS("parmy", "coord.yyyy") + "MUL cdelta.yw, parmy.rrgg, {0, -$pth, 0, $pth};\n" + BICUB_FILT_MAIN; + +static const char bicub_notex_filt_template_RECT[] = + "ADD coord, fragment.texcoord[$in_tex], {0.5, 0.5};\n" + "FRC coord.xy, coord.xyxy;\n" + CALCWEIGHTS("parmx", "coord.xxxx") + "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};\n" + CALCWEIGHTS("parmy", "coord.yyyy") + "MUL cdelta.yw, parmy.rrgg, {0, -1, 0, 1};\n" + BICUB_FILT_MAIN; + +#define BICUB_X_FILT_MAIN \ + "ADD coord.xy, fragment.texcoord[$in_tex].xyxy, cdelta.xyxy;\n" \ + "ADD coord2.xy, fragment.texcoord[$in_tex].xyxy, cdelta.zyzy;\n" \ + SAMPLE("a.r","coord","texture[$in_tex]") \ + SAMPLE("b.r","coord2","texture[$in_tex]") \ + /* x-interpolation */ \ + "LRP yuv.$out_comp, parmx.b, a.rrrr, b.rrrr;\n" + +static const char bicub_x_filt_template_2D[] = + "MAD coord.x, fragment.texcoord[$in_tex], {$texw}, {0.5};\n" + "TEX parmx, coord, texture[$texs], 1D;\n" + "MUL cdelta.xyz, parmx.rrgg, {-$ptw, 0, $ptw};\n" + BICUB_X_FILT_MAIN; + +static const char bicub_x_filt_template_RECT[] = + "ADD coord.x, fragment.texcoord[$in_tex], {0.5};\n" + "TEX parmx, coord, texture[$texs], 1D;\n" + "MUL cdelta.xyz, parmx.rrgg, {-1, 0, 1};\n" + BICUB_X_FILT_MAIN; + +static const char unsharp_filt_template[] = + "PARAM dcoord$out_comp = {$ptw_05, $pth_05, $ptw_05, -$pth_05};\n" + "ADD coord, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n" + "SUB coord2, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n" + SAMPLE("a.r","fragment.texcoord[$in_tex]","texture[$in_tex]") + SAMPLE("b.r","coord.xyxy","texture[$in_tex]") + SAMPLE("b.g","coord.zwzw","texture[$in_tex]") + "ADD b.r, b.r, b.g;\n" + SAMPLE("b.b","coord2.xyxy","texture[$in_tex]") + SAMPLE("b.g","coord2.zwzw","texture[$in_tex]") + "DP3 b, b, {0.25, 0.25, 0.25};\n" + "SUB b.r, a.r, b.r;\n" + "MAD textemp.r, b.r, {$strength}, a.r;\n" + "MOV yuv.$out_comp, textemp.r;\n"; + +static const char unsharp_filt_template2[] = + "PARAM dcoord$out_comp = {$ptw_12, $pth_12, $ptw_12, -$pth_12};\n" + "PARAM dcoord2$out_comp = {$ptw_15, 0, 0, $pth_15};\n" + "ADD coord, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n" + "SUB coord2, fragment.texcoord[$in_tex].xyxy, dcoord$out_comp;\n" + SAMPLE("a.r","fragment.texcoord[$in_tex]","texture[$in_tex]") + SAMPLE("b.r","coord.xyxy","texture[$in_tex]") + SAMPLE("b.g","coord.zwzw","texture[$in_tex]") + "ADD b.r, b.r, b.g;\n" + SAMPLE("b.b","coord2.xyxy","texture[$in_tex]") + SAMPLE("b.g","coord2.zwzw","texture[$in_tex]") + "ADD b.r, b.r, b.b;\n" + "ADD b.a, b.r, b.g;\n" + "ADD coord, fragment.texcoord[$in_tex].xyxy, dcoord2$out_comp;\n" + "SUB coord2, fragment.texcoord[$in_tex].xyxy, dcoord2$out_comp;\n" + SAMPLE("b.r","coord.xyxy","texture[$in_tex]") + SAMPLE("b.g","coord.zwzw","texture[$in_tex]") + "ADD b.r, b.r, b.g;\n" + SAMPLE("b.b","coord2.xyxy","texture[$in_tex]") + SAMPLE("b.g","coord2.zwzw","texture[$in_tex]") + "DP4 b.r, b, {-0.1171875, -0.1171875, -0.1171875, -0.09765625};\n" + "MAD b.r, a.r, {0.859375}, b.r;\n" + "MAD textemp.r, b.r, {$strength}, a.r;\n" + "MOV yuv.$out_comp, textemp.r;\n"; + +static const char yuv_prog_template[] = + "PARAM ycoef = {$cm11, $cm21, $cm31};\n" + "PARAM ucoef = {$cm12, $cm22, $cm32};\n" + "PARAM vcoef = {$cm13, $cm23, $cm33};\n" + "PARAM offsets = {$cm14, $cm24, $cm34};\n" + "TEMP res;\n" + "MAD res.rgb, yuv.rrrr, ycoef, offsets;\n" + "MAD res.rgb, yuv.gggg, ucoef, res;\n" + "MAD res.rgb, yuv.bbbb, vcoef, res;\n"; + +static const char yuv_pow_prog_template[] = + "PARAM ycoef = {$cm11, $cm21, $cm31};\n" + "PARAM ucoef = {$cm12, $cm22, $cm32};\n" + "PARAM vcoef = {$cm13, $cm23, $cm33};\n" + "PARAM offsets = {$cm14, $cm24, $cm34};\n" + "PARAM gamma = {$gamma_r, $gamma_g, $gamma_b};\n" + "TEMP res;\n" + "MAD res.rgb, yuv.rrrr, ycoef, offsets;\n" + "MAD res.rgb, yuv.gggg, ucoef, res;\n" + "MAD_SAT res.rgb, yuv.bbbb, vcoef, res;\n" + "POW res.r, res.r, gamma.r;\n" + "POW res.g, res.g, gamma.g;\n" + "POW res.b, res.b, gamma.b;\n"; + +static const char yuv_lookup_prog_template[] = + "PARAM ycoef = {$cm11, $cm21, $cm31, 0};\n" + "PARAM ucoef = {$cm12, $cm22, $cm32, 0};\n" + "PARAM vcoef = {$cm13, $cm23, $cm33, 0};\n" + "PARAM offsets = {$cm14, $cm24, $cm34, 0.125};\n" + "TEMP res;\n" + "MAD res, yuv.rrrr, ycoef, offsets;\n" + "MAD res.rgb, yuv.gggg, ucoef, res;\n" + "MAD res.rgb, yuv.bbbb, vcoef, res;\n" + "TEX res.r, res.raaa, texture[$conv_tex0], 2D;\n" + "ADD res.a, res.a, 0.25;\n" + "TEX res.g, res.gaaa, texture[$conv_tex0], 2D;\n" + "ADD res.a, res.a, 0.25;\n" + "TEX res.b, res.baaa, texture[$conv_tex0], 2D;\n"; + +static const char yuv_lookup3d_prog_template[] = + "TEMP res;\n" + "TEX res, yuv, texture[$conv_tex0], 3D;\n"; + +static const char noise_filt_template[] = + "MUL coord.xy, fragment.texcoord[0], {$noise_sx, $noise_sy};\n" + "TEMP rand;\n" + "TEX rand.r, coord.x, texture[$noise_filt_tex], 1D;\n" + "ADD rand.r, rand.r, coord.y;\n" + "TEX rand.r, rand.r, texture[$noise_filt_tex], 1D;\n" + "MAD res.rgb, rand.rrrr, {$noise_str, $noise_str, $noise_str}, res;\n"; + +/** + * \brief creates and initializes helper textures needed for scaling texture read + * \param scaler scaler type to create texture for + * \param texu contains next free texture unit number + * \param texs texture unit ids for the scaler are stored in this array + */ +static void create_scaler_textures(GL *gl, int scaler, int *texu, char *texs) +{ + switch (scaler) { + case YUV_SCALER_BILIN: + case YUV_SCALER_BICUB_NOTEX: + case YUV_SCALER_UNSHARP: + case YUV_SCALER_UNSHARP2: + break; + case YUV_SCALER_BICUB: + case YUV_SCALER_BICUB_X: + texs[0] = (*texu)++; + gen_spline_lookup_tex(gl, GL_TEXTURE0 + texs[0]); + texs[0] += '0'; + break; + default: + mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown scaler type %i\n", scaler); + } +} + +//! resolution of texture for gamma lookup table +#define LOOKUP_RES 512 +//! resolution for 3D yuv->rgb conversion lookup table +#define LOOKUP_3DRES 32 +/** + * \brief creates and initializes helper textures needed for yuv conversion + * \param params struct containing parameters like brightness, gamma, ... + * \param texu contains next free texture unit number + * \param texs texture unit ids for the conversion are stored in this array + */ +static void create_conv_textures(GL *gl, gl_conversion_params_t *params, + int *texu, char *texs) +{ + unsigned char *lookup_data = NULL; + int conv = YUV_CONVERSION(params->type); + switch (conv) { + case YUV_CONVERSION_FRAGMENT: + case YUV_CONVERSION_FRAGMENT_POW: + break; + case YUV_CONVERSION_FRAGMENT_LOOKUP: + texs[0] = (*texu)++; + gl->ActiveTexture(GL_TEXTURE0 + texs[0]); + lookup_data = malloc(4 * LOOKUP_RES); + mp_gen_gamma_map(lookup_data, LOOKUP_RES, params->csp_params.rgamma); + mp_gen_gamma_map(&lookup_data[LOOKUP_RES], LOOKUP_RES, + params->csp_params.ggamma); + mp_gen_gamma_map(&lookup_data[2 * LOOKUP_RES], LOOKUP_RES, + params->csp_params.bgamma); + glCreateClearTex(gl, GL_TEXTURE_2D, GL_LUMINANCE8, GL_LUMINANCE, + GL_UNSIGNED_BYTE, GL_LINEAR, LOOKUP_RES, 4, 0); + glUploadTex(gl, GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE, + lookup_data, LOOKUP_RES, 0, 0, LOOKUP_RES, 4, 0); + gl->ActiveTexture(GL_TEXTURE0); + texs[0] += '0'; + break; + case YUV_CONVERSION_FRAGMENT_LOOKUP3D: + { + int sz = LOOKUP_3DRES + 2; // texture size including borders + if (!gl->TexImage3D) { + mp_msg(MSGT_VO, MSGL_ERR, "[gl] Missing 3D texture function!\n"); + break; + } + texs[0] = (*texu)++; + gl->ActiveTexture(GL_TEXTURE0 + texs[0]); + lookup_data = malloc(3 * sz * sz * sz); + mp_gen_yuv2rgb_map(¶ms->csp_params, lookup_data, LOOKUP_3DRES); + glAdjustAlignment(gl, sz); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->TexImage3D(GL_TEXTURE_3D, 0, 3, sz, sz, sz, 1, + GL_RGB, GL_UNSIGNED_BYTE, lookup_data); + gl->TexParameterf(GL_TEXTURE_3D, GL_TEXTURE_PRIORITY, 1.0); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP); + gl->ActiveTexture(GL_TEXTURE0); + texs[0] += '0'; + } + break; + default: + mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown conversion type %i\n", conv); + } + free(lookup_data); +} + +/** + * \brief adds a scaling texture read at the current fragment program position + * \param scaler type of scaler to insert + * \param prog pointer to fragment program so far + * \param texs array containing the texture unit identifiers for this scaler + * \param in_tex texture unit the scaler should read from + * \param out_comp component of the yuv variable the scaler stores the result in + * \param rect if rectangular (pixel) adressing should be used for in_tex + * \param texw width of the in_tex texture + * \param texh height of the in_tex texture + * \param strength strength of filter effect if the scaler does some kind of filtering + */ +static void add_scaler(int scaler, char **prog, char *texs, + char in_tex, char out_comp, int rect, int texw, int texh, + double strength) +{ + const char *ttype = rect ? "RECT" : "2D"; + const float ptw = rect ? 1.0 : 1.0 / texw; + const float pth = rect ? 1.0 : 1.0 / texh; + switch (scaler) { + case YUV_SCALER_BILIN: + append_template(prog, bilin_filt_template); + break; + case YUV_SCALER_BICUB: + if (rect) + append_template(prog, bicub_filt_template_RECT); + else + append_template(prog, bicub_filt_template_2D); + break; + case YUV_SCALER_BICUB_X: + if (rect) + append_template(prog, bicub_x_filt_template_RECT); + else + append_template(prog, bicub_x_filt_template_2D); + break; + case YUV_SCALER_BICUB_NOTEX: + if (rect) + append_template(prog, bicub_notex_filt_template_RECT); + else + append_template(prog, bicub_notex_filt_template_2D); + break; + case YUV_SCALER_UNSHARP: + append_template(prog, unsharp_filt_template); + break; + case YUV_SCALER_UNSHARP2: + append_template(prog, unsharp_filt_template2); + break; + } + + replace_var_char(prog, "texs", texs[0]); + replace_var_char(prog, "in_tex", in_tex); + replace_var_char(prog, "out_comp", out_comp); + replace_var_str(prog, "tex_type", ttype); + replace_var_float(prog, "texw", texw); + replace_var_float(prog, "texh", texh); + replace_var_float(prog, "ptw", ptw); + replace_var_float(prog, "pth", pth); + + // this is silly, not sure if that couldn't be in the shader source instead + replace_var_float(prog, "ptw_05", ptw * 0.5); + replace_var_float(prog, "pth_05", pth * 0.5); + replace_var_float(prog, "ptw_15", ptw * 1.5); + replace_var_float(prog, "pth_15", pth * 1.5); + replace_var_float(prog, "ptw_12", ptw * 1.2); + replace_var_float(prog, "pth_12", pth * 1.2); + + replace_var_float(prog, "strength", strength); +} + +static const struct { + const char *name; + GLenum cur; + GLenum max; +} progstats[] = { + {"instructions", 0x88A0, 0x88A1}, + {"native instructions", 0x88A2, 0x88A3}, + {"temporaries", 0x88A4, 0x88A5}, + {"native temporaries", 0x88A6, 0x88A7}, + {"parameters", 0x88A8, 0x88A9}, + {"native parameters", 0x88AA, 0x88AB}, + {"attribs", 0x88AC, 0x88AD}, + {"native attribs", 0x88AE, 0x88AF}, + {"ALU instructions", 0x8805, 0x880B}, + {"TEX instructions", 0x8806, 0x880C}, + {"TEX indirections", 0x8807, 0x880D}, + {"native ALU instructions", 0x8808, 0x880E}, + {"native TEX instructions", 0x8809, 0x880F}, + {"native TEX indirections", 0x880A, 0x8810}, + {NULL, 0, 0} +}; + +/** + * \brief load the specified GPU Program + * \param target program target to load into, only GL_FRAGMENT_PROGRAM is tested + * \param prog program string + * \return 1 on success, 0 otherwise + */ +static int loadGPUProgram(GL *gl, GLenum target, char *prog) +{ + int i; + GLint cur = 0, max = 0, err = 0; + if (!gl->ProgramString) { + mp_msg(MSGT_VO, MSGL_ERR, "[gl] Missing GPU program function\n"); + return 0; |