diff options
Diffstat (limited to 'libvo')
-rw-r--r-- | libvo/d3d_shader_yuv.hlsl | 30 | ||||
-rw-r--r-- | libvo/d3d_shader_yuv_2ch.h | 170 | ||||
-rw-r--r-- | libvo/vo_direct3d.c | 80 |
3 files changed, 272 insertions, 8 deletions
diff --git a/libvo/d3d_shader_yuv.hlsl b/libvo/d3d_shader_yuv.hlsl index 9d46e536fc..b17e257210 100644 --- a/libvo/d3d_shader_yuv.hlsl +++ b/libvo/d3d_shader_yuv.hlsl @@ -1,20 +1,44 @@ // Compile with: // fxc.exe /Tps_2_0 /Fhd3d_shader_yuv.h d3d_shader_yuv.hlsl /Vnd3d_shader_yuv +// fxc.exe /Tps_2_0 /Fhd3d_shader_yuv_2ch.h d3d_shader_yuv.hlsl /Vnd3d_shader_yuv_2ch /DUSE_2CH=1 + +// Be careful with this shader. You can't rely on automatically assigned +// constant slots, since we don't load the shader with D3DX. All uniform +// variables must be mapped to hardcoded constant slots with register(). sampler2D tex0 : register(s0); sampler2D tex1 : register(s1); sampler2D tex2 : register(s2); uniform float4x4 colormatrix : register(c0); +uniform float2 depth : register(c5); + +#ifdef USE_2CH + +float1 sample(sampler2D tex, float2 t) +{ + // Sample from A8L8 format as if we sampled a single value from L16. + // We combine the 2 channel values back into one. + return dot(tex2D(tex, t).xw, depth); +} + +#else + +float1 sample(sampler2D tex, float2 t) +{ + return tex2D(tex, t).x; +} + +#endif float4 main(float2 t0 : TEXCOORD0, float2 t1 : TEXCOORD1, float2 t2 : TEXCOORD2) : COLOR { - float4 c = float4(tex2D(tex0, t0).x, - tex2D(tex1, t1).x, - tex2D(tex2, t2).x, + float4 c = float4(sample(tex0, t0), + sample(tex1, t1), + sample(tex2, t2), 1); return mul(c, colormatrix); } diff --git a/libvo/d3d_shader_yuv_2ch.h b/libvo/d3d_shader_yuv_2ch.h new file mode 100644 index 0000000000..45dcc73992 --- /dev/null +++ b/libvo/d3d_shader_yuv_2ch.h @@ -0,0 +1,170 @@ +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022 +// +// fxc /Tps_2_0 /Fhz:\tmp\mplayer\libvo\d3d_shader_yuv_2ch.h +// z:\tmp\mplayer\libvo\d3d_shader_yuv.hlsl /Vnd3d_shader_yuv_2ch +// /DUSE_2CH=1 +// +// +// Parameters: +// +// float4x4 colormatrix; +// float2 depth; +// sampler2D tex0; +// sampler2D tex1; +// sampler2D tex2; +// +// +// Registers: +// +// Name Reg Size +// ------------ ----- ---- +// colormatrix c0 4 +// depth c5 1 +// tex0 s0 
1 +// tex1 s1 1 +// tex2 s2 1 +// + + ps_2_0 + def c4, 1, 0, 0, 0 + dcl t0.xy + dcl t1.xy + dcl t2.xy + dcl_2d s0 + dcl_2d s1 + dcl_2d s2 + texld r0, t0, s0 + texld r1, t1, s1 + texld r2, t2, s2 + mul r0.x, r0.x, c5.x + mad r0.x, r0.w, c5.y, r0.x + mul r1.x, r1.x, c5.x + mad r0.y, r1.w, c5.y, r1.x + mul r1.x, r2.x, c5.x + mad r0.z, r2.w, c5.y, r1.x + mov r0.w, c4.x + dp4 r1.x, r0, c0 + dp4 r1.y, r0, c1 + dp4 r1.z, r0, c2 + dp4 r1.w, r0, c3 + mov oC0, r1 + +// approximately 15 instruction slots used (3 texture, 12 arithmetic) +#endif + +const BYTE d3d_shader_yuv_2ch[] = +{ + 0, 2, 255, 255, 254, 255, + 78, 0, 67, 84, 65, 66, + 28, 0, 0, 0, 3, 1, + 0, 0, 0, 2, 255, 255, + 5, 0, 0, 0, 28, 0, + 0, 0, 0, 1, 0, 0, + 252, 0, 0, 0, 128, 0, + 0, 0, 2, 0, 0, 0, + 4, 0, 2, 0, 140, 0, + 0, 0, 0, 0, 0, 0, + 156, 0, 0, 0, 2, 0, + 5, 0, 1, 0, 22, 0, + 164, 0, 0, 0, 0, 0, + 0, 0, 180, 0, 0, 0, + 3, 0, 0, 0, 1, 0, + 2, 0, 188, 0, 0, 0, + 0, 0, 0, 0, 204, 0, + 0, 0, 3, 0, 1, 0, + 1, 0, 6, 0, 212, 0, + 0, 0, 0, 0, 0, 0, + 228, 0, 0, 0, 3, 0, + 2, 0, 1, 0, 10, 0, + 236, 0, 0, 0, 0, 0, + 0, 0, 99, 111, 108, 111, + 114, 109, 97, 116, 114, 105, + 120, 0, 3, 0, 3, 0, + 4, 0, 4, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 100, 101, 112, 116, 104, 0, + 171, 171, 1, 0, 3, 0, + 1, 0, 2, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 116, 101, 120, 48, 0, 171, + 171, 171, 4, 0, 12, 0, + 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 116, 101, 120, 49, 0, 171, + 171, 171, 4, 0, 12, 0, + 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 116, 101, 120, 50, 0, 171, + 171, 171, 4, 0, 12, 0, + 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 112, 115, 95, 50, 95, 48, + 0, 77, 105, 99, 114, 111, + 115, 111, 102, 116, 32, 40, + 82, 41, 32, 72, 76, 83, + 76, 32, 83, 104, 97, 100, + 101, 114, 32, 67, 111, 109, + 112, 105, 108, 101, 114, 32, + 57, 46, 50, 55, 46, 57, + 53, 50, 46, 51, 48, 50, + 50, 0, 81, 0, 0, 5, + 4, 0, 15, 160, 0, 0, + 128, 63, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 31, 0, 0, 2, + 0, 0, 0, 128, 0, 0, + 3, 176, 31, 0, 0, 2, + 0, 0, 0, 
128, 1, 0, + 3, 176, 31, 0, 0, 2, + 0, 0, 0, 128, 2, 0, + 3, 176, 31, 0, 0, 2, + 0, 0, 0, 144, 0, 8, + 15, 160, 31, 0, 0, 2, + 0, 0, 0, 144, 1, 8, + 15, 160, 31, 0, 0, 2, + 0, 0, 0, 144, 2, 8, + 15, 160, 66, 0, 0, 3, + 0, 0, 15, 128, 0, 0, + 228, 176, 0, 8, 228, 160, + 66, 0, 0, 3, 1, 0, + 15, 128, 1, 0, 228, 176, + 1, 8, 228, 160, 66, 0, + 0, 3, 2, 0, 15, 128, + 2, 0, 228, 176, 2, 8, + 228, 160, 5, 0, 0, 3, + 0, 0, 1, 128, 0, 0, + 0, 128, 5, 0, 0, 160, + 4, 0, 0, 4, 0, 0, + 1, 128, 0, 0, 255, 128, + 5, 0, 85, 160, 0, 0, + 0, 128, 5, 0, 0, 3, + 1, 0, 1, 128, 1, 0, + 0, 128, 5, 0, 0, 160, + 4, 0, 0, 4, 0, 0, + 2, 128, 1, 0, 255, 128, + 5, 0, 85, 160, 1, 0, + 0, 128, 5, 0, 0, 3, + 1, 0, 1, 128, 2, 0, + 0, 128, 5, 0, 0, 160, + 4, 0, 0, 4, 0, 0, + 4, 128, 2, 0, 255, 128, + 5, 0, 85, 160, 1, 0, + 0, 128, 1, 0, 0, 2, + 0, 0, 8, 128, 4, 0, + 0, 160, 9, 0, 0, 3, + 1, 0, 1, 128, 0, 0, + 228, 128, 0, 0, 228, 160, + 9, 0, 0, 3, 1, 0, + 2, 128, 0, 0, 228, 128, + 1, 0, 228, 160, 9, 0, + 0, 3, 1, 0, 4, 128, + 0, 0, 228, 128, 2, 0, + 228, 160, 9, 0, 0, 3, + 1, 0, 8, 128, 0, 0, + 228, 128, 3, 0, 228, 160, + 1, 0, 0, 2, 0, 8, + 15, 128, 1, 0, 228, 128, + 255, 255, 0, 0 +}; diff --git a/libvo/vo_direct3d.c b/libvo/vo_direct3d.c index 95bff26454..fdecee8c94 100644 --- a/libvo/vo_direct3d.c +++ b/libvo/vo_direct3d.c @@ -48,13 +48,15 @@ // shaders generated by fxc.exe from d3d_shader_yuv.hlsl #include "d3d_shader_yuv.h" +#include "d3d_shader_yuv_2ch.h" // TODO: beg someone to add this (there is already IMGFMT_Y8) // equals MAKEFOURCC('Y', '1', '6', ' ') #define IMGFMT_Y16 0x20363159 +#define IMGFMT_A8Y8 MAKEFOURCC('A', '8', 'Y', '8') -#define IMGFMT_IS_Y(x) ((x) == IMGFMT_Y8 || (x) == IMGFMT_Y16) +#define IMGFMT_IS_Y(x) ((x) == IMGFMT_Y8 || (x) == IMGFMT_Y16 || (x) == IMGFMT_A8Y8) #define IMGFMT_Y_DEPTH(x) ((x) == IMGFMT_Y8 ? 
8 : 16) #define DEVTYPE D3DDEVTYPE_HAL @@ -132,6 +134,7 @@ typedef struct d3d_priv { int opt_texture_memory; int opt_swap_discard; int opt_exact_backbuffer; + int opt_16bit_textures; struct vo *vo; @@ -151,6 +154,7 @@ typedef struct d3d_priv { StretchRect */ bool use_shaders; /**< use shader for YUV color conversion (or possibly for RGB video equalizers) */ + bool use_2ch_hack; /**< 2 byte YUV formats use 2 channel hack */ int plane_count; struct texplane planes[3]; @@ -190,6 +194,7 @@ typedef struct d3d_priv { int max_texture_height; /**< from the device capabilities */ D3DMATRIX d3d_colormatrix; + float d3d_depth_vector[4]; struct mp_csp_details colorspace; struct mp_csp_equalizer video_eq; @@ -224,6 +229,7 @@ static const struct fmt_entry fmt_table[] = { // grayscale (can be considered both packed and planar) {IMGFMT_Y8, D3DFMT_L8}, {IMGFMT_Y16, D3DFMT_L16}, + {IMGFMT_A8Y8, D3DFMT_A8L8}, {0}, }; @@ -1013,6 +1019,9 @@ static uint32_t d3d_draw_frame(d3d_priv *priv) IDirect3DDevice9_SetPixelShaderConstantF(priv->d3d_device, 0, &priv->d3d_colormatrix._11, 4); + IDirect3DDevice9_SetPixelShaderConstantF(priv->d3d_device, 5, + priv->d3d_depth_vector, + 1); } IDirect3DDevice9_SetFVF(priv->d3d_device, D3DFVF_VIDEO_VERTEX); @@ -1130,7 +1139,14 @@ static D3DFORMAT check_shader_conversion(d3d_priv *priv, uint32_t fmt) bool is_8bit = component_bits == 8; if (!is_8bit && priv->opt_only_8bit) return 0; - return check_format(priv, is_8bit ? IMGFMT_Y8 : IMGFMT_Y16, true); + int texfmt = IMGFMT_Y8; + if (!is_8bit) { + if (priv->opt_16bit_textures) + texfmt = IMGFMT_Y16; + else + texfmt = IMGFMT_A8Y8; + } + return check_format(priv, texfmt, true); } // Return if the image format can be used. 
If it can, decide which rendering @@ -1165,6 +1181,7 @@ static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize) priv->use_shaders = false; priv->use_textures = false; + priv->use_2ch_hack = false; priv->movie_src_fmt = 0; priv->pixel_shader_data = NULL; priv->plane_count = 0; @@ -1209,7 +1226,14 @@ static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize) planes[n].clearval = get_chroma_clear_val(component_bits); } } - priv->pixel_shader_data = d3d_shader_yuv; + if (shader_d3dfmt != D3DFMT_A8L8) { + priv->pixel_shader_data = d3d_shader_yuv; + } else { + mp_msg(MSGT_VO, MSGL_WARN, "<vo_direct3d>Using YUV 2ch hack.\n"); + + priv->pixel_shader_data = d3d_shader_yuv_2ch; + priv->use_2ch_hack = true; + } } for (n = 0; n < priv->plane_count; n++) { @@ -1253,6 +1277,30 @@ static int query_format(d3d_priv *priv, uint32_t movie_fmt) * * ****************************************************************************/ +static void get_2ch_depth_multiplier(int depth, float *out_f1, float *out_f2) { + // How to get these values: + // The suffixes i8 and i16 denote 8/16 bit fixed point values. + // The suffix f is for float, ideally in the range 0.0-1.0. + // c_i8 is a two component vector, sampled from a two channel texture. + // (c_i8.x is the low byte, c_i8.y is the high byte) + // r_f is the resulting color scalar value. + // + // c_i8 = c_f * (2^8-1) + // r_i16 = c_i8.x + c_i8.y * 2^8 + // r_f = r_i16 / (2^16-1) + // = c_f.x * (2^8-1) / (2^16-1) + c_f.y * (2^8-1) * 2^8 / (2^16-1) + // = c_f.x * ((2^8-1) / (2^16-1)) + c_f.y * (2^8 * ((2^8-1) / (2^16-1))) + // out = ((2^8-1) / (2^16-1), 2^8 * ((2^8-1) / (2^16-1))) + // The result color is r_f = dot(c_f, out). + // The same goes for other bit depths, such as 10 bit. Assuming (2^depth-1) + // is the maximum possible value at that depth, you have to scale the value + // r_i16 with it, i.e. the factor (2^16-1) in the formula above has to be + // replaced with (2^depth-1). 
+ float factor = (float)((1 << 8) - 1) / (float)((1 << depth) - 1); + *out_f1 = factor; + *out_f2 = 256.0 * factor; +} + static void update_colorspace(d3d_priv *priv) { float coeff[3][4]; @@ -1260,8 +1308,19 @@ static void update_colorspace(d3d_priv *priv) mp_csp_copy_equalizer_values(&csp, &priv->video_eq); if (priv->use_shaders) { - csp.input_bits = priv->planes[0].bits_per_pixel; - csp.texture_bits = (csp.input_bits + 7) & ~7; + if (!priv->use_2ch_hack) { + csp.input_bits = priv->planes[0].bits_per_pixel; + csp.texture_bits = (csp.input_bits + 7) & ~7; + } else { + float f1, f2; + get_2ch_depth_multiplier(priv->planes[0].bits_per_pixel, &f1, &f2); + priv->d3d_depth_vector[0] = f1; + priv->d3d_depth_vector[1] = f2; + priv->d3d_depth_vector[2] = priv->d3d_depth_vector[3] = 0; + // no change + csp.input_bits = 8; + csp.texture_bits = 8; + } } mp_get_yuv2rgb_coeffs(&csp, coeff); @@ -1314,6 +1373,14 @@ const char *options_help_text = "-vo direct3d command line help:\n" " Might be slower too, as it must (?) clear every frame.\n" " exact-backbuffer\n" " Always resize the backbuffer to window size.\n" +" no16bit-textures\n" +" Don't use textures with a 16 bit color channel for YUV formats that\n" +" use more than 8 bits per component. Instead, use D3DFMT_A8L8 textures\n" +" and compute the values sampled from the 2 channels back into one.\n" +" Might be slower, since the shader becomes slightly more complicated.\n" +" Might work better, if your drivers either don't support D3DFMT_L16,\n" +" or if either the texture unit or the shaders don't operate in at least\n" +" 16 bit precision.\n" ""; /** @brief libvo Callback: Preinitialize the video card. 
@@ -1331,6 +1398,8 @@ static int preinit_internal(struct vo *vo, const char *arg, bool allow_shaders) *priv = (d3d_priv) { .vo = vo, + .opt_16bit_textures = true, + .colorspace = MP_CSP_DETAILS_DEFAULTS, .video_eq = { MP_CSP_EQ_CAPS_COLORMATRIX }, }; @@ -1351,6 +1420,7 @@ static int preinit_internal(struct vo *vo, const char *arg, bool allow_shaders) {"texture-memory", OPT_ARG_INT, &priv->opt_texture_memory}, {"swap-discard", OPT_ARG_BOOL, &priv->opt_swap_discard}, {"exact-backbuffer", OPT_ARG_BOOL, &priv->opt_exact_backbuffer}, + {"16bit-textures", OPT_ARG_BOOL, &priv->opt_16bit_textures}, {NULL} }; if (subopt_parse(arg, subopts) != 0) { |