summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libvo/d3d_shader_yuv.hlsl30
-rw-r--r--libvo/d3d_shader_yuv_2ch.h170
-rw-r--r--libvo/vo_direct3d.c80
3 files changed, 272 insertions, 8 deletions
diff --git a/libvo/d3d_shader_yuv.hlsl b/libvo/d3d_shader_yuv.hlsl
index 9d46e536fc..b17e257210 100644
--- a/libvo/d3d_shader_yuv.hlsl
+++ b/libvo/d3d_shader_yuv.hlsl
@@ -1,20 +1,44 @@
// Compile with:
// fxc.exe /Tps_2_0 /Fhd3d_shader_yuv.h d3d_shader_yuv.hlsl /Vnd3d_shader_yuv
+// fxc.exe /Tps_2_0 /Fhd3d_shader_yuv_2ch.h d3d_shader_yuv.hlsl /Vnd3d_shader_yuv_2ch /DUSE_2CH=1
+
+// Be careful with this shader. You can't use constant slots, since we don't
+// load the shader with D3DX. All uniform variables are mapped to hardcoded
+// constant slots.
sampler2D tex0 : register(s0);
sampler2D tex1 : register(s1);
sampler2D tex2 : register(s2);
uniform float4x4 colormatrix : register(c0);
+uniform float2 depth : register(c5);
+
+#ifdef USE_2CH
+
+float1 sample(sampler2D tex, float2 t)
+{
+ // Sample from A8L8 format as if we sampled a single value from L16.
+ // We compute the 2 channel values back into one.
+ return dot(tex2D(tex, t).xw, depth);
+}
+
+#else
+
+float1 sample(sampler2D tex, float2 t)
+{
+ return tex2D(tex, t).x;
+}
+
+#endif
float4 main(float2 t0 : TEXCOORD0,
float2 t1 : TEXCOORD1,
float2 t2 : TEXCOORD2)
: COLOR
{
- float4 c = float4(tex2D(tex0, t0).x,
- tex2D(tex1, t1).x,
- tex2D(tex2, t2).x,
+ float4 c = float4(sample(tex0, t0),
+ sample(tex1, t1),
+ sample(tex2, t2),
1);
return mul(c, colormatrix);
}
diff --git a/libvo/d3d_shader_yuv_2ch.h b/libvo/d3d_shader_yuv_2ch.h
new file mode 100644
index 0000000000..45dcc73992
--- /dev/null
+++ b/libvo/d3d_shader_yuv_2ch.h
@@ -0,0 +1,170 @@
+#if 0
+//
+// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022
+//
+// fxc /Tps_2_0 /Fhz:\tmp\mplayer\libvo\d3d_shader_yuv_2ch.h
+// z:\tmp\mplayer\libvo\d3d_shader_yuv.hlsl /Vnd3d_shader_yuv_2ch
+// /DUSE_2CH=1
+//
+//
+// Parameters:
+//
+// float4x4 colormatrix;
+// float2 depth;
+// sampler2D tex0;
+// sampler2D tex1;
+// sampler2D tex2;
+//
+//
+// Registers:
+//
+// Name Reg Size
+// ------------ ----- ----
+// colormatrix c0 4
+// depth c5 1
+// tex0 s0 1
+// tex1 s1 1
+// tex2 s2 1
+//
+
+ ps_2_0
+ def c4, 1, 0, 0, 0
+ dcl t0.xy
+ dcl t1.xy
+ dcl t2.xy
+ dcl_2d s0
+ dcl_2d s1
+ dcl_2d s2
+ texld r0, t0, s0
+ texld r1, t1, s1
+ texld r2, t2, s2
+ mul r0.x, r0.x, c5.x
+ mad r0.x, r0.w, c5.y, r0.x
+ mul r1.x, r1.x, c5.x
+ mad r0.y, r1.w, c5.y, r1.x
+ mul r1.x, r2.x, c5.x
+ mad r0.z, r2.w, c5.y, r1.x
+ mov r0.w, c4.x
+ dp4 r1.x, r0, c0
+ dp4 r1.y, r0, c1
+ dp4 r1.z, r0, c2
+ dp4 r1.w, r0, c3
+ mov oC0, r1
+
+// approximately 15 instruction slots used (3 texture, 12 arithmetic)
+#endif
+
+const BYTE d3d_shader_yuv_2ch[] =
+{
+ 0, 2, 255, 255, 254, 255,
+ 78, 0, 67, 84, 65, 66,
+ 28, 0, 0, 0, 3, 1,
+ 0, 0, 0, 2, 255, 255,
+ 5, 0, 0, 0, 28, 0,
+ 0, 0, 0, 1, 0, 0,
+ 252, 0, 0, 0, 128, 0,
+ 0, 0, 2, 0, 0, 0,
+ 4, 0, 2, 0, 140, 0,
+ 0, 0, 0, 0, 0, 0,
+ 156, 0, 0, 0, 2, 0,
+ 5, 0, 1, 0, 22, 0,
+ 164, 0, 0, 0, 0, 0,
+ 0, 0, 180, 0, 0, 0,
+ 3, 0, 0, 0, 1, 0,
+ 2, 0, 188, 0, 0, 0,
+ 0, 0, 0, 0, 204, 0,
+ 0, 0, 3, 0, 1, 0,
+ 1, 0, 6, 0, 212, 0,
+ 0, 0, 0, 0, 0, 0,
+ 228, 0, 0, 0, 3, 0,
+ 2, 0, 1, 0, 10, 0,
+ 236, 0, 0, 0, 0, 0,
+ 0, 0, 99, 111, 108, 111,
+ 114, 109, 97, 116, 114, 105,
+ 120, 0, 3, 0, 3, 0,
+ 4, 0, 4, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0,
+ 100, 101, 112, 116, 104, 0,
+ 171, 171, 1, 0, 3, 0,
+ 1, 0, 2, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0,
+ 116, 101, 120, 48, 0, 171,
+ 171, 171, 4, 0, 12, 0,
+ 1, 0, 1, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0,
+ 116, 101, 120, 49, 0, 171,
+ 171, 171, 4, 0, 12, 0,
+ 1, 0, 1, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0,
+ 116, 101, 120, 50, 0, 171,
+ 171, 171, 4, 0, 12, 0,
+ 1, 0, 1, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0,
+ 112, 115, 95, 50, 95, 48,
+ 0, 77, 105, 99, 114, 111,
+ 115, 111, 102, 116, 32, 40,
+ 82, 41, 32, 72, 76, 83,
+ 76, 32, 83, 104, 97, 100,
+ 101, 114, 32, 67, 111, 109,
+ 112, 105, 108, 101, 114, 32,
+ 57, 46, 50, 55, 46, 57,
+ 53, 50, 46, 51, 48, 50,
+ 50, 0, 81, 0, 0, 5,
+ 4, 0, 15, 160, 0, 0,
+ 128, 63, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 31, 0, 0, 2,
+ 0, 0, 0, 128, 0, 0,
+ 3, 176, 31, 0, 0, 2,
+ 0, 0, 0, 128, 1, 0,
+ 3, 176, 31, 0, 0, 2,
+ 0, 0, 0, 128, 2, 0,
+ 3, 176, 31, 0, 0, 2,
+ 0, 0, 0, 144, 0, 8,
+ 15, 160, 31, 0, 0, 2,
+ 0, 0, 0, 144, 1, 8,
+ 15, 160, 31, 0, 0, 2,
+ 0, 0, 0, 144, 2, 8,
+ 15, 160, 66, 0, 0, 3,
+ 0, 0, 15, 128, 0, 0,
+ 228, 176, 0, 8, 228, 160,
+ 66, 0, 0, 3, 1, 0,
+ 15, 128, 1, 0, 228, 176,
+ 1, 8, 228, 160, 66, 0,
+ 0, 3, 2, 0, 15, 128,
+ 2, 0, 228, 176, 2, 8,
+ 228, 160, 5, 0, 0, 3,
+ 0, 0, 1, 128, 0, 0,
+ 0, 128, 5, 0, 0, 160,
+ 4, 0, 0, 4, 0, 0,
+ 1, 128, 0, 0, 255, 128,
+ 5, 0, 85, 160, 0, 0,
+ 0, 128, 5, 0, 0, 3,
+ 1, 0, 1, 128, 1, 0,
+ 0, 128, 5, 0, 0, 160,
+ 4, 0, 0, 4, 0, 0,
+ 2, 128, 1, 0, 255, 128,
+ 5, 0, 85, 160, 1, 0,
+ 0, 128, 5, 0, 0, 3,
+ 1, 0, 1, 128, 2, 0,
+ 0, 128, 5, 0, 0, 160,
+ 4, 0, 0, 4, 0, 0,
+ 4, 128, 2, 0, 255, 128,
+ 5, 0, 85, 160, 1, 0,
+ 0, 128, 1, 0, 0, 2,
+ 0, 0, 8, 128, 4, 0,
+ 0, 160, 9, 0, 0, 3,
+ 1, 0, 1, 128, 0, 0,
+ 228, 128, 0, 0, 228, 160,
+ 9, 0, 0, 3, 1, 0,
+ 2, 128, 0, 0, 228, 128,
+ 1, 0, 228, 160, 9, 0,
+ 0, 3, 1, 0, 4, 128,
+ 0, 0, 228, 128, 2, 0,
+ 228, 160, 9, 0, 0, 3,
+ 1, 0, 8, 128, 0, 0,
+ 228, 128, 3, 0, 228, 160,
+ 1, 0, 0, 2, 0, 8,
+ 15, 128, 1, 0, 228, 128,
+ 255, 255, 0, 0
+};
diff --git a/libvo/vo_direct3d.c b/libvo/vo_direct3d.c
index 95bff26454..fdecee8c94 100644
--- a/libvo/vo_direct3d.c
+++ b/libvo/vo_direct3d.c
@@ -48,13 +48,15 @@
// shaders generated by fxc.exe from d3d_shader_yuv.hlsl
#include "d3d_shader_yuv.h"
+#include "d3d_shader_yuv_2ch.h"
// TODO: beg someone to add this (there is already IMGFMT_Y8)
// equals MAKEFOURCC('Y', '1', '6', ' ')
#define IMGFMT_Y16 0x20363159
+#define IMGFMT_A8Y8 MAKEFOURCC('A', '8', 'Y', '8')
-#define IMGFMT_IS_Y(x) ((x) == IMGFMT_Y8 || (x) == IMGFMT_Y16)
+#define IMGFMT_IS_Y(x) ((x) == IMGFMT_Y8 || (x) == IMGFMT_Y16 || (x) == IMGFMT_A8Y8)
#define IMGFMT_Y_DEPTH(x) ((x) == IMGFMT_Y8 ? 8 : 16)
#define DEVTYPE D3DDEVTYPE_HAL
@@ -132,6 +134,7 @@ typedef struct d3d_priv {
int opt_texture_memory;
int opt_swap_discard;
int opt_exact_backbuffer;
+ int opt_16bit_textures;
struct vo *vo;
@@ -151,6 +154,7 @@ typedef struct d3d_priv {
StretchRect */
bool use_shaders; /**< use shader for YUV color conversion
(or possibly for RGB video equalizers) */
+ bool use_2ch_hack; /**< 2 byte YUV formats use 2 channel hack */
int plane_count;
struct texplane planes[3];
@@ -190,6 +194,7 @@ typedef struct d3d_priv {
int max_texture_height; /**< from the device capabilities */
D3DMATRIX d3d_colormatrix;
+ float d3d_depth_vector[4];
struct mp_csp_details colorspace;
struct mp_csp_equalizer video_eq;
@@ -224,6 +229,7 @@ static const struct fmt_entry fmt_table[] = {
// grayscale (can be considered both packed and planar)
{IMGFMT_Y8, D3DFMT_L8},
{IMGFMT_Y16, D3DFMT_L16},
+ {IMGFMT_A8Y8, D3DFMT_A8L8},
{0},
};
@@ -1013,6 +1019,9 @@ static uint32_t d3d_draw_frame(d3d_priv *priv)
IDirect3DDevice9_SetPixelShaderConstantF(priv->d3d_device, 0,
&priv->d3d_colormatrix._11,
4);
+ IDirect3DDevice9_SetPixelShaderConstantF(priv->d3d_device, 5,
+ priv->d3d_depth_vector,
+ 1);
}
IDirect3DDevice9_SetFVF(priv->d3d_device, D3DFVF_VIDEO_VERTEX);
@@ -1130,7 +1139,14 @@ static D3DFORMAT check_shader_conversion(d3d_priv *priv, uint32_t fmt)
bool is_8bit = component_bits == 8;
if (!is_8bit && priv->opt_only_8bit)
return 0;
- return check_format(priv, is_8bit ? IMGFMT_Y8 : IMGFMT_Y16, true);
+ int texfmt = IMGFMT_Y8;
+ if (!is_8bit) {
+ if (priv->opt_16bit_textures)
+ texfmt = IMGFMT_Y16;
+ else
+ texfmt = IMGFMT_A8Y8;
+ }
+ return check_format(priv, texfmt, true);
}
// Return if the image format can be used. If it can, decide which rendering
@@ -1165,6 +1181,7 @@ static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize)
priv->use_shaders = false;
priv->use_textures = false;
+ priv->use_2ch_hack = false;
priv->movie_src_fmt = 0;
priv->pixel_shader_data = NULL;
priv->plane_count = 0;
@@ -1209,7 +1226,14 @@ static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize)
planes[n].clearval = get_chroma_clear_val(component_bits);
}
}
- priv->pixel_shader_data = d3d_shader_yuv;
+ if (shader_d3dfmt != D3DFMT_A8L8) {
+ priv->pixel_shader_data = d3d_shader_yuv;
+ } else {
+ mp_msg(MSGT_VO, MSGL_WARN, "<vo_direct3d>Using YUV 2ch hack.\n");
+
+ priv->pixel_shader_data = d3d_shader_yuv_2ch;
+ priv->use_2ch_hack = true;
+ }
}
for (n = 0; n < priv->plane_count; n++) {
@@ -1253,6 +1277,30 @@ static int query_format(d3d_priv *priv, uint32_t movie_fmt)
* *
****************************************************************************/
+static void get_2ch_depth_multiplier(int depth, float *out_f1, float *out_f2) {
+ // How to get these values:
+ // The suffix i8 and i16 is for values with 8/16 bit fixed point numbers.
+ // The suffix f is for float, ideally in the range 0.0-1.0.
+ // c_i8 is a two component vector, sampled from a two channel texture.
+ // (c_i8.x is the low byte, c_i8.y is the high byte)
+ // r_f is the resulting color scalar value.
+ //
+ // c_i8 = c_f * (2^8-1)
+ // r_i16 = c_i8.x + c_i8.y * 2^8
+ // r_f = r_i16 / (2^16-1)
+ // = c_f.x * (2^8-1) / (2^16-1) + c_f.y * (2^8-1) * 2^8 / (2^16-1)
+ // = c_f.x * ((2^8-1) / (2^16-1)) + c_f.y * (2^8 * ((2^8-1) / (2^16-1)))
+ // out = ((2^8-1) / (2^16-1), 2^8 * ((2^8-1) / (2^16-1)))
+ // The result color is r_f = dot(c_f, out).
+ // Same goes for other bit depth, such as 10 bit. Assuming (2^depth-1) is
+ // the maximum possible value at that depth, you have to scale the value
+ // r_i16 with it, the factor (2^16-1) in the formula above has to be
+ // replaced with (2^depth-1).
+ float factor = (float)((1 << 8) - 1) / (float)((1 << depth) - 1);
+ *out_f1 = factor;
+ *out_f2 = 256.0 * factor;
+}
+
static void update_colorspace(d3d_priv *priv)
{
float coeff[3][4];
@@ -1260,8 +1308,19 @@ static void update_colorspace(d3d_priv *priv)
mp_csp_copy_equalizer_values(&csp, &priv->video_eq);
if (priv->use_shaders) {
- csp.input_bits = priv->planes[0].bits_per_pixel;
- csp.texture_bits = (csp.input_bits + 7) & ~7;
+ if (!priv->use_2ch_hack) {
+ csp.input_bits = priv->planes[0].bits_per_pixel;
+ csp.texture_bits = (csp.input_bits + 7) & ~7;
+ } else {
+ float f1, f2;
+ get_2ch_depth_multiplier(priv->planes[0].bits_per_pixel, &f1, &f2);
+ priv->d3d_depth_vector[0] = f1;
+ priv->d3d_depth_vector[1] = f2;
+ priv->d3d_depth_vector[2] = priv->d3d_depth_vector[3] = 0;
+ // no change
+ csp.input_bits = 8;
+ csp.texture_bits = 8;
+ }
}
mp_get_yuv2rgb_coeffs(&csp, coeff);
@@ -1314,6 +1373,14 @@ const char *options_help_text = "-vo direct3d command line help:\n"
" Might be slower too, as it must (?) clear every frame.\n"
" exact-backbuffer\n"
" Always resize the backbuffer to window size.\n"
+" no16bit-textures\n"
+" Don't use textures with a 16 bit color channel for YUV formats that\n"
+" use more than 8 bits per component. Instead, use D3DFMT_A8L8 textures\n"
+" and compute the values sampled from the 2 channels back into one.\n"
+" Might be slower, since the shader becomes slightly more complicated.\n"
+" Might work better, if your drivers either don't support D3DFMT_L16,\n"
+" or if either the texture unit or the shaders don't operate in at least\n"
+" 16 bit precision.\n"
"";
/** @brief libvo Callback: Preinitialize the video card.
@@ -1331,6 +1398,8 @@ static int preinit_internal(struct vo *vo, const char *arg, bool allow_shaders)
*priv = (d3d_priv) {
.vo = vo,
+ .opt_16bit_textures = true,
+
.colorspace = MP_CSP_DETAILS_DEFAULTS,
.video_eq = { MP_CSP_EQ_CAPS_COLORMATRIX },
};
@@ -1351,6 +1420,7 @@ static int preinit_internal(struct vo *vo, const char *arg, bool allow_shaders)
{"texture-memory", OPT_ARG_INT, &priv->opt_texture_memory},
{"swap-discard", OPT_ARG_BOOL, &priv->opt_swap_discard},
{"exact-backbuffer", OPT_ARG_BOOL, &priv->opt_exact_backbuffer},
+ {"16bit-textures", OPT_ARG_BOOL, &priv->opt_16bit_textures},
{NULL}
};
if (subopt_parse(arg, subopts) != 0) {