vo_direct3d: add hack for using 2 channel textures for 10 bit pixel formats

This actually applies to YUV formats with 9-16 bit depths. This hack is disabled by default, and the VO will normally use 16 bit textures. It can be enabled by passing the no16bit-textures option to vo_direct3d. Then the VO will use D3DFMT_A8L8 as the texture format for the YUV planes (instead of D3DFMT_L16), and combine the two sampled color values back into one. In some cases it might be undesirable to use 16 bit texture formats. At least some OpenGL drivers on Linux (Mesa + Intel) round values sampled from 16 bit textures back into 8 bit, which loses 8 of the 10 bits of color information when playing 10 bit formats. It is unknown whether there are D3D9 drivers which do this, so this commit might be removed later.
author: wm4 <wm4@mplayer2.org> 2011-11-06 07:40:06 +0100
committer: wm4 <wm4@mplayer2.org> 2012-03-17 21:06:28 +0100
commit: 032a3b827219235f39151cb186314c0b532f9197 (patch)
tree: b480f5d9927901084e25150fe4cfc08b8d9ff9d5 /libvo
parent: 8393796f0bb77b6c03af1df1111ac345dc549306 (diff)
download: mpv-032a3b827219235f39151cb186314c0b532f9197.tar.bz2
mpv-032a3b827219235f39151cb186314c0b532f9197.tar.xz
3 files changed, 272 insertions, 8 deletions
diff --git a/libvo/d3d_shader_yuv.hlsl b/libvo/d3d_shader_yuv.hlsl
index 9d46e536fc..b17e257210 100644
--- a/libvo/d3d_shader_yuv.hlsl
+++ b/libvo/d3d_shader_yuv.hlsl
@@ -1,20 +1,44 @@
 // Compile with:
 // fxc.exe /Tps_2_0 /Fhd3d_shader_yuv.h d3d_shader_yuv.hlsl /Vnd3d_shader_yuv
+// fxc.exe /Tps_2_0 /Fhd3d_shader_yuv_2ch.h d3d_shader_yuv.hlsl /Vnd3d_shader_yuv_2ch /DUSE_2CH=1
+
+// Be careful with this shader. You can't use constant slots, since we don't
+// load the shader with D3DX. All uniform variables are mapped to hardcoded
+// constant slots.
 
 sampler2D tex0 : register(s0);
 sampler2D tex1 : register(s1);
 sampler2D tex2 : register(s2);
 
 uniform float4x4 colormatrix : register(c0);
+uniform float2 depth : register(c5);
+
+#ifdef USE_2CH
+
+float1 sample(sampler2D tex, float2 t)
+{
+    // The texture is A8L8, but is meant to be read like a single L16 value.
+    // Merge the 2 sampled channels (.x and .w) into one, weighted by depth.
+    return dot(tex2D(tex, t).xw, depth);
+}
+
+#else
+
+float1 sample(sampler2D tex, float2 t)
+{
+    return tex2D(tex, t).x;
+}
+
+#endif
 
 float4 main(float2 t0 : TEXCOORD0,
             float2 t1 : TEXCOORD1,
             float2 t2 : TEXCOORD2)
             : COLOR
 {
-    float4 c = float4(tex2D(tex0, t0).x,
-                      tex2D(tex1, t1).x,
-                      tex2D(tex2, t2).x,
+    float4 c = float4(sample(tex0, t0),
+                      sample(tex1, t1),
+                      sample(tex2, t2),
                       1);
     return mul(c, colormatrix);
 }
diff --git a/libvo/d3d_shader_yuv_2ch.h b/libvo/d3d_shader_yuv_2ch.h
new file mode 100644
index 0000000000..45dcc73992
--- /dev/null
+++ b/libvo/d3d_shader_yuv_2ch.h
@@ -0,0 +1,170 @@
+#if 0
+//
+// Generated by Microsoft (R) HLSL Shader Compiler 9.27.952.3022
+//
+//   fxc /Tps_2_0 /Fhz:\tmp\mplayer\libvo\d3d_shader_yuv_2ch.h
+//    z:\tmp\mplayer\libvo\d3d_shader_yuv.hlsl /Vnd3d_shader_yuv_2ch
+//    /DUSE_2CH=1
+//
+//
+// Parameters:
+//
+//   float4x4 colormatrix;
+//   float2 depth;
+//   sampler2D tex0;
+//   sampler2D tex1;
+//   sampler2D tex2;
+//
+//
+// Registers:
+//
+//   Name         Reg   Size
+//   ------------ ----- ----
+//   colormatrix  c0       4
+//   depth        c5       1
+//   tex0         s0       1
+//   tex1         s1       1
+//   tex2         s2       1
+//
+
+    ps_2_0
+    def c4, 1, 0, 0, 0
+    dcl t0.xy
+    dcl t1.xy
+    dcl t2.xy
+    dcl_2d s0
+    dcl_2d s1
+    dcl_2d s2
+    texld r0, t0, s0
+    texld r1, t1, s1
+    texld r2, t2, s2
+    mul r0.x, r0.x, c5.x
+    mad r0.x, r0.w, c5.y, r0.x
+    mul r1.x, r1.x, c5.x
+    mad r0.y, r1.w, c5.y, r1.x
+    mul r1.x, r2.x, c5.x
+    mad r0.z, r2.w, c5.y, r1.x
+    mov r0.w, c4.x
+    dp4 r1.x, r0, c0
+    dp4 r1.y, r0, c1
+    dp4 r1.z, r0, c2
+    dp4 r1.w, r0, c3
+    mov oC0, r1
+
+// approximately 15 instruction slots used (3 texture, 12 arithmetic)
+#endif
+
+const BYTE d3d_shader_yuv_2ch[] =
+{
+      0,   2, 255, 255, 254, 255, 
+     78,   0,  67,  84,  65,  66, 
+     28,   0,   0,   0,   3,   1, 
+      0,   0,   0,   2, 255, 255, 
+      5,   0,   0,   0,  28,   0, 
+      0,   0,   0,   1,   0,   0, 
+    252,   0,   0,   0, 128,   0, 
+      0,   0,   2,   0,   0,   0, 
+      4,   0,   2,   0, 140,   0, 
+      0,   0,   0,   0,   0,   0, 
+    156,   0,   0,   0,   2,   0, 
+      5,   0,   1,   0,  22,   0, 
+    164,   0,   0,   0,   0,   0, 
+      0,   0, 180,   0,   0,   0, 
+      3,   0,   0,   0,   1,   0, 
+      2,   0, 188,   0,   0,   0, 
+      0,   0,   0,   0, 204,   0, 
+      0,   0,   3,   0,   1,   0, 
+      1,   0,   6,   0, 212,   0, 
+      0,   0,   0,   0,   0,   0, 
+    228,   0,   0,   0,   3,   0, 
+      2,   0,   1,   0,  10,   0, 
+    236,   0,   0,   0,   0,   0, 
+      0,   0,  99, 111, 108, 111, 
+    114, 109,  97, 116, 114, 105, 
+    120,   0,   3,   0,   3,   0, 
+      4,   0,   4,   0,   1,   0, 
+      0,   0,   0,   0,   0,   0, 
+    100, 101, 112, 116, 104,   0, 
+    171, 171,   1,   0,   3,   0, 
+      1,   0,   2,   0,   1,   0, 
+      0,   0,   0,   0,   0,   0, 
+    116, 101, 120,  48,   0, 171, 
+    171, 171,   4,   0,  12,   0, 
+      1,   0,   1,   0,   1,   0, 
+      0,   0,   0,   0,   0,   0, 
+    116, 101, 120,  49,   0, 171, 
+    171, 171,   4,   0,  12,   0, 
+      1,   0,   1,   0,   1,   0, 
+      0,   0,   0,   0,   0,   0, 
+    116, 101, 120,  50,   0, 171, 
+    171, 171,   4,   0,  12,   0, 
+      1,   0,   1,   0,   1,   0, 
+      0,   0,   0,   0,   0,   0, 
+    112, 115,  95,  50,  95,  48, 
+      0,  77, 105,  99, 114, 111, 
+    115, 111, 102, 116,  32,  40, 
+     82,  41,  32,  72,  76,  83, 
+     76,  32,  83, 104,  97, 100, 
+    101, 114,  32,  67, 111, 109, 
+    112, 105, 108, 101, 114,  32, 
+     57,  46,  50,  55,  46,  57, 
+     53,  50,  46,  51,  48,  50, 
+     50,   0,  81,   0,   0,   5, 
+      4,   0,  15, 160,   0,   0, 
+    128,  63,   0,   0,   0,   0, 
+      0,   0,   0,   0,   0,   0, 
+      0,   0,  31,   0,   0,   2, 
+      0,   0,   0, 128,   0,   0, 
+      3, 176,  31,   0,   0,   2, 
+      0,   0,   0, 128,   1,   0, 
+      3, 176,  31,   0,   0,   2, 
+      0,   0,   0, 128,   2,   0, 
+      3, 176,  31,   0,   0,   2, 
+      0,   0,   0, 144,   0,   8, 
+     15, 160,  31,   0,   0,   2, 
+      0,   0,   0, 144,   1,   8, 
+     15, 160,  31,   0,   0,   2, 
+      0,   0,   0, 144,   2,   8, 
+     15, 160,  66,   0,   0,   3, 
+      0,   0,  15, 128,   0,   0, 
+    228, 176,   0,   8, 228, 160, 
+     66,   0,   0,   3,   1,   0, 
+     15, 128,   1,   0, 228, 176, 
+      1,   8, 228, 160,  66,   0, 
+      0,   3,   2,   0,  15, 128, 
+      2,   0, 228, 176,   2,   8, 
+    228, 160,   5,   0,   0,   3, 
+      0,   0,   1, 128,   0,   0, 
+      0, 128,   5,   0,   0, 160, 
+      4,   0,   0,   4,   0,   0, 
+      1, 128,   0,   0, 255, 128, 
+      5,   0,  85, 160,   0,   0, 
+      0, 128,   5,   0,   0,   3, 
+      1,   0,   1, 128,   1,   0, 
+      0, 128,   5,   0,   0, 160, 
+      4,   0,   0,   4,   0,   0, 
+      2, 128,   1,   0, 255, 128, 
+      5,   0,  85, 160,   1,   0, 
+      0, 128,   5,   0,   0,   3, 
+      1,   0,   1, 128,   2,   0, 
+      0, 128,   5,   0,   0, 160, 
+      4,   0,   0,   4,   0,   0, 
+      4, 128,   2,   0, 255, 128, 
+      5,   0,  85, 160,   1,   0, 
+      0, 128,   1,   0,   0,   2, 
+      0,   0,   8, 128,   4,   0, 
+      0, 160,   9,   0,   0,   3, 
+      1,   0,   1, 128,   0,   0, 
+    228, 128,   0,   0, 228, 160, 
+      9,   0,   0,   3,   1,   0, 
+      2, 128,   0,   0, 228, 128, 
+      1,   0, 228, 160,   9,   0, 
+      0,   3,   1,   0,   4, 128, 
+      0,   0, 228, 128,   2,   0, 
+    228, 160,   9,   0,   0,   3, 
+      1,   0,   8, 128,   0,   0, 
+    228, 128,   3,   0, 228, 160, 
+      1,   0,   0,   2,   0,   8, 
+     15, 128,   1,   0, 228, 128, 
+    255, 255,   0,   0
+};
diff --git a/libvo/vo_direct3d.c b/libvo/vo_direct3d.c
index 95bff26454..fdecee8c94 100644
--- a/libvo/vo_direct3d.c
+++ b/libvo/vo_direct3d.c
@@ -48,13 +48,15 @@
 
 // shaders generated by fxc.exe from d3d_shader_yuv.hlsl
 #include "d3d_shader_yuv.h"
+#include "d3d_shader_yuv_2ch.h"
 
 
 // TODO: beg someone to add this (there is already IMGFMT_Y8)
 // equals MAKEFOURCC('Y', '1', '6', ' ')
 #define IMGFMT_Y16 0x20363159
+#define IMGFMT_A8Y8 MAKEFOURCC('A', '8', 'Y', '8')
 
-#define IMGFMT_IS_Y(x) ((x) == IMGFMT_Y8 || (x) == IMGFMT_Y16)
+#define IMGFMT_IS_Y(x) ((x) == IMGFMT_Y8 || (x) == IMGFMT_Y16 || (x) == IMGFMT_A8Y8)
 #define IMGFMT_Y_DEPTH(x) ((x) == IMGFMT_Y8 ? 8 : 16)
 
 #define DEVTYPE D3DDEVTYPE_HAL
@@ -132,6 +134,7 @@ typedef struct d3d_priv {
     int opt_texture_memory;
     int opt_swap_discard;
     int opt_exact_backbuffer;
+    int opt_16bit_textures;
 
     struct vo *vo;
 
@@ -151,6 +154,7 @@ typedef struct d3d_priv {
                                 StretchRect */
     bool use_shaders;           /**< use shader for YUV color conversion
                                 (or possibly for RGB video equalizers) */
+    bool use_2ch_hack;          /**< 2 byte YUV formats use 2 channel hack */
 
     int plane_count;
     struct texplane planes[3];
@@ -190,6 +194,7 @@ typedef struct d3d_priv {
     int max_texture_height;         /**< from the device capabilities */
 
     D3DMATRIX d3d_colormatrix;
+    float d3d_depth_vector[4];
     struct mp_csp_details colorspace;
     struct mp_csp_equalizer video_eq;
 
@@ -224,6 +229,7 @@ static const struct fmt_entry fmt_table[] = {
     // grayscale (can be considered both packed and planar)
     {IMGFMT_Y8,    D3DFMT_L8},
     {IMGFMT_Y16,   D3DFMT_L16},
+    {IMGFMT_A8Y8,  D3DFMT_A8L8},
     {0},
 };
 
@@ -1013,6 +1019,9 @@ static uint32_t d3d_draw_frame(d3d_priv *priv)
             IDirect3DDevice9_SetPixelShaderConstantF(priv->d3d_device, 0,
                                                      &priv->d3d_colormatrix._11,
                                                      4);
+            IDirect3DDevice9_SetPixelShaderConstantF(priv->d3d_device, 5,
+                                                     priv->d3d_depth_vector,
+                                                     1);
         }
 
         IDirect3DDevice9_SetFVF(priv->d3d_device, D3DFVF_VIDEO_VERTEX);
@@ -1130,7 +1139,14 @@ static D3DFORMAT check_shader_conversion(d3d_priv *priv, uint32_t fmt)
     bool is_8bit = component_bits == 8;
     if (!is_8bit && priv->opt_only_8bit)
         return 0;
-    return check_format(priv, is_8bit ? IMGFMT_Y8 : IMGFMT_Y16, true);
+    int texfmt = IMGFMT_Y8;
+    if (!is_8bit) {
+        if (priv->opt_16bit_textures)
+            texfmt = IMGFMT_Y16;
+        else
+            texfmt = IMGFMT_A8Y8;
+    }
+    return check_format(priv, texfmt, true);
 }
 
 // Return if the image format can be used. If it can, decide which rendering
@@ -1165,6 +1181,7 @@ static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize)
 
     priv->use_shaders = false;
     priv->use_textures = false;
+    priv->use_2ch_hack = false;
     priv->movie_src_fmt = 0;
     priv->pixel_shader_data = NULL;
     priv->plane_count = 0;
@@ -1209,7 +1226,14 @@ static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize)
                     planes[n].clearval = get_chroma_clear_val(component_bits);
                 }
             }
-            priv->pixel_shader_data = d3d_shader_yuv;
+            if (shader_d3dfmt != D3DFMT_A8L8) {
+                priv->pixel_shader_data = d3d_shader_yuv;
+            } else {
+                mp_msg(MSGT_VO, MSGL_WARN, "<vo_direct3d>Using YUV 2ch hack.\n");
+
+                priv->pixel_shader_data = d3d_shader_yuv_2ch;
+                priv->use_2ch_hack = true;
+            }
         }
 
         for (n = 0; n < priv->plane_count; n++) {
@@ -1253,6 +1277,30 @@ static int query_format(d3d_priv *priv, uint32_t movie_fmt)
  *                                                                          *
  ****************************************************************************/
 
+static void get_2ch_depth_multiplier(int depth, float *out_f1, float *out_f2) {
+    // Derivation of the two output factors:
+    //  The suffixes i8 and i16 mark 8/16 bit fixed point values.
+    //  The suffix f marks float values, ideally in the range 0.0-1.0.
+    //  c_i8 is a two component vector, sampled from a two channel texture.
+    //  (c_i8.x is the low byte, c_i8.y is the high byte)
+    //  r_f is the resulting color scalar value.
+    //
+    //  c_i8 = c_f * (2^8-1)
+    //  r_i16 = c_i8.x + c_i8.y * 2^8
+    //  r_f = r_i16 / (2^16-1)
+    //      = c_f.x * (2^8-1) / (2^16-1) + c_f.y * (2^8-1) * 2^8 / (2^16-1)
+    //      = c_f.x * ((2^8-1) / (2^16-1)) + c_f.y * (2^8 * ((2^8-1) / (2^16-1)))
+    // out = ((2^8-1) / (2^16-1),  2^8 * ((2^8-1) / (2^16-1)))
+    // The resulting color is r_f = dot(c_f, out).
+    // The same goes for other bit depths, such as 10 bit: (2^depth-1) is the
+    // maximum possible value at that depth and r_i16 has to be scaled by it,
+    // so the factor (2^16-1) in the formula above is replaced with
+    // (2^depth-1).
+    float factor = (float)((1 << 8) - 1) / (float)((1 << depth) - 1);
+    *out_f1 = factor;
+    *out_f2 = 256.0 * factor;
+}
+
 static void update_colorspace(d3d_priv *priv)
 {
     float coeff[3][4];
@@ -1260,8 +1308,19 @@ static void update_colorspace(d3d_priv *priv)
     mp_csp_copy_equalizer_values(&csp, &priv->video_eq);
 
     if (priv->use_shaders) {
-        csp.input_bits = priv->planes[0].bits_per_pixel;
-        csp.texture_bits = (csp.input_bits + 7) & ~7;
+        if (!priv->use_2ch_hack) {
+            csp.input_bits = priv->planes[0].bits_per_pixel;
+            csp.texture_bits = (csp.input_bits + 7) & ~7;
+        } else {
+            float f1, f2;
+            get_2ch_depth_multiplier(priv->planes[0].bits_per_pixel, &f1, &f2);
+            priv->d3d_depth_vector[0] = f1;
+            priv->d3d_depth_vector[1] = f2;
+            priv->d3d_depth_vector[2] = priv->d3d_depth_vector[3] = 0;
+            // no change
+            csp.input_bits = 8;
+            csp.texture_bits = 8;
+        }
     }
 
     mp_get_yuv2rgb_coeffs(&csp, coeff);
@@ -1314,6 +1373,14 @@ const char *options_help_text = "-vo direct3d command line help:\n"
 "        Might be slower too, as it must (?) clear every frame.\n"
 "    exact-backbuffer\n"
 "        Always resize the backbuffer to window size.\n"
+"    no16bit-textures\n"
+"        Don't use textures with a 16 bit color channel for YUV formats that\n"
+"        use more than 8 bits per component. Instead, use D3DFMT_A8L8 textures\n"
+"        and compute the values sampled from the 2 channels back into one.\n"
+"        Might be slower, since the shader becomes slightly more complicated.\n"
+"        Might work better, if your drivers either don't support D3DFMT_L16,\n"
+"        or if either the texture unit or the shaders don't operate in at least\n"
+"        16 bit precision.\n"
 "";
 
 /** @brief libvo Callback: Preinitialize the video card.
@@ -1331,6 +1398,8 @@ static int preinit_internal(struct vo *vo, const char *arg, bool allow_shaders)
     *priv = (d3d_priv) {
         .vo = vo,
 
+        .opt_16bit_textures = true,
+
         .colorspace = MP_CSP_DETAILS_DEFAULTS,
         .video_eq = { MP_CSP_EQ_CAPS_COLORMATRIX },
     };
@@ -1351,6 +1420,7 @@ static int preinit_internal(struct vo *vo, const char *arg, bool allow_shaders)
         {"texture-memory", OPT_ARG_INT, &priv->opt_texture_memory},
         {"swap-discard", OPT_ARG_BOOL, &priv->opt_swap_discard},
         {"exact-backbuffer", OPT_ARG_BOOL, &priv->opt_exact_backbuffer},
+        {"16bit-textures", OPT_ARG_BOOL, &priv->opt_16bit_textures},
         {NULL}
     };
     if (subopt_parse(arg, subopts) != 0) {
author	wm4 <wm4@mplayer2.org>	2011-11-06 07:40:06 +0100
committer	wm4 <wm4@mplayer2.org>	2012-03-17 21:06:28 +0100
commit	032a3b827219235f39151cb186314c0b532f9197 (patch)
tree	b480f5d9927901084e25150fe4cfc08b8d9ff9d5 /libvo
parent	8393796f0bb77b6c03af1df1111ac345dc549306 (diff)
download	mpv-032a3b827219235f39151cb186314c0b532f9197.tar.bz2 mpv-032a3b827219235f39151cb186314c0b532f9197.tar.xz