summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2020-05-13 22:50:31 +0200
committerwm4 <wm4@nowhere>2020-05-13 23:16:32 +0200
commitfd8e856afecc567764926876739772e79ade028f (patch)
treef214ca0c78c6788dc3681509d41b1675fcf53f16
parent97706a2aec04bc91b8ac4d7c2fa3549d44ccc661 (diff)
downloadmpv-fd8e856afecc567764926876739772e79ade028f.tar.bz2
mpv-fd8e856afecc567764926876739772e79ade028f.tar.xz
vo_direct3d: dumb down OSD rendering
Render most of the OSD on the CPU, then draw it using a relatively simple method. Do this for minimum code maintenance overhead. (While it doesn't matter for vo_direct3d, and the effort spent here is probably more than this would ever hope, I do hope to simplify the internal OSD API for all these fringe VOs. Only vo_gpu should be allowed to do more sophisticated things.) If your GPU is shit (which it will be if you "want" to use vo_direct3d), this might actually improve performance... is what I'd say, but out of laziness a full screen sized texture gets uploaded on every OSD/subtitle change, so maybe not.
-rw-r--r--video/out/vo_direct3d.c256
1 files changed, 92 insertions, 164 deletions
diff --git a/video/out/vo_direct3d.c b/video/out/vo_direct3d.c
index c5fc53e64c..02572a8c53 100644
--- a/video/out/vo_direct3d.c
+++ b/video/out/vo_direct3d.c
@@ -30,6 +30,7 @@
#include "config.h"
#include "options/options.h"
#include "options/m_option.h"
+#include "sub/draw_bmp.h"
#include "mpv_talloc.h"
#include "vo.h"
#include "video/csputils.h"
@@ -48,11 +49,10 @@
#define DEVTYPE D3DDEVTYPE_HAL
//#define DEVTYPE D3DDEVTYPE_REF
-#define D3DFVF_OSD_VERTEX (D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_DIFFUSE)
+#define D3DFVF_OSD_VERTEX (D3DFVF_XYZ | D3DFVF_TEX1)
typedef struct {
float x, y, z;
- D3DCOLOR color;
float tu, tv;
} vertex_osd;
@@ -73,13 +73,7 @@ struct d3dtex {
IDirect3DTexture9 *device;
};
-struct osdpart {
- enum sub_bitmap_format format;
- int change_id;
- struct d3dtex texture;
- int num_vertices;
- vertex_osd *vertices;
-};
+#define MAX_OSD_RECTS 64
/* Global variables "priv" structure. I try to keep their count low.
*/
@@ -136,11 +130,12 @@ typedef struct d3d_priv {
int max_texture_width; /**< from the device capabilities */
int max_texture_height; /**< from the device capabilities */
- D3DFORMAT osd_fmt_table[SUBBITMAP_COUNT];
-
D3DMATRIX d3d_colormatrix;
- struct osdpart *osd[MAX_OSD_PARTS];
+ struct mp_draw_sub_cache *osd_cache;
+ struct d3dtex osd_texture;
+ int osd_num_vertices;
+ vertex_osd osd_vertices[MAX_OSD_RECTS * 6];
} d3d_priv;
struct fmt_entry {
@@ -366,12 +361,7 @@ static void destroy_d3d_surfaces(d3d_priv *priv)
MP_VERBOSE(priv, "destroy_d3d_surfaces called.\n");
d3d_destroy_video_objects(priv);
-
- for (int n = 0; n < MAX_OSD_PARTS; n++) {
- struct osdpart *osd = priv->osd[n];
- d3dtex_release(priv, &osd->texture);
- osd->change_id = -1;
- }
+ d3dtex_release(priv, &priv->osd_texture);
if (priv->d3d_backbuf)
IDirect3DSurface9_Release(priv->d3d_backbuf);
@@ -495,27 +485,18 @@ static bool init_d3d(d3d_priv *priv)
if (priv->opt_force_power_of_2)
priv->device_caps_power2_only = 1;
- priv->osd_fmt_table[SUBBITMAP_LIBASS] = D3DFMT_A8;
- priv->osd_fmt_table[SUBBITMAP_RGBA] = D3DFMT_A8R8G8B8;
-
- for (int n = 0; n < MP_ARRAY_SIZE(priv->osd_fmt_table); n++) {
- int fmt = priv->osd_fmt_table[n];
- if (fmt && FAILED(IDirect3D9_CheckDeviceFormat(priv->d3d_handle,
- D3DADAPTER_DEFAULT,
- DEVTYPE,
- priv->desktop_fmt,
- D3DUSAGE_DYNAMIC | D3DUSAGE_QUERY_FILTER,
- D3DRTYPE_TEXTURE,
- fmt)))
- {
- MP_VERBOSE(priv, "OSD format %#x not supported.\n", fmt);
- priv->osd_fmt_table[n] = 0;
- }
+ if (FAILED(IDirect3D9_CheckDeviceFormat(priv->d3d_handle,
+ D3DADAPTER_DEFAULT,
+ DEVTYPE,
+ priv->desktop_fmt,
+ D3DUSAGE_DYNAMIC | D3DUSAGE_QUERY_FILTER,
+ D3DRTYPE_TEXTURE,
+ D3DFMT_A8R8G8B8)))
+ {
+ MP_ERR(priv, "OSD texture format not supported.\n");
+ return false;
}
- if (!priv->osd_fmt_table[SUBBITMAP_RGBA])
- MP_WARN(priv, "GPU too old - no OSD support.\n");
-
if (!change_d3d_backbuffer(priv))
return false;
@@ -830,9 +811,6 @@ static int preinit(struct vo *vo)
priv->vo = vo;
priv->log = vo->log;
- for (int n = 0; n < MAX_OSD_PARTS; n++)
- priv->osd[n] = talloc_zero(priv, struct osdpart);
-
priv->d3d9_dll = LoadLibraryA("d3d9.dll");
if (!priv->d3d9_dll) {
MP_ERR(priv, "Unable to dynamically load d3d9.dll\n");
@@ -1110,130 +1088,110 @@ error_exit:
return NULL;
}
-static D3DCOLOR ass_to_d3d_color(uint32_t color)
+static void update_osd(d3d_priv *priv)
{
- uint32_t r = (color >> 24) & 0xff;
- uint32_t g = (color >> 16) & 0xff;
- uint32_t b = (color >> 8) & 0xff;
- uint32_t a = 0xff - (color & 0xff);
- return D3DCOLOR_ARGB(a, r, g, b);
-}
+ if (!priv->osd_cache)
+ priv->osd_cache = mp_draw_sub_alloc(priv, priv->vo->global);
-static int next_pow2(int v)
-{
- for (int x = 0; x < 30; x++) {
- if ((1 << x) >= v)
- return 1 << x;
- }
- return INT_MAX;
-}
+ struct sub_bitmap_list *sbs = osd_render(priv->vo->osd, priv->osd_res,
+ priv->osd_pts, 0, mp_draw_sub_formats);
-static bool upload_osd(d3d_priv *priv, struct osdpart *osd,
- struct sub_bitmaps *imgs)
-{
- D3DFORMAT fmt = priv->osd_fmt_table[imgs->format];
+ struct mp_rect act_rc[MAX_OSD_RECTS], mod_rc[64];
+ int num_act_rc = 0, num_mod_rc = 0;
- assert(imgs->packed);
+ struct mp_image *osd = mp_draw_sub_overlay(priv->osd_cache, sbs,
+ act_rc, MP_ARRAY_SIZE(act_rc), &num_act_rc,
+ mod_rc, MP_ARRAY_SIZE(mod_rc), &num_mod_rc);
- osd->change_id = imgs->change_id;
- osd->num_vertices = 0;
+ talloc_free(sbs);
- if (imgs->packed_w > osd->texture.tex_w
- || imgs->packed_h > osd->texture.tex_h
- || osd->format != imgs->format)
- {
- osd->format = imgs->format;
+ if (!osd) {
+ MP_ERR(priv, "Failed to render OSD.\n");
+ return;
+ }
+
+ if (!num_mod_rc && priv->osd_texture.system)
+ return; // nothing changed
- int new_w = next_pow2(imgs->packed_w);
- int new_h = next_pow2(imgs->packed_h);
+ priv->osd_num_vertices = 0;
+
+ if (osd->w > priv->osd_texture.tex_w || osd->h > priv->osd_texture.tex_h) {
+ int new_w = osd->w;
+ int new_h = osd->h;
d3d_fix_texture_size(priv, &new_w, &new_h);
MP_DBG(priv, "reallocate OSD surface to %dx%d.\n", new_w, new_h);
- d3dtex_release(priv, &osd->texture);
- d3dtex_allocate(priv, &osd->texture, fmt, new_w, new_h);
-
- if (!osd->texture.system)
- return false; // failed to allocate
+ d3dtex_release(priv, &priv->osd_texture);
+ if (!d3dtex_allocate(priv, &priv->osd_texture, D3DFMT_A8R8G8B8,
+ new_w, new_h))
+ return;
}
- RECT dirty_rc = { 0, 0, imgs->packed_w, imgs->packed_h };
+ // Lazy; could/should use the bounding rect, or perform multiple lock calls.
+ // The previous approach (fully packed texture) was more efficient.
+ RECT dirty_rc = { 0, 0, priv->osd_texture.w, priv->osd_texture.h };
D3DLOCKED_RECT locked_rect;
- if (FAILED(IDirect3DTexture9_LockRect(osd->texture.system, 0, &locked_rect,
+ if (FAILED(IDirect3DTexture9_LockRect(priv->osd_texture.system, 0, &locked_rect,
&dirty_rc, 0)))
{
MP_ERR(priv, "OSD texture lock failed.\n");
- return false;
+ return;
}
- int ps = fmt == D3DFMT_A8 ? 1 : 4;
- memcpy_pic(locked_rect.pBits, imgs->packed->planes[0], ps * imgs->packed_w,
- imgs->packed_h, locked_rect.Pitch, imgs->packed->stride[0]);
+ for (int n = 0; n < num_mod_rc; n++) {
+ struct mp_rect rc = mod_rc[n];
+ int w = mp_rect_w(rc);
+ int h = mp_rect_h(rc);
+ void *src = mp_image_pixel_ptr(osd, 0, rc.x0, rc.y0);
+ void *dst = (char *)locked_rect.pBits + locked_rect.Pitch * rc.y0 +
+ rc.x0 * 4;
+ memcpy_pic(dst, src, w * 4, h, locked_rect.Pitch, osd->stride[0]);
+ }
- if (FAILED(IDirect3DTexture9_UnlockRect(osd->texture.system, 0))) {
+ if (FAILED(IDirect3DTexture9_UnlockRect(priv->osd_texture.system, 0))) {
MP_ERR(priv, "OSD texture unlock failed.\n");
- return false;
+ return;
}
- if (!d3dtex_update(priv, &osd->texture))
- return false;
+ if (!d3dtex_update(priv, &priv->osd_texture))
+ return;
// We need 2 primitives per quad which makes 6 vertices.
- osd->num_vertices = imgs->num_parts * 6;
- MP_TARRAY_GROW(osd, osd->vertices, osd->num_vertices);
-
- float tex_w = osd->texture.tex_w;
- float tex_h = osd->texture.tex_h;
-
- for (int n = 0; n < imgs->num_parts; n++) {
- struct sub_bitmap *b = &imgs->parts[n];
-
- D3DCOLOR color = imgs->format == SUBBITMAP_LIBASS
- ? ass_to_d3d_color(b->libass.color)
- : D3DCOLOR_ARGB(255, 255, 255, 255);
-
- float x0 = b->x;
- float y0 = b->y;
- float x1 = b->x + b->dw;
- float y1 = b->y + b->dh;
- float tx0 = b->src_x / tex_w;
- float ty0 = b->src_y / tex_h;
- float tx1 = (b->src_x + b->w) / tex_w;
- float ty1 = (b->src_y + b->h) / tex_h;
-
- vertex_osd *v = &osd->vertices[n * 6];
- v[0] = (vertex_osd) { x0, y0, 0, color, tx0, ty0 };
- v[1] = (vertex_osd) { x1, y0, 0, color, tx1, ty0 };
- v[2] = (vertex_osd) { x0, y1, 0, color, tx0, ty1 };
- v[3] = (vertex_osd) { x1, y1, 0, color, tx1, ty1 };
- v[4] = v[2];
- v[5] = v[1];
- }
+ priv->osd_num_vertices = num_act_rc * 6;
- return true;
-}
-
-static struct osdpart *generate_osd(d3d_priv *priv, struct sub_bitmaps *imgs)
-{
- if (imgs->num_parts == 0 || !priv->osd_fmt_table[imgs->format])
- return NULL;
+ float tex_w = priv->osd_texture.tex_w;
+ float tex_h = priv->osd_texture.tex_h;
- struct osdpart *osd = priv->osd[imgs->render_index];
+ for (int n = 0; n < num_act_rc; n++) {
+ struct mp_rect rc = act_rc[n];
- if (imgs->change_id != osd->change_id)
- upload_osd(priv, osd, imgs);
+ float tx0 = rc.x0 / tex_w;
+ float ty0 = rc.y0 / tex_h;
+ float tx1 = rc.x1 / tex_w;
+ float ty1 = rc.y1 / tex_h;
- return osd->num_vertices ? osd : NULL;
+ vertex_osd *v = &priv->osd_vertices[n * 6];
+ v[0] = (vertex_osd) { rc.x0, rc.y0, 0, tx0, ty0 };
+ v[1] = (vertex_osd) { rc.x1, rc.y0, 0, tx1, ty0 };
+ v[2] = (vertex_osd) { rc.x0, rc.y1, 0, tx0, ty1 };
+ v[3] = (vertex_osd) { rc.x1, rc.y1, 0, tx1, ty1 };
+ v[4] = v[2];
+ v[5] = v[1];
+ }
}
-static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs)
+static void draw_osd(struct vo *vo)
{
- d3d_priv *priv = ctx;
+ d3d_priv *priv = vo->priv;
+ if (!priv->d3d_device)
+ return;
- struct osdpart *osd = generate_osd(priv, imgs);
- if (!osd)
+ update_osd(priv);
+
+ if (!priv->osd_num_vertices)
return;
d3d_begin_scene(priv);
@@ -1242,31 +1200,16 @@ static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs)
D3DRS_ALPHABLENDENABLE, TRUE);
IDirect3DDevice9_SetTexture(priv->d3d_device, 0,
- d3dtex_get_render_texture(priv, &osd->texture));
-
- if (imgs->format == SUBBITMAP_LIBASS) {
- // do not use the color value from the A8 texture, because that is black
- IDirect3DDevice9_SetRenderState(priv->d3d_device,D3DRS_TEXTUREFACTOR,
- 0xFFFFFFFF);
- IDirect3DDevice9_SetTextureStageState(priv->d3d_device,0,
- D3DTSS_COLORARG1, D3DTA_TFACTOR);
+ d3dtex_get_render_texture(priv, &priv->osd_texture));
- IDirect3DDevice9_SetTextureStageState(priv->d3d_device, 0,
- D3DTSS_ALPHAOP, D3DTOP_MODULATE);
- } else {
- IDirect3DDevice9_SetRenderState(priv->d3d_device, D3DRS_SRCBLEND,
- D3DBLEND_ONE);
- }
+ IDirect3DDevice9_SetRenderState(priv->d3d_device, D3DRS_SRCBLEND,
+ D3DBLEND_ONE);
IDirect3DDevice9_SetFVF(priv->d3d_device, D3DFVF_OSD_VERTEX);
IDirect3DDevice9_DrawPrimitiveUP(priv->d3d_device, D3DPT_TRIANGLELIST,
- osd->num_vertices / 3,
- osd->vertices, sizeof(vertex_osd));
+ priv->osd_num_vertices / 3,
+ priv->osd_vertices, sizeof(vertex_osd));
- IDirect3DDevice9_SetTextureStageState(priv->d3d_device,0,
- D3DTSS_COLORARG1, D3DTA_TEXTURE);
- IDirect3DDevice9_SetTextureStageState(priv->d3d_device, 0,
- D3DTSS_ALPHAOP, D3DTOP_SELECTARG1);
IDirect3DDevice9_SetRenderState(priv->d3d_device,
D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);
@@ -1276,21 +1219,6 @@ static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs)
D3DRS_ALPHABLENDENABLE, FALSE);
}
-
-static void draw_osd(struct vo *vo)
-{
- d3d_priv *priv = vo->priv;
- if (!priv->d3d_device)
- return;
-
- bool osd_fmt_supported[SUBBITMAP_COUNT];
- for (int n = 0; n < SUBBITMAP_COUNT; n++)
- osd_fmt_supported[n] = !!priv->osd_fmt_table[n];
-
- osd_draw(vo->osd, priv->osd_res, priv->osd_pts, 0, osd_fmt_supported,
- draw_osd_cb, priv);
-}
-
#define OPT_BASE_STRUCT d3d_priv
static const struct m_option opts[] = {