diff options
author | Dr.Smile <vabnick@gmail.com> | 2021-09-30 08:19:36 +0300 |
---|---|---|
committer | Dr.Smile <vabnick@gmail.com> | 2022-12-04 02:17:38 +0300 |
commit | 6fec5b1f45a294f4d9335c405ca7ae49f3620b5a (patch) | |
tree | 9c0d6332ace0c521a5c13776bd3a28710cbb6e8e /libass | |
parent | 99017a5bb0e88a3ad7100ecedb63c80475f5bbca (diff) | |
download | libass-6fec5b1f45a294f4d9335c405ca7ae49f3620b5a.tar.bz2 libass-6fec5b1f45a294f4d9335c405ca7ae49f3620b5a.tar.xz |
Switch to a new CPU flag infrastructure
This makes it possible to selectively disable
particular implementations at runtime.
Diffstat (limited to 'libass')
-rw-r--r-- | libass/ass_bitmap_engine.c | 61 | ||||
-rw-r--r-- | libass/ass_bitmap_engine.h | 15 | ||||
-rw-r--r-- | libass/ass_render.c | 13 | ||||
-rw-r--r-- | libass/ass_utils.c | 42 | ||||
-rw-r--r-- | libass/ass_utils.h | 4 |
5 files changed, 74 insertions, 61 deletions
diff --git a/libass/ass_bitmap_engine.c b/libass/ass_bitmap_engine.c index 54bb5c7..ef3f6b3 100644 --- a/libass/ass_bitmap_engine.c +++ b/libass/ass_bitmap_engine.c @@ -19,7 +19,11 @@ #include "config.h" #include "ass_compat.h" +#include <stdbool.h> + #include "ass_bitmap_engine.h" +#include "x86/cpuid.h" + #define RASTERIZER_PROTOTYPES(tile_size, suffix) \ FillSolidTileFunc ass_fill_solid_tile ## tile_size ## _ ## suffix; \ @@ -99,3 +103,60 @@ BITMAP_ENGINE(5, 4, 16, avx2) #endif #endif + + +unsigned ass_get_cpu_flags(unsigned mask) +{ + unsigned flags = ASS_CPU_FLAG_NONE; + +#if CONFIG_ASM && ARCH_X86 + + if (!ass_has_cpuid()) + return flags & mask; + + uint32_t eax = 0, ebx, ecx, edx; + ass_get_cpuid(&eax, &ebx, &ecx, &edx); + uint32_t max_leaf = eax; + + bool avx = false; + if (max_leaf >= 1) { + eax = 1; + ass_get_cpuid(&eax, &ebx, &ecx, &edx); + if (edx & (1 << 26)) // SSE2 + flags |= ASS_CPU_FLAG_X86_SSE2; + + if (ecx & (1 << 27) && // OSXSAVE + ecx & (1 << 28)) { // AVX + uint32_t xcr0l, xcr0h; + ass_get_xgetbv(0, &xcr0l, &xcr0h); + if (xcr0l & (1 << 1) && // XSAVE for XMM + xcr0l & (1 << 2)) // XSAVE for YMM + avx = true; + } + } + + if (max_leaf >= 7) { + eax = 7; + ass_get_cpuid(&eax, &ebx, &ecx, &edx); + if (avx && ebx & (1 << 5)) // AVX2 + flags |= ASS_CPU_FLAG_X86_AVX2; + } + +#endif + + return flags & mask; +} + +const BitmapEngine *ass_bitmap_engine_init(unsigned mask) +{ +#if CONFIG_ASM + unsigned flags = ass_get_cpu_flags(mask); +#if ARCH_X86 + if (flags & ASS_CPU_FLAG_X86_AVX2) + return &ass_bitmap_engine_avx2; + if (flags & ASS_CPU_FLAG_X86_SSE2) + return &ass_bitmap_engine_sse2; +#endif +#endif + return &ass_bitmap_engine_c; +} diff --git a/libass/ass_bitmap_engine.h b/libass/ass_bitmap_engine.h index 8628bd9..7175031 100644 --- a/libass/ass_bitmap_engine.h +++ b/libass/ass_bitmap_engine.h @@ -80,8 +80,17 @@ typedef struct { ParamFilterFunc *blur_horz[5], *blur_vert[5]; } BitmapEngine; -extern const BitmapEngine ass_bitmap_engine_c; -extern 
const BitmapEngine ass_bitmap_engine_sse2; -extern const BitmapEngine ass_bitmap_engine_avx2; +enum { + ASS_CPU_FLAG_NONE = 0x0000, +#if ARCH_X86 + ASS_CPU_FLAG_X86_SSE2 = 0x0001, + ASS_CPU_FLAG_X86_AVX2 = 0x0002, +#endif + ASS_CPU_FLAG_ALL = 0x0FFF, +}; + +unsigned ass_get_cpu_flags(unsigned mask); + +const BitmapEngine *ass_bitmap_engine_init(unsigned mask); #endif /* LIBASS_BITMAP_ENGINE_H */ diff --git a/libass/ass_render.c b/libass/ass_render.c index d0e1447..29a4374 100644 --- a/libass/ass_render.c +++ b/libass/ass_render.c @@ -106,18 +106,7 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library) priv->ftlibrary = ft; // images_root and related stuff is zero-filled in calloc -#if CONFIG_ASM && ARCH_X86 - bool sse2, avx2; - ass_cpu_capabilities(&sse2, &avx2); - if (avx2) - priv->engine = &ass_bitmap_engine_avx2; - else if (sse2) - priv->engine = &ass_bitmap_engine_sse2; - else - priv->engine = &ass_bitmap_engine_c; -#else - priv->engine = &ass_bitmap_engine_c; -#endif + priv->engine = ass_bitmap_engine_init(ASS_CPU_FLAG_ALL); if (!ass_rasterizer_init(priv->engine, &priv->state.rasterizer, RASTERIZER_PRECISION)) goto fail; diff --git a/libass/ass_utils.c b/libass/ass_utils.c index 9075e20..2c7923d 100644 --- a/libass/ass_utils.c +++ b/libass/ass_utils.c @@ -30,48 +30,6 @@ #include "ass_utils.h" #include "ass_string.h" -#if CONFIG_ASM && ARCH_X86 - -#include "x86/cpuid.h" - -void ass_cpu_capabilities(bool *sse2, bool *avx2) -{ - *sse2 = false; - *avx2 = false; - - if (!ass_has_cpuid()) - return; - - uint32_t eax = 0, ebx, ecx, edx; - ass_get_cpuid(&eax, &ebx, &ecx, &edx); - uint32_t max_leaf = eax; - bool avx = false; - - if (max_leaf >= 1) { - eax = 1; - ass_get_cpuid(&eax, &ebx, &ecx, &edx); - if (edx & (1 << 26)) // SSE2 - *sse2 = true; - - if (ecx & (1 << 27) && // OSXSAVE - ecx & (1 << 28)) { // AVX - uint32_t xcr0l, xcr0h; - ass_get_xgetbv(0, &xcr0l, &xcr0h); - if (xcr0l & (1 << 1) && // XSAVE for XMM - xcr0l & (1 << 2)) // XSAVE for YMM - avx = true; - } 
- } - - if (max_leaf >= 7) { - eax = 7; - ass_get_cpuid(&eax, &ebx, &ecx, &edx); - if (avx && ebx & (1 << 5)) // AVX2 - *avx2 = true; - } -} -#endif // ASM - // Fallbacks #ifndef HAVE_STRDUP char *ass_strdup_fallback(const char *str) diff --git a/libass/ass_utils.h b/libass/ass_utils.h index 7d68326..a1beb76 100644 --- a/libass/ass_utils.h +++ b/libass/ass_utils.h @@ -51,10 +51,6 @@ #define FEATURE_MASK(feat) (((uint32_t) 1) << (feat)) -#if CONFIG_ASM && ARCH_X86 -void ass_cpu_capabilities(bool *sse2, bool *avx2); -#endif - typedef struct { const char *str; size_t len; |