diff options
author | Oneric <oneric@oneric.stub> | 2022-03-16 19:50:43 +0100 |
---|---|---|
committer | Oneric <oneric@oneric.stub> | 2022-03-17 23:58:33 +0100 |
commit | 9608c8c838e1046601c27f6473e23654fedf1716 (patch) | |
tree | b1d4c88bc34b9db0899743af58cd3628a22b89b1 | |
parent | 5f0e8450f834894b2745238e3d32ff4878710ec8 (diff) | |
download | libass-9608c8c838e1046601c27f6473e23654fedf1716.tar.bz2 libass-9608c8c838e1046601c27f6473e23654fedf1716.tar.xz |
asm/x86: check highest supported leaf for cpuid
If a higher than supported leaf is requested, at least Intel processors
will silently return the data of the highest supported leaf,
invalidating the following feature tests.
Reported in: https://github.com/libass/libass/pull/603
-rw-r--r-- | libass/ass_render.c | 6 | ||||
-rw-r--r-- | libass/ass_utils.c | 50 | ||||
-rw-r--r-- | libass/ass_utils.h | 4 |
3 files changed, 33 insertions, 27 deletions
diff --git a/libass/ass_render.c b/libass/ass_render.c index 40b6d5d..cf22bec 100644 --- a/libass/ass_render.c +++ b/libass/ass_render.c @@ -75,9 +75,11 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library) // images_root and related stuff is zero-filled in calloc #if CONFIG_ASM && ARCH_X86 - if (has_avx2()) + bool sse2, avx2; + ass_cpu_capabilities(&sse2, &avx2); + if (avx2) priv->engine = &ass_bitmap_engine_avx2; - else if (has_sse2()) + else if (sse2) priv->engine = &ass_bitmap_engine_sse2; else priv->engine = &ass_bitmap_engine_c; diff --git a/libass/ass_utils.c b/libass/ass_utils.c index 3248cf3..0acc891 100644 --- a/libass/ass_utils.c +++ b/libass/ass_utils.c @@ -35,33 +35,39 @@ #include "x86/cpuid.h" -int has_sse2(void) +void ass_cpu_capabilities(bool *sse2, bool *avx2) { - uint32_t eax = 1, ebx, ecx, edx; - ass_get_cpuid(&eax, &ebx, &ecx, &edx); - return (edx >> 26) & 0x1; -} + *sse2 = false; + *avx2 = false; -int has_avx(void) -{ - uint32_t eax = 1, ebx, ecx, edx; + uint32_t eax = 0, ebx, ecx, edx; ass_get_cpuid(&eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << 27))) // not OSXSAVE - return 0; - uint32_t misc = ecx; - ass_get_xgetbv(0, &eax, &edx); - if ((eax & 0x6) != 0x6) - return 0; - return (misc >> 28) & 0x1; -} + uint32_t max_leaf = eax; + bool avx = false; + + if (max_leaf >= 1) { + eax = 1; + ass_get_cpuid(&eax, &ebx, &ecx, &edx); + if (edx & (1 << 26)) // SSE2 + *sse2 = true; + + if (ecx & (1 << 27) && // OSXSAVE + ecx & (1 << 28)) { // AVX + uint32_t xcr0l, xcr0h; + ass_get_xgetbv(0, &xcr0l, &xcr0h); + if (xcr0l & (1 << 1) && // XSAVE for XMM + xcr0l & (1 << 2)) // XSAVE for YMM + avx = true; + } + } -int has_avx2(void) -{ - uint32_t eax = 7, ebx, ecx, edx; - ass_get_cpuid(&eax, &ebx, &ecx, &edx); - return (ebx >> 5) & has_avx(); + if (max_leaf >= 7) { + eax = 7; + ass_get_cpuid(&eax, &ebx, &ecx, &edx); + if (avx && ebx & (1 << 5)) // AVX2 + *avx2 = true; + } } - #endif // ASM // Fallbacks diff --git a/libass/ass_utils.h b/libass/ass_utils.h index 
9d6e166..7fc922e 100644 --- a/libass/ass_utils.h +++ b/libass/ass_utils.h @@ -51,9 +51,7 @@ #define FEATURE_MASK(feat) (((uint32_t) 1) << (feat)) #if CONFIG_ASM && ARCH_X86 -int has_sse2(void); -int has_avx(void); -int has_avx2(void); +void ass_cpu_capabilities(bool *sse2, bool *avx2); #endif typedef struct { |