summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Oneric <oneric@oneric.stub> 2022-03-16 19:50:43 +0100
committer Oneric <oneric@oneric.stub> 2022-03-17 23:58:33 +0100
commit 9608c8c838e1046601c27f6473e23654fedf1716 (patch)
tree b1d4c88bc34b9db0899743af58cd3628a22b89b1
parent 5f0e8450f834894b2745238e3d32ff4878710ec8 (diff)
download libass-9608c8c838e1046601c27f6473e23654fedf1716.tar.bz2
libass-9608c8c838e1046601c27f6473e23654fedf1716.tar.xz
asm/x86: check highest supported leaf for cpuid
If a higher-than-supported leaf is requested, at least Intel processors will silently return the data of the highest supported leaf, invalidating the following feature tests. Reported in: https://github.com/libass/libass/pull/603
-rw-r--r-- libass/ass_render.c 6
-rw-r--r-- libass/ass_utils.c 50
-rw-r--r-- libass/ass_utils.h 4
3 files changed, 33 insertions, 27 deletions
diff --git a/libass/ass_render.c b/libass/ass_render.c
index 40b6d5d..cf22bec 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -75,9 +75,11 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library)
// images_root and related stuff is zero-filled in calloc
#if CONFIG_ASM && ARCH_X86
- if (has_avx2())
+ bool sse2, avx2;
+ ass_cpu_capabilities(&sse2, &avx2);
+ if (avx2)
priv->engine = &ass_bitmap_engine_avx2;
- else if (has_sse2())
+ else if (sse2)
priv->engine = &ass_bitmap_engine_sse2;
else
priv->engine = &ass_bitmap_engine_c;
diff --git a/libass/ass_utils.c b/libass/ass_utils.c
index 3248cf3..0acc891 100644
--- a/libass/ass_utils.c
+++ b/libass/ass_utils.c
@@ -35,33 +35,39 @@
#include "x86/cpuid.h"
-int has_sse2(void)
+void ass_cpu_capabilities(bool *sse2, bool *avx2)
{
- uint32_t eax = 1, ebx, ecx, edx;
- ass_get_cpuid(&eax, &ebx, &ecx, &edx);
- return (edx >> 26) & 0x1;
-}
+ *sse2 = false;
+ *avx2 = false;
-int has_avx(void)
-{
- uint32_t eax = 1, ebx, ecx, edx;
+ uint32_t eax = 0, ebx, ecx, edx;
ass_get_cpuid(&eax, &ebx, &ecx, &edx);
- if (!(ecx & (1 << 27))) // not OSXSAVE
- return 0;
- uint32_t misc = ecx;
- ass_get_xgetbv(0, &eax, &edx);
- if ((eax & 0x6) != 0x6)
- return 0;
- return (misc >> 28) & 0x1;
-}
+ uint32_t max_leaf = eax;
+ bool avx = false;
+
+ if (max_leaf >= 1) {
+ eax = 1;
+ ass_get_cpuid(&eax, &ebx, &ecx, &edx);
+ if (edx & (1 << 26)) // SSE2
+ *sse2 = true;
+
+ if (ecx & (1 << 27) && // OSXSAVE
+ ecx & (1 << 28)) { // AVX
+ uint32_t xcr0l, xcr0h;
+ ass_get_xgetbv(0, &xcr0l, &xcr0h);
+ if (xcr0l & (1 << 1) && // XSAVE for XMM
+ xcr0l & (1 << 2)) // XSAVE for YMM
+ avx = true;
+ }
+ }
-int has_avx2(void)
-{
- uint32_t eax = 7, ebx, ecx, edx;
- ass_get_cpuid(&eax, &ebx, &ecx, &edx);
- return (ebx >> 5) & has_avx();
+ if (max_leaf >= 7) {
+ eax = 7;
+ ass_get_cpuid(&eax, &ebx, &ecx, &edx);
+ if (avx && ebx & (1 << 5)) // AVX2
+ *avx2 = true;
+ }
}
-
#endif // ASM
// Fallbacks
diff --git a/libass/ass_utils.h b/libass/ass_utils.h
index 9d6e166..7fc922e 100644
--- a/libass/ass_utils.h
+++ b/libass/ass_utils.h
@@ -51,9 +51,7 @@
#define FEATURE_MASK(feat) (((uint32_t) 1) << (feat))
#if CONFIG_ASM && ARCH_X86
-int has_sse2(void);
-int has_avx(void);
-int has_avx2(void);
+void ass_cpu_capabilities(bool *sse2, bool *avx2);
#endif
typedef struct {