diff options
| author | Sam James <sam@gentoo.org> | 2024-07-13 20:04:00 +0100 |
|---|---|---|
| committer | Sam James <sam@gentoo.org> | 2024-07-13 20:05:07 +0100 |
| commit | 6e14169d7ff552b61714ecb212f11cec0bd7a3f0 (patch) | |
| tree | 3271687ea12ec0d920fe7391353bb02e418c4c4f /dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch | |
| parent | 7e765ff32ffbeb961cfc0732a456911ef540a05d (diff) | |
| download | gentoo-6e14169d7ff552b61714ecb212f11cec0bd7a3f0.tar.gz gentoo-6e14169d7ff552b61714ecb212f11cec0bd7a3f0.tar.bz2 gentoo-6e14169d7ff552b61714ecb212f11cec0bd7a3f0.zip | |
dev-cpp/xsimd: add 13.0.0, including backports needed by media-gfx/krita
See https://mail.kde.org/pipermail/distributions/2024-July/001511.html.
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch')
| -rw-r--r-- | dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch new file mode 100644 index 000000000000..6aab22cd8416 --- /dev/null +++ b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch @@ -0,0 +1,148 @@ +https://mail.kde.org/pipermail/distributions/2024-July/001511.html +https://github.com/xtensor-stack/xsimd/commit/96edf0340492fa9c080f5182b38358ca85baef5e + +From 96edf0340492fa9c080f5182b38358ca85baef5e Mon Sep 17 00:00:00 2001 +From: Dmitry Kazakov <dimula73@gmail.com> +Date: Tue, 28 May 2024 22:21:08 +0200 +Subject: [PATCH] Fix detection of SSE/AVX/AVX512 when they are explicitly + disabled by OS + +Some CPU vulnerability mitigations may disable AVX functionality +on the hardware level via the XCR0 register. We should check that +manually to verify that OS actually allows us to use this feature. + +See https://bugs.kde.org/show_bug.cgi?id=484622 + +Fix #1025 +--- + include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------ + 1 file changed, 72 insertions(+), 19 deletions(-) + +diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp +index f22089bac..6dda3be09 100644 +--- a/include/xsimd/config/xsimd_cpuid.hpp ++++ b/include/xsimd/config/xsimd_cpuid.hpp +@@ -114,6 +114,35 @@ namespace xsimd + #endif + + #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) ++ ++ auto get_xcr0_low = []() noexcept ++ { ++ uint32_t xcr0; ++ ++#if defined(_MSC_VER) && _MSC_VER >= 1400 ++ ++ xcr0 = (uint32_t)_xgetbv(0); ++ ++#elif defined(__GNUC__) ++ ++ __asm__( ++ "xorl %%ecx, %%ecx\n" ++ "xgetbv\n" ++ : "=a"(xcr0) ++ : ++#if defined(__i386__) ++ : "ecx", "edx" ++#else ++ : "rcx", "rdx" ++#endif ++ ); ++ ++#else /* _MSC_VER < 1400 */ ++#error "_MSC_VER < 1400 is not supported" ++#endif /* _MSC_VER && _MSC_VER >= 1400 */ ++ return xcr0; ++ }; ++ + auto get_cpuid = [](int reg[4], int level, 
int count = 0) noexcept + { + +@@ -148,19 +177,43 @@ namespace xsimd + + get_cpuid(regs1, 0x1); + +- sse2 = regs1[3] >> 26 & 1; +- sse3 = regs1[2] >> 0 & 1; +- ssse3 = regs1[2] >> 9 & 1; +- sse4_1 = regs1[2] >> 19 & 1; +- sse4_2 = regs1[2] >> 20 & 1; +- fma3_sse42 = regs1[2] >> 12 & 1; ++ // OS can explicitly disable the usage of SSE/AVX extensions ++ // by setting an appropriate flag in XCR0 register ++ // ++ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html ++ ++ unsigned sse_state_os_enabled = 1; ++ unsigned avx_state_os_enabled = 1; ++ unsigned avx512_state_os_enabled = 1; ++ ++ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit ++ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and ++ // to support processor extended state management using ++ // XSAVE/XRSTOR. ++ bool osxsave = regs1[2] >> 27 & 1; ++ if (osxsave) ++ { ++ ++ uint32_t xcr0 = get_xcr0_low(); ++ ++ sse_state_os_enabled = xcr0 >> 1 & 1; ++ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; ++ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; ++ } ++ ++ sse2 = regs1[3] >> 26 & sse_state_os_enabled; ++ sse3 = regs1[2] >> 0 & sse_state_os_enabled; ++ ssse3 = regs1[2] >> 9 & sse_state_os_enabled; ++ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; ++ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; ++ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; + +- avx = regs1[2] >> 28 & 1; ++ avx = regs1[2] >> 28 & avx_state_os_enabled; + fma3_avx = avx && fma3_sse42; + + int regs8[4]; + get_cpuid(regs8, 0x80000001); +- fma4 = regs8[2] >> 16 & 1; ++ fma4 = regs8[2] >> 16 & avx_state_os_enabled; + + // sse4a = regs[2] >> 6 & 1; + +@@ -168,23 +221,23 @@ namespace xsimd + + int regs7[4]; + get_cpuid(regs7, 0x7); +- avx2 = regs7[1] >> 5 & 1; ++ avx2 = regs7[1] >> 5 & avx_state_os_enabled; + + int regs7a[4]; + get_cpuid(regs7a, 0x7, 0x1); +- avxvnni = regs7a[0] >> 4 & 1; ++ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; + + fma3_avx2 = avx2 && 
fma3_sse42; + +- avx512f = regs7[1] >> 16 & 1; +- avx512cd = regs7[1] >> 28 & 1; +- avx512dq = regs7[1] >> 17 & 1; +- avx512bw = regs7[1] >> 30 & 1; +- avx512er = regs7[1] >> 27 & 1; +- avx512pf = regs7[1] >> 26 & 1; +- avx512ifma = regs7[1] >> 21 & 1; +- avx512vbmi = regs7[2] >> 1 & 1; +- avx512vnni_bw = regs7[2] >> 11 & 1; ++ avx512f = regs7[1] >> 16 & avx512_state_os_enabled; ++ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; ++ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; ++ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; ++ avx512er = regs7[1] >> 27 & avx512_state_os_enabled; ++ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; ++ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; ++ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; ++ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; + avx512vnni_vbmi = avx512vbmi && avx512vnni_bw; + #endif + } + |
