diff options
Diffstat (limited to 'dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch')
| -rw-r--r-- | dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch | 148 |
1 files changed, 0 insertions, 148 deletions
diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch deleted file mode 100644 index 6aab22cd8416..000000000000 --- a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch +++ /dev/null @@ -1,148 +0,0 @@ -https://mail.kde.org/pipermail/distributions/2024-July/001511.html -https://github.com/xtensor-stack/xsimd/commit/96edf0340492fa9c080f5182b38358ca85baef5e - -From 96edf0340492fa9c080f5182b38358ca85baef5e Mon Sep 17 00:00:00 2001 -From: Dmitry Kazakov <dimula73@gmail.com> -Date: Tue, 28 May 2024 22:21:08 +0200 -Subject: [PATCH] Fix detection of SSE/AVX/AVX512 when they are explicitly - disabled by OS - -Some CPU vulnerability mitigations may disable AVX functionality -on the hardware level via the XCR0 register. We should check that -manually to verify that OS actually allows us to use this feature. - -See https://bugs.kde.org/show_bug.cgi?id=484622 - -Fix #1025 ---- - include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------ - 1 file changed, 72 insertions(+), 19 deletions(-) - -diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp -index f22089bac..6dda3be09 100644 ---- a/include/xsimd/config/xsimd_cpuid.hpp -+++ b/include/xsimd/config/xsimd_cpuid.hpp -@@ -114,6 +114,35 @@ namespace xsimd - #endif - - #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) -+ -+ auto get_xcr0_low = []() noexcept -+ { -+ uint32_t xcr0; -+ -+#if defined(_MSC_VER) && _MSC_VER >= 1400 -+ -+ xcr0 = (uint32_t)_xgetbv(0); -+ -+#elif defined(__GNUC__) -+ -+ __asm__( -+ "xorl %%ecx, %%ecx\n" -+ "xgetbv\n" -+ : "=a"(xcr0) -+ : -+#if defined(__i386__) -+ : "ecx", "edx" -+#else -+ : "rcx", "rdx" -+#endif -+ ); -+ -+#else /* _MSC_VER < 1400 */ -+#error "_MSC_VER < 1400 is not supported" -+#endif /* _MSC_VER && _MSC_VER >= 1400 */ -+ return xcr0; -+ }; -+ - auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept - { - -@@ -148,19 +177,43 @@ namespace xsimd - - get_cpuid(regs1, 0x1); - -- sse2 = regs1[3] >> 26 & 1; -- sse3 = regs1[2] >> 0 & 1; -- ssse3 = regs1[2] >> 9 & 1; -- sse4_1 = regs1[2] >> 19 & 1; -- sse4_2 = regs1[2] >> 20 & 1; -- fma3_sse42 = regs1[2] >> 12 & 1; -+ // OS can explicitly disable the usage of SSE/AVX extensions -+ // by setting an appropriate flag in CR0 register -+ // -+ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html -+ -+ unsigned sse_state_os_enabled = 1; -+ unsigned avx_state_os_enabled = 1; -+ unsigned avx512_state_os_enabled = 1; -+ -+ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit -+ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and -+ // to support processor extended state management using -+ // XSAVE/XRSTOR. -+ bool osxsave = regs1[2] >> 27 & 1; -+ if (osxsave) -+ { -+ -+ uint32_t xcr0 = get_xcr0_low(); -+ -+ sse_state_os_enabled = xcr0 >> 1 & 1; -+ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; -+ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; -+ } -+ -+ sse2 = regs1[3] >> 26 & sse_state_os_enabled; -+ sse3 = regs1[2] >> 0 & sse_state_os_enabled; -+ ssse3 = regs1[2] >> 9 & sse_state_os_enabled; -+ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; -+ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; -+ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; - -- avx = regs1[2] >> 28 & 1; -+ avx = regs1[2] >> 28 & avx_state_os_enabled; - fma3_avx = avx && fma3_sse42; - - int regs8[4]; - get_cpuid(regs8, 0x80000001); -- fma4 = regs8[2] >> 16 & 1; -+ fma4 = regs8[2] >> 16 & avx_state_os_enabled; - - // sse4a = regs[2] >> 6 & 1; - -@@ -168,23 +221,23 @@ namespace xsimd - - int regs7[4]; - get_cpuid(regs7, 0x7); -- avx2 = regs7[1] >> 5 & 1; -+ avx2 = regs7[1] >> 5 & avx_state_os_enabled; - - int regs7a[4]; - get_cpuid(regs7a, 0x7, 0x1); -- avxvnni = regs7a[0] >> 4 & 1; -+ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; - - fma3_avx2 = avx2 && fma3_sse42; - -- avx512f = regs7[1] >> 16 & 1; -- avx512cd = regs7[1] >> 28 & 1; -- avx512dq = regs7[1] >> 17 & 1; -- avx512bw = regs7[1] >> 30 & 1; -- avx512er = regs7[1] >> 27 & 1; -- avx512pf = regs7[1] >> 26 & 1; -- avx512ifma = regs7[1] >> 21 & 1; -- avx512vbmi = regs7[2] >> 1 & 1; -- avx512vnni_bw = regs7[2] >> 11 & 1; -+ avx512f = regs7[1] >> 16 & avx512_state_os_enabled; -+ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; -+ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; -+ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; -+ avx512er = regs7[1] >> 27 & avx512_state_os_enabled; -+ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; -+ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; -+ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; -+ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; - avx512vnni_vbmi = avx512vbmi && avx512vnni_bw; - #endif - } - |
