diff options
Diffstat (limited to 'dev-cpp/xsimd/files')
| -rw-r--r-- | dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch | 148 | ||||
| -rw-r--r-- | dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch | 88 |
2 files changed, 0 insertions, 236 deletions
diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch deleted file mode 100644 index 6aab22cd8416..000000000000 --- a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch +++ /dev/null @@ -1,148 +0,0 @@ -https://mail.kde.org/pipermail/distributions/2024-July/001511.html -https://github.com/xtensor-stack/xsimd/commit/96edf0340492fa9c080f5182b38358ca85baef5e - -From 96edf0340492fa9c080f5182b38358ca85baef5e Mon Sep 17 00:00:00 2001 -From: Dmitry Kazakov <dimula73@gmail.com> -Date: Tue, 28 May 2024 22:21:08 +0200 -Subject: [PATCH] Fix detection of SSE/AVX/AVX512 when they are explicitly - disabled by OS - -Some CPU vulnerability mitigations may disable AVX functionality -on the hardware level via the XCR0 register. We should check that -manually to verify that OS actually allows us to use this feature. - -See https://bugs.kde.org/show_bug.cgi?id=484622 - -Fix #1025 ---- - include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------ - 1 file changed, 72 insertions(+), 19 deletions(-) - -diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp -index f22089bac..6dda3be09 100644 ---- a/include/xsimd/config/xsimd_cpuid.hpp -+++ b/include/xsimd/config/xsimd_cpuid.hpp -@@ -114,6 +114,35 @@ namespace xsimd - #endif - - #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) -+ -+ auto get_xcr0_low = []() noexcept -+ { -+ uint32_t xcr0; -+ -+#if defined(_MSC_VER) && _MSC_VER >= 1400 -+ -+ xcr0 = (uint32_t)_xgetbv(0); -+ -+#elif defined(__GNUC__) -+ -+ __asm__( -+ "xorl %%ecx, %%ecx\n" -+ "xgetbv\n" -+ : "=a"(xcr0) -+ : -+#if defined(__i386__) -+ : "ecx", "edx" -+#else -+ : "rcx", "rdx" -+#endif -+ ); -+ -+#else /* _MSC_VER < 1400 */ -+#error "_MSC_VER < 1400 is not supported" -+#endif /* _MSC_VER && _MSC_VER >= 1400 */ -+ return xcr0; -+ }; -+ - auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept - { - -@@ -148,19 +177,43 @@ namespace xsimd - - get_cpuid(regs1, 0x1); - -- sse2 = regs1[3] >> 26 & 1; -- sse3 = regs1[2] >> 0 & 1; -- ssse3 = regs1[2] >> 9 & 1; -- sse4_1 = regs1[2] >> 19 & 1; -- sse4_2 = regs1[2] >> 20 & 1; -- fma3_sse42 = regs1[2] >> 12 & 1; -+ // OS can explicitly disable the usage of SSE/AVX extensions -+ // by setting an appropriate flag in CR0 register -+ // -+ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html -+ -+ unsigned sse_state_os_enabled = 1; -+ unsigned avx_state_os_enabled = 1; -+ unsigned avx512_state_os_enabled = 1; -+ -+ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit -+ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and -+ // to support processor extended state management using -+ // XSAVE/XRSTOR. -+ bool osxsave = regs1[2] >> 27 & 1; -+ if (osxsave) -+ { -+ -+ uint32_t xcr0 = get_xcr0_low(); -+ -+ sse_state_os_enabled = xcr0 >> 1 & 1; -+ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; -+ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; -+ } -+ -+ sse2 = regs1[3] >> 26 & sse_state_os_enabled; -+ sse3 = regs1[2] >> 0 & sse_state_os_enabled; -+ ssse3 = regs1[2] >> 9 & sse_state_os_enabled; -+ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; -+ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; -+ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; - -- avx = regs1[2] >> 28 & 1; -+ avx = regs1[2] >> 28 & avx_state_os_enabled; - fma3_avx = avx && fma3_sse42; - - int regs8[4]; - get_cpuid(regs8, 0x80000001); -- fma4 = regs8[2] >> 16 & 1; -+ fma4 = regs8[2] >> 16 & avx_state_os_enabled; - - // sse4a = regs[2] >> 6 & 1; - -@@ -168,23 +221,23 @@ namespace xsimd - - int regs7[4]; - get_cpuid(regs7, 0x7); -- avx2 = regs7[1] >> 5 & 1; -+ avx2 = regs7[1] >> 5 & avx_state_os_enabled; - - int regs7a[4]; - get_cpuid(regs7a, 0x7, 0x1); -- avxvnni = regs7a[0] >> 4 & 1; -+ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; - - fma3_avx2 = avx2 && fma3_sse42; - -- avx512f = regs7[1] >> 16 & 1; -- avx512cd = regs7[1] >> 28 & 1; -- avx512dq = regs7[1] >> 17 & 1; -- avx512bw = regs7[1] >> 30 & 1; -- avx512er = regs7[1] >> 27 & 1; -- avx512pf = regs7[1] >> 26 & 1; -- avx512ifma = regs7[1] >> 21 & 1; -- avx512vbmi = regs7[2] >> 1 & 1; -- avx512vnni_bw = regs7[2] >> 11 & 1; -+ avx512f = regs7[1] >> 16 & avx512_state_os_enabled; -+ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; -+ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; -+ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; -+ avx512er = regs7[1] >> 27 & avx512_state_os_enabled; -+ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; -+ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; -+ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; -+ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; - avx512vnni_vbmi = avx512vbmi && avx512vnni_bw; - #endif - } - diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch deleted file mode 100644 index 38ea56d1baa2..000000000000 --- a/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch +++ /dev/null @@ -1,88 +0,0 @@ -https://mail.kde.org/pipermail/distributions/2024-July/001511.html -https://github.com/xtensor-stack/xsimd/commit/80a59235e3ffa51659aaa06f002bfd088b77023c - -From 80a59235e3ffa51659aaa06f002bfd088b77023c Mon Sep 17 00:00:00 2001 -From: Dmitry Kazakov <dimula73@gmail.com> -Date: Fri, 14 Jun 2024 10:19:55 +0200 -Subject: [PATCH] Fix xsimd::available_architectures().has() for sve and rvv - archs - -Ideally the patch CPU detection code should also check if the length -of SVE and RVV is actually supported by the current CPU implementation -(i.e. ZCR_Elx.LEN register for SVE and something else for RVV), but -I don't have such CPUs/emulators handy, so I cannot add such checks. - -Given that xsimd::available_architectures().has() is a new feature -of XSIMD13 and the length check has never been present in XSIMD, this -bug is not a regression at least. - -The patch also adds a unittest that reproduces the error the patch fixes ---- - include/xsimd/config/xsimd_cpuid.hpp | 12 ++++++++++-- - test/test_arch.cpp | 15 +++++++++++++++ - 2 files changed, 25 insertions(+), 2 deletions(-) - -diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp -index 6dda3be09..8021fceb8 100644 ---- a/include/xsimd/config/xsimd_cpuid.hpp -+++ b/include/xsimd/config/xsimd_cpuid.hpp -@@ -42,6 +42,10 @@ namespace xsimd - #define ARCH_FIELD_EX(arch, field_name) \ - unsigned field_name; \ - XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } -+ -+#define ARCH_FIELD_EX_REUSE(arch, field_name) \ -+ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } -+ - #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name) - - ARCH_FIELD(sse2) -@@ -72,8 +76,12 @@ namespace xsimd - ARCH_FIELD(neon) - ARCH_FIELD(neon64) - ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64) -- ARCH_FIELD(sve) -- ARCH_FIELD(rvv) -+ ARCH_FIELD_EX(detail::sve<512>, sve) -+ ARCH_FIELD_EX_REUSE(detail::sve<256>, sve) -+ ARCH_FIELD_EX_REUSE(detail::sve<128>, sve) -+ ARCH_FIELD_EX(detail::rvv<512>, rvv) -+ ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv) -+ ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv) - ARCH_FIELD(wasm) - - #undef ARCH_FIELD -diff --git a/test/test_arch.cpp b/test/test_arch.cpp -index b42073358..f1f50d546 100644 ---- a/test/test_arch.cpp -+++ b/test/test_arch.cpp -@@ -38,6 +38,16 @@ struct check_supported - } - }; - -+struct check_cpu_has_intruction_set -+{ -+ template <class Arch> -+ void operator()(Arch arch) const -+ { -+ static_assert(std::is_same<decltype(xsimd::available_architectures().has(arch)), bool>::value, -+ "cannot test instruction set availability on CPU"); -+ } -+}; -+ - struct check_available - { - template <class Arch> -@@ -71,6 +81,11 @@ TEST_CASE("[multi arch support]") - xsimd::supported_architectures::for_each(check_supported {}); - } - -+ SUBCASE("xsimd::available_architectures::has") -+ { -+ xsimd::all_architectures::for_each(check_cpu_has_intruction_set {}); -+ } -+ - SUBCASE("xsimd::default_arch::name") - { - constexpr char const* name = xsimd::default_arch::name(); - |
