summary refs log tree commit diff
path: root/dev-cpp/xsimd/files
diff options
context:
space:
mode:
Diffstat (limited to 'dev-cpp/xsimd/files')
-rw-r--r-- dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch 148
-rw-r--r-- dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch 88
2 files changed, 0 insertions, 236 deletions
diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch
deleted file mode 100644
index 6aab22cd8416..000000000000
--- a/dev-cpp/xsimd/files/xsimd-13.0.0-detection-simd-with-mitigations.patch
+++ /dev/null
@@ -1,148 +0,0 @@
-https://mail.kde.org/pipermail/distributions/2024-July/001511.html
-https://github.com/xtensor-stack/xsimd/commit/96edf0340492fa9c080f5182b38358ca85baef5e
-
-From 96edf0340492fa9c080f5182b38358ca85baef5e Mon Sep 17 00:00:00 2001
-From: Dmitry Kazakov <dimula73@gmail.com>
-Date: Tue, 28 May 2024 22:21:08 +0200
-Subject: [PATCH] Fix detection of SSE/AVX/AVX512 when they are explicitly
- disabled by OS
-
-Some CPU vulnerability mitigations may disable AVX functionality
-on the hardware level via the XCR0 register. We should check that
-manually to verify that OS actually allows us to use this feature.
-
-See https://bugs.kde.org/show_bug.cgi?id=484622
-
-Fix #1025
----
- include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------
- 1 file changed, 72 insertions(+), 19 deletions(-)
-
-diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
-index f22089bac..6dda3be09 100644
---- a/include/xsimd/config/xsimd_cpuid.hpp
-+++ b/include/xsimd/config/xsimd_cpuid.hpp
-@@ -114,6 +114,35 @@ namespace xsimd
- #endif
-
- #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
-+
-+ auto get_xcr0_low = []() noexcept
-+ {
-+ uint32_t xcr0;
-+
-+#if defined(_MSC_VER) && _MSC_VER >= 1400
-+
-+ xcr0 = (uint32_t)_xgetbv(0);
-+
-+#elif defined(__GNUC__)
-+
-+ __asm__(
-+ "xorl %%ecx, %%ecx\n"
-+ "xgetbv\n"
-+ : "=a"(xcr0)
-+ :
-+#if defined(__i386__)
-+ : "ecx", "edx"
-+#else
-+ : "rcx", "rdx"
-+#endif
-+ );
-+
-+#else /* _MSC_VER < 1400 */
-+#error "_MSC_VER < 1400 is not supported"
-+#endif /* _MSC_VER && _MSC_VER >= 1400 */
-+ return xcr0;
-+ };
-+
- auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
- {
-
-@@ -148,19 +177,43 @@ namespace xsimd
-
- get_cpuid(regs1, 0x1);
-
-- sse2 = regs1[3] >> 26 & 1;
-- sse3 = regs1[2] >> 0 & 1;
-- ssse3 = regs1[2] >> 9 & 1;
-- sse4_1 = regs1[2] >> 19 & 1;
-- sse4_2 = regs1[2] >> 20 & 1;
-- fma3_sse42 = regs1[2] >> 12 & 1;
-+ // OS can explicitly disable the usage of SSE/AVX extensions
-+ // by setting an appropriate flag in the XCR0 register
-+ //
-+ // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
-+
-+ unsigned sse_state_os_enabled = 1;
-+ unsigned avx_state_os_enabled = 1;
-+ unsigned avx512_state_os_enabled = 1;
-+
-+ // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
-+ // 18] to enable XSETBV/XGETBV instructions to access XCR0 and
-+ // to support processor extended state management using
-+ // XSAVE/XRSTOR.
-+ bool osxsave = regs1[2] >> 27 & 1;
-+ if (osxsave)
-+ {
-+
-+ uint32_t xcr0 = get_xcr0_low();
-+
-+ sse_state_os_enabled = xcr0 >> 1 & 1;
-+ avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
-+ avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
-+ }
-+
-+ sse2 = regs1[3] >> 26 & sse_state_os_enabled;
-+ sse3 = regs1[2] >> 0 & sse_state_os_enabled;
-+ ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
-+ sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
-+ sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
-+ fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
-
-- avx = regs1[2] >> 28 & 1;
-+ avx = regs1[2] >> 28 & avx_state_os_enabled;
- fma3_avx = avx && fma3_sse42;
-
- int regs8[4];
- get_cpuid(regs8, 0x80000001);
-- fma4 = regs8[2] >> 16 & 1;
-+ fma4 = regs8[2] >> 16 & avx_state_os_enabled;
-
- // sse4a = regs[2] >> 6 & 1;
-
-@@ -168,23 +221,23 @@ namespace xsimd
-
- int regs7[4];
- get_cpuid(regs7, 0x7);
-- avx2 = regs7[1] >> 5 & 1;
-+ avx2 = regs7[1] >> 5 & avx_state_os_enabled;
-
- int regs7a[4];
- get_cpuid(regs7a, 0x7, 0x1);
-- avxvnni = regs7a[0] >> 4 & 1;
-+ avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
-
- fma3_avx2 = avx2 && fma3_sse42;
-
-- avx512f = regs7[1] >> 16 & 1;
-- avx512cd = regs7[1] >> 28 & 1;
-- avx512dq = regs7[1] >> 17 & 1;
-- avx512bw = regs7[1] >> 30 & 1;
-- avx512er = regs7[1] >> 27 & 1;
-- avx512pf = regs7[1] >> 26 & 1;
-- avx512ifma = regs7[1] >> 21 & 1;
-- avx512vbmi = regs7[2] >> 1 & 1;
-- avx512vnni_bw = regs7[2] >> 11 & 1;
-+ avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
-+ avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
-+ avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
-+ avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
-+ avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
-+ avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
-+ avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
-+ avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
-+ avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
- avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
- #endif
- }
-
diff --git a/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch b/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch
deleted file mode 100644
index 38ea56d1baa2..000000000000
--- a/dev-cpp/xsimd/files/xsimd-13.0.0-sve-rvv.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-https://mail.kde.org/pipermail/distributions/2024-July/001511.html
-https://github.com/xtensor-stack/xsimd/commit/80a59235e3ffa51659aaa06f002bfd088b77023c
-
-From 80a59235e3ffa51659aaa06f002bfd088b77023c Mon Sep 17 00:00:00 2001
-From: Dmitry Kazakov <dimula73@gmail.com>
-Date: Fri, 14 Jun 2024 10:19:55 +0200
-Subject: [PATCH] Fix xsimd::available_architectures().has() for sve and rvv
- archs
-
-Ideally the patched CPU detection code should also check if the length
-of SVE and RVV is actually supported by the current CPU implementation
-(i.e. ZCR_Elx.LEN register for SVE and something else for RVV), but
-I don't have such CPUs/emulators handy, so I cannot add such checks.
-
-Given that xsimd::available_architectures().has() is a new feature
-of XSIMD13 and the length check has never been present in XSIMD, this
-bug is not a regression at least.
-
-The patch also adds a unittest that reproduces the error the patch fixes
----
- include/xsimd/config/xsimd_cpuid.hpp | 12 ++++++++++--
- test/test_arch.cpp | 15 +++++++++++++++
- 2 files changed, 25 insertions(+), 2 deletions(-)
-
-diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
-index 6dda3be09..8021fceb8 100644
---- a/include/xsimd/config/xsimd_cpuid.hpp
-+++ b/include/xsimd/config/xsimd_cpuid.hpp
-@@ -42,6 +42,10 @@ namespace xsimd
- #define ARCH_FIELD_EX(arch, field_name) \
- unsigned field_name; \
- XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
-+
-+#define ARCH_FIELD_EX_REUSE(arch, field_name) \
-+ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
-+
- #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name)
-
- ARCH_FIELD(sse2)
-@@ -72,8 +76,12 @@ namespace xsimd
- ARCH_FIELD(neon)
- ARCH_FIELD(neon64)
- ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
-- ARCH_FIELD(sve)
-- ARCH_FIELD(rvv)
-+ ARCH_FIELD_EX(detail::sve<512>, sve)
-+ ARCH_FIELD_EX_REUSE(detail::sve<256>, sve)
-+ ARCH_FIELD_EX_REUSE(detail::sve<128>, sve)
-+ ARCH_FIELD_EX(detail::rvv<512>, rvv)
-+ ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv)
-+ ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv)
- ARCH_FIELD(wasm)
-
- #undef ARCH_FIELD
-diff --git a/test/test_arch.cpp b/test/test_arch.cpp
-index b42073358..f1f50d546 100644
---- a/test/test_arch.cpp
-+++ b/test/test_arch.cpp
-@@ -38,6 +38,16 @@ struct check_supported
- }
- };
-
-+struct check_cpu_has_intruction_set
-+{
-+ template <class Arch>
-+ void operator()(Arch arch) const
-+ {
-+ static_assert(std::is_same<decltype(xsimd::available_architectures().has(arch)), bool>::value,
-+ "cannot test instruction set availability on CPU");
-+ }
-+};
-+
- struct check_available
- {
- template <class Arch>
-@@ -71,6 +81,11 @@ TEST_CASE("[multi arch support]")
- xsimd::supported_architectures::for_each(check_supported {});
- }
-
-+ SUBCASE("xsimd::available_architectures::has")
-+ {
-+ xsimd::all_architectures::for_each(check_cpu_has_intruction_set {});
-+ }
-+
- SUBCASE("xsimd::default_arch::name")
- {
- constexpr char const* name = xsimd::default_arch::name();
-