From 1f2eceb5dfcce9899649870caac59d62055e7b82 Mon Sep 17 00:00:00 2001 From: stefson Date: Tue, 7 Mar 2023 12:01:52 +0100 Subject: dev-cpp/highway: fix compile without neon optimization on armv7 Closes: https://bugs.gentoo.org/869077 Signed-off-by: Steffen Kuhn Closes: https://github.com/gentoo/gentoo/pull/29964 Signed-off-by: Sam James --- ...ile-for-armv7-targets-with-vfp4-and-lower.patch | 123 +++++++++++++++++++++ dev-cpp/highway/highway-1.0.1-r1.ebuild | 6 +- dev-cpp/highway/highway-1.0.3.ebuild | 4 + 3 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch (limited to 'dev-cpp') diff --git a/dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch b/dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch new file mode 100644 index 000000000000..ebf448cfbb24 --- /dev/null +++ b/dev-cpp/highway/files/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch @@ -0,0 +1,123 @@ +https://github.com/google/highway/commit/dc63f813c465f3bf95cb5b98f01aeed28b81173c +https://github.com/google/highway/pull/1143 + +https://github.com/google/highway/issues/834 +https://github.com/google/highway/issues/1032 + +https://bugs.gentoo.org/869077 + +From dc63f813c465f3bf95cb5b98f01aeed28b81173c Mon Sep 17 00:00:00 2001 +From: Julien Olivain +Date: Mon, 20 Feb 2023 23:22:28 +0100 +Subject: [PATCH] Fix compilation for armv7 targets with vfp < v4 and gcc >= 8 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When using a armv7 gcc >= 8 toolchain (like [1]) with Highway +configured with -DHWY_CMAKE_ARM7=OFF and HWY_ENABLE_CONTRIB=ON, +compilation fails with error: + + In file included from /build/highway-1.0.3/hwy/ops/arm_neon-inl.h:33, + from /build/highway-1.0.3/hwy/highway.h:358, + from /build/highway-1.0.3/hwy/contrib/sort/shared-inl.h:104, + from /build/highway-1.0.3/hwy/contrib/sort/traits128-inl.h:27, + from /build/highway-1.0.3/hwy/contrib/sort/vqsort_128d.cc:23, + from /build/highway-1.0.3/hwy/foreach_target.h:81, + from /build/highway-1.0.3/hwy/contrib/sort/vqsort_128d.cc:20: + /toolchain/lib/gcc/arm-buildroot-linux-gnueabihf/12.2.0/include/arm_neon.h: In function ‘void hwy::N_NEON::StoreU(Vec128, Full128, uint64_t*)’: + /toolchain/lib/gcc/arm-buildroot-linux-gnueabihf/12.2.0/include/arm_neon.h:11052:1: error: inlining failed in call to ‘always_inline’ ‘void vst1q_u64(uint64_t*, uint64x2_t)’: target specific option mismatch + 11052 | vst1q_u64 (uint64_t * __a, uint64x2_t __b) + | ^~~~~~~~~ + /build/highway-1.0.3/hwy/ops/arm_neon-inl.h:2786:12: note: called from here + 2786 | vst1q_u64(unaligned, v.raw); + | ~~~~~~~~~^~~~~~~~~~~~~~~~~~ + +The same errors happen when configured with HWY_ENABLE_EXAMPLES=ON, +or from client libraries like libjxl (at other places). + +The issue is that Highway Arm NEON ops have a dependency on the +Advanced SIMD (Neon) v2 and the VFPv4 floating-point instructions. +The SIMD (Neon) v1 and VFPv3 instructions are not supported. + +There was several attempts to fix variants of this issues. +See #834 and #1032. + +HWY_NEON target is selected only if __ARM_NEON is defined. See: +https://github.com/google/highway/blob/1.0.3/hwy/detect_targets.h#L251 + +This test is not sufficient since __ARM_NEON will be predefined in +any cases when Neon is enabled (neon-vfpv3, neon-vfpv4). + +The issue is that HWY_CMAKE_ARM7=ON implies VFPv4 / NEON SIMD v2. +When setting HWY_CMAKE_ARM7=OFF, "neon-vfpv4" will not be forced, +but the code is still using intrinsics assuming VFPv4. Gcc will fail +with error because code cannot be generated for the selected +architecture. + +This issue can be avoided by adding "-DHWY_DISABLED_TARGETS=HWY_NEON" in +CXXFLAGS. The problem with this solution is that every client program will +also need to do the same. This goes against the very purpose of +"hwy/detect_targets.h". + +Technically, Armv7-a processors with VFPv4 can be detected using some +ACLE (Arm C Language Extensions [2]) predefined macros: + +Basically, we want Highway to define HWY_NEON only when the target +supports SIMDv2/VFPv4 or higher. An older target with vfpv3 only +(e.g. Cortex-A8, A9, ...) would NOT define HWY_NEON, and therefore +would fallback on HWY_SCALAR implementation. + +However, not all compiler completely support ACLE. There is also +several versions too. So we cannot easily rely on macros like +"__ARM_VFPV4__" (which clang predefine, but not gcc). + +The alternative solution proposed in this patch, is to declare the +HWY_NEON target architecture as broken, when we detect the target is +Armv7-A, but mandatory features for vfpv4 (namely half-float, FMA) +are missing. Half-floats are tested using the macro __ARM_NEON_FP, +and the FMA with the macro __ARM_FEATURE_FMA. See ACLE [2]. The +intent of declaring the target as broken, rather than selecting +HWY_NEON only if vfpv4 features are detected is to remain a bit +conservative, since the detection is slithly inaccurate. + +For a given compiler/cflags, predefined macros for Arm/ACLE can be +reviewed with commands like: + + arm-linux-gnueabihf-gcc -mcpu=cortex-a9 -mfpu=neon-vfpv3 -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort + arm-linux-gnueabihf-gcc -mcpu=cortex-a7 -mfpu=neon-vfpv4 -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort + clang -target armv7a -mcpu=cortex-a9 -mfpu=neon-vfpv3 -mfloat-abi=hard -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort + clang -target armv7a -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard -Wp,-dM -E -c - < /dev/null | grep -Fi arm | sort + +The different values of __ARM_NEON_FP can be seen, depending which +"-mfpu" is passed. Same for __ARM_FEATURE_FMA. + +[1] https://toolchains.bootlin.com/downloads/releases/toolchains/armv7-eabihf/tarballs/armv7-eabihf--glibc--bleeding-edge-2022.08-1.tar.bz2 +[2] https://github.com/ARM-software/acle/ + +Signed-off-by: Julien Olivain +--- + hwy/detect_targets.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h +index 2beca95b..40ae7fe7 100644 +--- a/hwy/detect_targets.h ++++ b/hwy/detect_targets.h +@@ -154,6 +154,16 @@ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN)) + #define HWY_BROKEN_TARGETS (HWY_NEON) + ++// armv7-a without a detected vfpv4 is not supported ++// (for example Cortex-A8, Cortex-A9) ++// vfpv4 always have neon half-float _and_ FMA. ++#elif HWY_ARCH_ARM_V7 && \ ++ (__ARM_ARCH_PROFILE == 'A') && \ ++ !defined(__ARM_VFPV4__) && \ ++ !((__ARM_NEON_FP & 0x2 /* half-float */) && \ ++ (__ARM_FEATURE_FMA == 1)) ++#define HWY_BROKEN_TARGETS (HWY_NEON) ++ + // SVE[2] require recent clang or gcc versions. + #elif (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \ + (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000) diff --git a/dev-cpp/highway/highway-1.0.1-r1.ebuild b/dev-cpp/highway/highway-1.0.1-r1.ebuild index e4a2c4b87d4b..b0a7900ce124 100644 --- a/dev-cpp/highway/highway-1.0.1-r1.ebuild +++ b/dev-cpp/highway/highway-1.0.1-r1.ebuild @@ -1,4 +1,4 @@ -# Copyright 2021-2022 Gentoo Authors +# Copyright 2021-2023 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 EAPI=8 @@ -24,6 +24,10 @@ DEPEND="test? ( dev-cpp/gtest[${MULTILIB_USEDEP}] )" RESTRICT="!test? ( test )" +PATCHES=( + "${FILESDIR}"/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch +) + multilib_src_configure() { local mycmakeargs=( -DHWY_CMAKE_ARM7=$(usex cpu_flags_arm_neon) diff --git a/dev-cpp/highway/highway-1.0.3.ebuild b/dev-cpp/highway/highway-1.0.3.ebuild index af752cf34a06..c4d4ed034ee9 100644 --- a/dev-cpp/highway/highway-1.0.3.ebuild +++ b/dev-cpp/highway/highway-1.0.3.ebuild @@ -24,6 +24,10 @@ DEPEND="test? ( dev-cpp/gtest[${MULTILIB_USEDEP}] )" RESTRICT="!test? ( test )" +PATCHES=( + "${FILESDIR}"/0001-fix-compile-for-armv7-targets-with-vfp4-and-lower.patch +) + multilib_src_configure() { local mycmakeargs=( -DHWY_CMAKE_ARM7=$(usex cpu_flags_arm_neon) -- cgit v1.2.3-65-gdbad