From f45825bd990a00a61e4b158e397e72a15f9754a3 Mon Sep 17 00:00:00 2001 From: falkTX Date: Sun, 1 Jan 2023 01:23:35 +0000 Subject: [PATCH] Add custom vcvtnq_s32_f32 implementation for armv7 Signed-off-by: falkTX --- deps/Makefile | 9 ++++++++ include/simd-compat/pmmintrin.h | 40 ++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/deps/Makefile b/deps/Makefile index 4540c8e..f7d2cb8 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -337,6 +337,15 @@ SURGE_CXX_FLAGS += -msse -msse2 -mfpmath=sse endif endif +# same flags as applied to main build +SURGE_CXX_FLAGS += -DSIMDE_ACCURACY_PREFERENCE=0 +SURGE_CXX_FLAGS += -DSIMDE_ENABLE_NATIVE_ALIASES +SURGE_CXX_FLAGS += -DSIMDE_FAST_CONVERSION_RANGE +SURGE_CXX_FLAGS += -DSIMDE_FAST_MATH +SURGE_CXX_FLAGS += -DSIMDE_FAST_NANS +SURGE_CXX_FLAGS += -DSIMDE_FAST_ROUND_MODE +SURGE_CXX_FLAGS += -DSIMDE_FAST_ROUND_TIES + # possibly use fftw? # ifeq ($(shell $(PKG_CONFIG) --exists fftw3 fftw3f && echo true),true) # SURGE_CXX_FLAGS += -DJUCE_DSP_USE_STATIC_FFTW=1 diff --git a/include/simd-compat/pmmintrin.h b/include/simd-compat/pmmintrin.h index 3fd9afc..2e39a23 100644 --- a/include/simd-compat/pmmintrin.h +++ b/include/simd-compat/pmmintrin.h @@ -67,6 +67,45 @@ __m64 _mm_set1_pi16(short w) */ #else +// add missing calls, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95399 +# ifdef __arm__ +# include +// custom vcvtnq_s32_f32 implementation for armv7, based on _mm_cvtps_epi32 from sse2neon +/* + * sse2neon is freely redistributable under the MIT License. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +static __inline__ int32x4_t __attribute__((__always_inline__, __nodebug__)) +vcvtnq_s32_f32(const float32x4_t a) /* per-lane float -> int32 conversion, rounding to nearest; exact .5 ties go to the even integer */ +{ + const uint32x4_t signmask = vdupq_n_u32(0x80000000); /* only the top (sign) bit of each 32-bit lane */ + const float32x4_t half = vbslq_f32(signmask, a, vdupq_n_f32(0.5f)); /* 0.5 with the sign bit taken from a: +/- 0.5 */ + const int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(a, half)); /* non-tie result: convert (a +/- 0.5) to integer */ + const int32x4_t r_trunc = vcvtq_s32_f32(a); /* truncate to integer: [a] */ + const int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* top bit of -[a] shifted down to bit 0: 1 or 0 */ + const int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* tie result: ([a] + {0,1}) & ~1, bit 0 cleared so the value is even */ + const float32x4_t delta = vsubq_f32(a, vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */ + const uint32x4_t is_delta_half = vceqq_f32(delta, half); /* lane mask: delta == +/- 0.5, i.e. an exact tie */ + return vbslq_s32(is_delta_half, r_even, r_normal); /* tie lanes take r_even, every other lane takes r_normal */ +} +# endif # define SIMDE_ACCURACY_PREFERENCE 0 # define SIMDE_ENABLE_NATIVE_ALIASES # define SIMDE_FAST_CONVERSION_RANGE @@ -77,5 +116,4 @@ __m64 _mm_set1_pi16(short w) # include "../simde/simde/x86/sse.h" # include "../simde/simde/x86/sse2.h" # include "../simde/simde/x86/sse3.h" - #endif