mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
libstdc++: add ARM SVE support to std::experimental::simd
libstdc++-v3/ChangeLog: * include/Makefile.am: Add simd_sve.h. * include/Makefile.in: Add simd_sve.h. * include/experimental/bits/simd.h: Add new SveAbi. * include/experimental/bits/simd_builtin.h: Use __no_sve_deduce_t to support existing Neon Abi. * include/experimental/bits/simd_converter.h: Convert sequentially when sve is available. * include/experimental/bits/simd_detail.h: Define sve specific macro. * include/experimental/bits/simd_math.h: Fallback frexp to execute sequntially when sve is available, to handle fixed_size_simd return type that always uses sve. * include/experimental/simd: Include bits/simd_sve.h. * testsuite/experimental/simd/tests/bits/main.h: Enable testing for sve128, sve256, sve512. * include/experimental/bits/simd_sve.h: New file. Signed-off-by: Srinivas Yadav Singanaboina <vasu.srinivasvasu.14@gmail.com>
This commit is contained in:
parent
0b02da5b99
commit
9ac3119fec
@ -835,6 +835,7 @@ experimental_bits_headers = \
|
||||
${experimental_bits_srcdir}/simd_neon.h \
|
||||
${experimental_bits_srcdir}/simd_ppc.h \
|
||||
${experimental_bits_srcdir}/simd_scalar.h \
|
||||
${experimental_bits_srcdir}/simd_sve.h \
|
||||
${experimental_bits_srcdir}/simd_x86.h \
|
||||
${experimental_bits_srcdir}/simd_x86_conversions.h \
|
||||
${experimental_bits_srcdir}/string_view.tcc \
|
||||
|
@ -1181,6 +1181,7 @@ experimental_bits_headers = \
|
||||
${experimental_bits_srcdir}/simd_neon.h \
|
||||
${experimental_bits_srcdir}/simd_ppc.h \
|
||||
${experimental_bits_srcdir}/simd_scalar.h \
|
||||
${experimental_bits_srcdir}/simd_sve.h \
|
||||
${experimental_bits_srcdir}/simd_x86.h \
|
||||
${experimental_bits_srcdir}/simd_x86_conversions.h \
|
||||
${experimental_bits_srcdir}/string_view.tcc \
|
||||
|
@ -39,12 +39,16 @@
|
||||
#include <functional>
|
||||
#include <iosfwd>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
|
||||
#if _GLIBCXX_SIMD_X86INTRIN
|
||||
#include <x86intrin.h>
|
||||
#elif _GLIBCXX_SIMD_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE
|
||||
#include <arm_sve.h>
|
||||
#endif
|
||||
|
||||
/** @ingroup ts_simd
|
||||
* @{
|
||||
@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double;
|
||||
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
|
||||
#endif
|
||||
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE
|
||||
constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
|
||||
#else
|
||||
constexpr inline int __sve_vectorized_size_bytes = 0;
|
||||
#endif
|
||||
|
||||
namespace simd_abi {
|
||||
// simd_abi forward declarations {{{
|
||||
// implementation details:
|
||||
@ -108,6 +118,9 @@ template <int _UsedBytes>
|
||||
template <int _UsedBytes>
|
||||
struct _VecBltnBtmsk;
|
||||
|
||||
template <int _UsedBytes, int _TotalBytes = __sve_vectorized_size_bytes>
|
||||
struct _SveAbi;
|
||||
|
||||
template <typename _Tp, int _Np>
|
||||
using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
|
||||
|
||||
@ -123,6 +136,9 @@ template <int _UsedBytes = 64>
|
||||
template <int _UsedBytes = 16>
|
||||
using _Neon = _VecBuiltin<_UsedBytes>;
|
||||
|
||||
template <int _UsedBytes = __sve_vectorized_size_bytes>
|
||||
using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>;
|
||||
|
||||
// implementation-defined:
|
||||
using __sse = _Sse<>;
|
||||
using __avx = _Avx<>;
|
||||
@ -130,6 +146,7 @@ using __avx512 = _Avx512<>;
|
||||
using __neon = _Neon<>;
|
||||
using __neon128 = _Neon<16>;
|
||||
using __neon64 = _Neon<8>;
|
||||
using __sve = _Sve<>;
|
||||
|
||||
// standard:
|
||||
template <typename _Tp, size_t _Np, typename...>
|
||||
@ -250,6 +267,9 @@ constexpr inline bool __support_neon_float =
|
||||
false;
|
||||
#endif
|
||||
|
||||
constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE;
|
||||
constexpr inline bool __have_sve2 = _GLIBCXX_SIMD_HAVE_SVE2;
|
||||
|
||||
#ifdef _ARCH_PWR10
|
||||
constexpr inline bool __have_power10vec = true;
|
||||
#else
|
||||
@ -356,12 +376,14 @@ namespace __detail
|
||||
| (__have_avx512vnni << 27)
|
||||
| (__have_avx512vpopcntdq << 28)
|
||||
| (__have_avx512vp2intersect << 29);
|
||||
else if constexpr (__have_neon)
|
||||
else if constexpr (__have_neon || __have_sve)
|
||||
return __have_neon
|
||||
| (__have_neon_a32 << 1)
|
||||
| (__have_neon_a64 << 2)
|
||||
| (__have_neon_a64 << 2)
|
||||
| (__support_neon_float << 3);
|
||||
| (__support_neon_float << 3)
|
||||
| (__have_sve << 4)
|
||||
| (__have_sve2 << 5);
|
||||
else if constexpr (__have_power_vmx)
|
||||
return __have_power_vmx
|
||||
| (__have_power_vsx << 1)
|
||||
@ -733,6 +755,16 @@ template <typename _Abi>
|
||||
return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
|
||||
}
|
||||
|
||||
// }}}
|
||||
// __is_sve_abi {{{
|
||||
template <typename _Abi>
|
||||
constexpr bool
|
||||
__is_sve_abi()
|
||||
{
|
||||
constexpr auto _Bytes = __abi_bytes_v<_Abi>;
|
||||
return _Bytes <= __sve_vectorized_size_bytes && is_same_v<simd_abi::_Sve<_Bytes>, _Abi>;
|
||||
}
|
||||
|
||||
// }}}
|
||||
// __make_dependent_t {{{
|
||||
template <typename, typename _Up>
|
||||
@ -998,6 +1030,9 @@ template <typename _Tp>
|
||||
template <typename _Tp>
|
||||
using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
|
||||
|
||||
template <typename _Tp, size_t _Width>
|
||||
struct _SveSimdWrapper;
|
||||
|
||||
// }}}
|
||||
// __is_simd_wrapper {{{
|
||||
template <typename _Tp>
|
||||
@ -2830,7 +2865,8 @@ namespace simd_abi {
|
||||
// most of simd_abi is defined in simd_detail.h
|
||||
template <typename _Tp>
|
||||
inline constexpr int max_fixed_size
|
||||
= (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
|
||||
= ((__have_avx512bw && sizeof(_Tp) == 1)
|
||||
|| (__have_sve && __sve_vectorized_size_bytes/sizeof(_Tp) >= 64)) ? 64 : 32;
|
||||
|
||||
// compatible {{{
|
||||
#if defined __x86_64__ || defined __aarch64__
|
||||
@ -2858,6 +2894,8 @@ template <typename _Tp>
|
||||
constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
|
||||
if constexpr (__bytes == sizeof(_Tp))
|
||||
return static_cast<scalar*>(nullptr);
|
||||
else if constexpr (__have_sve)
|
||||
return static_cast<_SveAbi<__sve_vectorized_size_bytes>*>(nullptr);
|
||||
else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
|
||||
return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
|
||||
else
|
||||
@ -2951,6 +2989,9 @@ template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
|
||||
template <typename _Tp, size_t _Np, typename = void>
|
||||
struct __deduce_impl;
|
||||
|
||||
template <typename _Tp, size_t _Np, typename = void>
|
||||
struct __no_sve_deduce_impl;
|
||||
|
||||
namespace simd_abi {
|
||||
/**
|
||||
* @tparam _Tp The requested `value_type` for the elements.
|
||||
@ -2965,6 +3006,12 @@ template <typename _Tp, size_t _Np, typename...>
|
||||
|
||||
template <typename _Tp, size_t _Np, typename... _Abis>
|
||||
using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
|
||||
|
||||
template <typename _Tp, size_t _Np, typename...>
|
||||
struct __no_sve_deduce : __no_sve_deduce_impl<_Tp, _Np> {};
|
||||
|
||||
template <typename _Tp, size_t _Np, typename... _Abis>
|
||||
using __no_sve_deduce_t = typename __no_sve_deduce<_Tp, _Np, _Abis...>::type;
|
||||
} // namespace simd_abi
|
||||
|
||||
// }}}2
|
||||
@ -2974,13 +3021,27 @@ template <typename _Tp, typename _V, typename = void>
|
||||
|
||||
template <typename _Tp, typename _Up, typename _Abi>
|
||||
struct rebind_simd<_Tp, simd<_Up, _Abi>,
|
||||
void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
|
||||
{ using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
|
||||
void_t<std::conditional_t<!__is_sve_abi<_Abi>(),
|
||||
simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
|
||||
simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>>
|
||||
{
|
||||
using type = simd<_Tp, std::conditional_t<
|
||||
!__is_sve_abi<_Abi>(),
|
||||
simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
|
||||
simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>;
|
||||
};
|
||||
|
||||
template <typename _Tp, typename _Up, typename _Abi>
|
||||
struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
|
||||
void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
|
||||
{ using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
|
||||
void_t<std::conditional_t<!__is_sve_abi<_Abi>(),
|
||||
simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
|
||||
simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>>
|
||||
{
|
||||
using type = simd_mask<_Tp, std::conditional_t<
|
||||
!__is_sve_abi<_Abi>(),
|
||||
simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
|
||||
simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>;
|
||||
};
|
||||
|
||||
template <typename _Tp, typename _V>
|
||||
using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
|
||||
@ -3243,7 +3304,7 @@ template <typename _Tp, typename _Up, typename _Ap>
|
||||
else if constexpr (_Tp::size() == 1)
|
||||
return __x[0];
|
||||
else if constexpr (sizeof(_Tp) == sizeof(__x)
|
||||
&& !__is_fixed_size_abi_v<_Ap>)
|
||||
&& !__is_fixed_size_abi_v<_Ap> && !__is_sve_abi<_Ap>())
|
||||
return {__private_init,
|
||||
__vector_bitcast<typename _Tp::value_type, _Tp::size()>(
|
||||
_Ap::_S_masked(__data(__x))._M_data)};
|
||||
@ -4004,18 +4065,29 @@ template <typename _V, typename _Ap,
|
||||
split(const simd<typename _V::value_type, _Ap>& __x)
|
||||
{
|
||||
using _Tp = typename _V::value_type;
|
||||
|
||||
auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
|
||||
[&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
|
||||
{ return __x[__i * _V::size() + __j]; });
|
||||
});
|
||||
};
|
||||
|
||||
if constexpr (_Parts == 1)
|
||||
{
|
||||
return {simd_cast<_V>(__x)};
|
||||
}
|
||||
else if (__x._M_is_constprop())
|
||||
{
|
||||
return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
|
||||
[&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
|
||||
{ return __x[__i * _V::size() + __j]; });
|
||||
});
|
||||
return __gen_fallback();
|
||||
}
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE
|
||||
else if constexpr(__is_sve_abi<_Ap>)
|
||||
{
|
||||
return __gen_fallback();
|
||||
}
|
||||
#endif
|
||||
else if constexpr (
|
||||
__is_fixed_size_abi_v<_Ap>
|
||||
&& (is_same_v<typename _V::abi_type, simd_abi::scalar>
|
||||
@ -4115,7 +4187,8 @@ template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
|
||||
constexpr size_t _N0 = _SL::template _S_at<0>();
|
||||
using _V = __deduced_simd<_Tp, _N0>;
|
||||
|
||||
if (__x._M_is_constprop())
|
||||
auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
|
||||
{
|
||||
return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
|
||||
[&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
|
||||
@ -4124,6 +4197,14 @@ template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
|
||||
return __x[__offset + __j];
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
if (__x._M_is_constprop())
|
||||
__gen_fallback();
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE
|
||||
else if constexpr (__have_sve)
|
||||
__gen_fallback();
|
||||
#endif
|
||||
else if constexpr (_Np == _N0)
|
||||
{
|
||||
static_assert(sizeof...(_Sizes) == 1);
|
||||
@ -4510,8 +4591,11 @@ template <template <int> class _A0, template <int> class... _Rest>
|
||||
// 1. The ABI tag is valid for _Tp
|
||||
// 2. The storage overhead is no more than padding to fill the next
|
||||
// power-of-2 number of bytes
|
||||
if constexpr (_A0<_Bytes>::template _S_is_valid_v<
|
||||
_Tp> && __fullsize / 2 < _Np)
|
||||
if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
|
||||
&& ((__is_sve_abi<_A0<_Bytes>>() && __have_sve
|
||||
&& (_Np <= __sve_vectorized_size_bytes/sizeof(_Tp)))
|
||||
|| (__fullsize / 2 < _Np))
|
||||
)
|
||||
return typename __decay_abi<_A0<_Bytes>>::type{};
|
||||
else
|
||||
{
|
||||
@ -4536,7 +4620,13 @@ template <template <int> class _A0, template <int> class... _Rest>
|
||||
// the following lists all native ABIs, which makes them accessible to
|
||||
// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
|
||||
// matters: Whatever comes first has higher priority.
|
||||
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
|
||||
using _AllNativeAbis = _AbiList<
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE
|
||||
simd_abi::_SveAbi,
|
||||
#endif
|
||||
simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin, __scalar_abi_wrapper>;
|
||||
|
||||
using _NoSveAllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
|
||||
__scalar_abi_wrapper>;
|
||||
|
||||
// valid _SimdTraits specialization {{{1
|
||||
@ -4551,6 +4641,11 @@ template <typename _Tp, size_t _Np>
|
||||
_Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
|
||||
{ using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
|
||||
|
||||
template <typename _Tp, size_t _Np>
|
||||
struct __no_sve_deduce_impl<
|
||||
_Tp, _Np, enable_if_t<_NoSveAllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
|
||||
{ using type = _NoSveAllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
|
||||
|
||||
// fall back to fixed_size only if scalar and native ABIs don't match
|
||||
template <typename _Tp, size_t _Np, typename = void>
|
||||
struct __deduce_fixed_size_fallback {};
|
||||
@ -4563,6 +4658,12 @@ template <typename _Tp, size_t _Np>
|
||||
template <typename _Tp, size_t _Np, typename>
|
||||
struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
|
||||
|
||||
template <typename _Tp, size_t _Np, typename>
|
||||
struct __no_sve_deduce_impl
|
||||
: public __deduce_fixed_size_fallback<_Tp, _Np>
|
||||
{};
|
||||
|
||||
|
||||
//}}}1
|
||||
/// @endcond
|
||||
|
||||
|
@ -1614,7 +1614,7 @@ template <typename _Abi, typename>
|
||||
static_assert(_UW_size <= _TV_size);
|
||||
using _UW = _SimdWrapper<_Up, _UW_size>;
|
||||
using _UV = __vector_type_t<_Up, _UW_size>;
|
||||
using _UAbi = simd_abi::deduce_t<_Up, _UW_size>;
|
||||
using _UAbi = simd_abi::__no_sve_deduce_t<_Up, _UW_size>;
|
||||
if constexpr (_UW_size == _TV_size) // one convert+store
|
||||
{
|
||||
const _UW __converted = __convert<_UW>(__v);
|
||||
@ -1857,7 +1857,7 @@ template <typename _Abi, typename>
|
||||
else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
|
||||
plus<>>)
|
||||
{
|
||||
using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
|
||||
using _Ap = simd_abi::__no_sve_deduce_t<_Tp, __full_size>;
|
||||
return _Ap::_SimdImpl::_S_reduce(
|
||||
simd<_Tp, _Ap>(__private_init,
|
||||
_Abi::_S_masked(__as_vector(__x))),
|
||||
@ -1866,7 +1866,7 @@ template <typename _Abi, typename>
|
||||
else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
|
||||
multiplies<>>)
|
||||
{
|
||||
using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
|
||||
using _Ap = simd_abi::__no_sve_deduce_t<_Tp, __full_size>;
|
||||
using _TW = _SimdWrapper<_Tp, __full_size>;
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask_full
|
||||
= _Abi::template _S_implicit_mask<_Tp>().__as_full_vector();
|
||||
@ -1882,7 +1882,7 @@ template <typename _Abi, typename>
|
||||
}
|
||||
else if constexpr (_Np & 1)
|
||||
{
|
||||
using _Ap = simd_abi::deduce_t<_Tp, _Np - 1>;
|
||||
using _Ap = simd_abi::__no_sve_deduce_t<_Tp, _Np - 1>;
|
||||
return __binary_op(
|
||||
simd<_Tp, simd_abi::scalar>(_Ap::_SimdImpl::_S_reduce(
|
||||
simd<_Tp, _Ap>(
|
||||
@ -1936,7 +1936,7 @@ template <typename _Abi, typename>
|
||||
{
|
||||
static_assert(sizeof(__x) > __min_vector_size<_Tp>);
|
||||
static_assert((_Np & (_Np - 1)) == 0); // _Np must be a power of 2
|
||||
using _Ap = simd_abi::deduce_t<_Tp, _Np / 2>;
|
||||
using _Ap = simd_abi::__no_sve_deduce_t<_Tp, _Np / 2>;
|
||||
using _V = simd<_Tp, _Ap>;
|
||||
return _Ap::_SimdImpl::_S_reduce(
|
||||
__binary_op(_V(__private_init, __extract<0, 2>(__as_vector(__x))),
|
||||
@ -2376,83 +2376,95 @@ template <typename _Abi, typename>
|
||||
_GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
|
||||
_S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
|
||||
{
|
||||
using _I = __int_for_sizeof_t<_Tp>;
|
||||
const auto __xn
|
||||
= __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
|
||||
constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
|
||||
= __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
|
||||
if constexpr(__have_sve)
|
||||
{
|
||||
__fixed_size_storage_t<int, _Np> __r{};
|
||||
__execute_n_times<_Np>(
|
||||
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
__r._M_set(__i, std::fpclassify(__x[__i]));
|
||||
});
|
||||
return __r;
|
||||
}
|
||||
else
|
||||
{
|
||||
using _I = __int_for_sizeof_t<_Tp>;
|
||||
const auto __xn
|
||||
= __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
|
||||
constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
|
||||
= __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
|
||||
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
|
||||
= __vector_broadcast<_NI, _I>(FP_NORMAL);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
|
||||
= __vector_broadcast<_NI, _I>(FP_NORMAL);
|
||||
#if !__FINITE_MATH_ONLY__
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
|
||||
= __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
|
||||
= __vector_broadcast<_NI, _I>(FP_NAN);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
|
||||
= __vector_broadcast<_NI, _I>(FP_INFINITE);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
|
||||
= __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
|
||||
= __vector_broadcast<_NI, _I>(FP_NAN);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
|
||||
= __vector_broadcast<_NI, _I>(FP_INFINITE);
|
||||
#endif
|
||||
#ifndef __FAST_MATH__
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
|
||||
= __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
|
||||
= __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
|
||||
#endif
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
|
||||
= __vector_broadcast<_NI, _I>(FP_ZERO);
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
|
||||
= __vector_broadcast<_NI, _I>(FP_ZERO);
|
||||
|
||||
__vector_type_t<_I, _NI>
|
||||
__tmp = __xn < __minn
|
||||
#ifdef __FAST_MATH__
|
||||
? __fp_zero
|
||||
#else
|
||||
? (__xn == 0 ? __fp_zero : __fp_subnormal)
|
||||
#endif
|
||||
#if __FINITE_MATH_ONLY__
|
||||
: __fp_normal;
|
||||
#else
|
||||
: (__xn < __infn ? __fp_normal
|
||||
: (__xn == __infn ? __fp_infinite : __fp_nan));
|
||||
#endif
|
||||
__vector_type_t<_I, _NI>
|
||||
__tmp = __xn < __minn
|
||||
#ifdef __FAST_MATH__
|
||||
? __fp_zero
|
||||
#else
|
||||
? (__xn == 0 ? __fp_zero : __fp_subnormal)
|
||||
#endif
|
||||
#if __FINITE_MATH_ONLY__
|
||||
: __fp_normal;
|
||||
#else
|
||||
: (__xn < __infn ? __fp_normal
|
||||
: (__xn == __infn ? __fp_infinite : __fp_nan));
|
||||
#endif
|
||||
|
||||
if constexpr (sizeof(_I) == sizeof(int))
|
||||
{
|
||||
using _FixedInt = __fixed_size_storage_t<int, _Np>;
|
||||
const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
|
||||
if constexpr (_FixedInt::_S_tuple_size == 1)
|
||||
return {__as_int};
|
||||
else if constexpr (_FixedInt::_S_tuple_size == 2
|
||||
&& is_same_v<
|
||||
typename _FixedInt::_SecondType::_FirstAbi,
|
||||
simd_abi::scalar>)
|
||||
return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
|
||||
else if constexpr (_FixedInt::_S_tuple_size == 2)
|
||||
return {__extract<0, 2>(__as_int),
|
||||
__auto_bitcast(__extract<1, 2>(__as_int))};
|
||||
if constexpr (sizeof(_I) == sizeof(int))
|
||||
{
|
||||
using _FixedInt = __fixed_size_storage_t<int, _Np>;
|
||||
const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
|
||||
if constexpr (_FixedInt::_S_tuple_size == 1)
|
||||
return {__as_int};
|
||||
else if constexpr (_FixedInt::_S_tuple_size == 2
|
||||
&& is_same_v<
|
||||
typename _FixedInt::_SecondType::_FirstAbi,
|
||||
simd_abi::scalar>)
|
||||
return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
|
||||
else if constexpr (_FixedInt::_S_tuple_size == 2)
|
||||
return {__extract<0, 2>(__as_int),
|
||||
__auto_bitcast(__extract<1, 2>(__as_int))};
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else if constexpr (_Np == 2 && sizeof(_I) == 8
|
||||
&& __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
|
||||
{
|
||||
const auto __aslong = __vector_bitcast<_LLong>(__tmp);
|
||||
return {int(__aslong[0]), {int(__aslong[1])}};
|
||||
}
|
||||
#if _GLIBCXX_SIMD_X86INTRIN
|
||||
else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
|
||||
&& __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
|
||||
return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
|
||||
__to_intrin(__hi128(__tmp)))};
|
||||
else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
|
||||
&& __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
|
||||
return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
|
||||
#endif // _GLIBCXX_SIMD_X86INTRIN
|
||||
else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
|
||||
return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
|
||||
[](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
return __make_wrapper<int>(__l...);
|
||||
})};
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else if constexpr (_Np == 2 && sizeof(_I) == 8
|
||||
&& __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
|
||||
{
|
||||
const auto __aslong = __vector_bitcast<_LLong>(__tmp);
|
||||
return {int(__aslong[0]), {int(__aslong[1])}};
|
||||
}
|
||||
#if _GLIBCXX_SIMD_X86INTRIN
|
||||
else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
|
||||
&& __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
|
||||
return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
|
||||
__to_intrin(__hi128(__tmp)))};
|
||||
else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
|
||||
&& __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
|
||||
return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
|
||||
#endif // _GLIBCXX_SIMD_X86INTRIN
|
||||
else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
|
||||
return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
|
||||
[](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
return __make_wrapper<int>(__l...);
|
||||
})};
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
|
||||
// _S_increment & _S_decrement{{{2
|
||||
@ -2785,11 +2797,23 @@ template <typename _Abi, typename>
|
||||
return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
|
||||
}
|
||||
else
|
||||
return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
|
||||
_S_size<_Tp>>(
|
||||
__data(__x));
|
||||
}
|
||||
|
||||
{
|
||||
if constexpr(__is_sve_abi<_UAbi>())
|
||||
{
|
||||
simd_mask<_Tp> __r(false);
|
||||
constexpr size_t __min_size = std::min(__r.size(), __x.size());
|
||||
__execute_n_times<__min_size>(
|
||||
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
__r[__i] = __x[__i];
|
||||
});
|
||||
return __data(__r);
|
||||
}
|
||||
else
|
||||
return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
|
||||
_S_size<_Tp>>(
|
||||
__data(__x));
|
||||
}
|
||||
}
|
||||
// }}}
|
||||
// _S_masked_load {{{2
|
||||
template <typename _Tp, size_t _Np>
|
||||
|
@ -28,6 +28,18 @@
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
_GLIBCXX_SIMD_BEGIN_NAMESPACE
|
||||
|
||||
template <typename _Arg, typename _Ret, typename _To, size_t _Np>
|
||||
_Ret __converter_fallback(_Arg __a)
|
||||
{
|
||||
_Ret __ret{};
|
||||
__execute_n_times<_Np>(
|
||||
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
__ret._M_set(__i, static_cast<_To>(__a[__i]));
|
||||
});
|
||||
return __ret;
|
||||
}
|
||||
|
||||
// _SimdConverter scalar -> scalar {{{
|
||||
template <typename _From, typename _To>
|
||||
struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar,
|
||||
@ -56,14 +68,15 @@ template <typename _From, typename _To, typename _Abi>
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdConverter "native 1" -> "native 2" {{{
|
||||
// _SimdConverter "native non-sve 1" -> "native non-sve 2" {{{
|
||||
template <typename _From, typename _To, typename _AFrom, typename _ATo>
|
||||
struct _SimdConverter<
|
||||
_From, _AFrom, _To, _ATo,
|
||||
enable_if_t<!disjunction_v<
|
||||
__is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
|
||||
is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
|
||||
conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>>>
|
||||
conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
|
||||
&& !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())>>
|
||||
{
|
||||
using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
|
||||
using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
|
||||
@ -75,6 +88,26 @@ template <typename _From, typename _To, typename _AFrom, typename _ATo>
|
||||
{ return __vector_convert<_V>(__a, __more...); }
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdConverter "native 1" -> "native 2" {{{
|
||||
template <typename _From, typename _To, typename _AFrom, typename _ATo>
|
||||
struct _SimdConverter<
|
||||
_From, _AFrom, _To, _ATo,
|
||||
enable_if_t<!disjunction_v<
|
||||
__is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
|
||||
is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
|
||||
conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
|
||||
&& (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
|
||||
>>
|
||||
{
|
||||
using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
|
||||
using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
|
||||
operator()(_Arg __x) const noexcept
|
||||
{ return __converter_fallback<_Arg, _Ret, _To, simd_size_v<_To, _ATo>>(__x); }
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdConverter scalar -> fixed_size<1> {{{1
|
||||
template <typename _From, typename _To>
|
||||
@ -111,6 +144,10 @@ template <typename _From, typename _To, int _Np>
|
||||
if constexpr (is_same_v<_From, _To>)
|
||||
return __x;
|
||||
|
||||
// fallback to sequential when sve is available
|
||||
else if constexpr (__have_sve)
|
||||
return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
|
||||
|
||||
// special case (optimize) int signedness casts
|
||||
else if constexpr (sizeof(_From) == sizeof(_To)
|
||||
&& is_integral_v<_From> && is_integral_v<_To>)
|
||||
@ -275,11 +312,14 @@ template <typename _From, typename _Ap, typename _To, int _Np>
|
||||
"_SimdConverter to fixed_size only works for equal element counts");
|
||||
|
||||
using _Ret = __fixed_size_storage_t<_To, _Np>;
|
||||
using _Arg = typename _SimdTraits<_From, _Ap>::_SimdMember;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
|
||||
operator()(typename _SimdTraits<_From, _Ap>::_SimdMember __x) const noexcept
|
||||
operator()(_Arg __x) const noexcept
|
||||
{
|
||||
if constexpr (_Ret::_S_tuple_size == 1)
|
||||
if constexpr (__have_sve)
|
||||
return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
|
||||
else if constexpr (_Ret::_S_tuple_size == 1)
|
||||
return {__vector_convert<typename _Ret::_FirstType::_BuiltinType>(__x)};
|
||||
else
|
||||
{
|
||||
@ -316,12 +356,15 @@ template <typename _From, int _Np, typename _To, typename _Ap>
|
||||
"_SimdConverter to fixed_size only works for equal element counts");
|
||||
|
||||
using _Arg = __fixed_size_storage_t<_From, _Np>;
|
||||
using _Ret = typename _SimdTraits<_To, _Ap>::_SimdMember;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr
|
||||
typename _SimdTraits<_To, _Ap>::_SimdMember
|
||||
operator()(const _Arg& __x) const noexcept
|
||||
_Ret
|
||||
operator()(const _Arg& __x) const noexcept
|
||||
{
|
||||
if constexpr (_Arg::_S_tuple_size == 1)
|
||||
if constexpr(__have_sve)
|
||||
return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
|
||||
else if constexpr (_Arg::_S_tuple_size == 1)
|
||||
return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
|
||||
else if constexpr (_Arg::_S_is_homogeneous)
|
||||
return __call_with_n_evaluations<_Arg::_S_tuple_size>(
|
||||
|
@ -61,6 +61,16 @@
|
||||
#else
|
||||
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
|
||||
#endif
|
||||
#if (__ARM_FEATURE_SVE_BITS > 0 && __ARM_FEATURE_SVE_VECTOR_OPERATORS==1)
|
||||
#define _GLIBCXX_SIMD_HAVE_SVE 1
|
||||
#else
|
||||
#define _GLIBCXX_SIMD_HAVE_SVE 0
|
||||
#endif
|
||||
#ifdef __ARM_FEATURE_SVE2
|
||||
#define _GLIBCXX_SIMD_HAVE_SVE2 1
|
||||
#else
|
||||
#define _GLIBCXX_SIMD_HAVE_SVE2 0
|
||||
#endif
|
||||
//}}}
|
||||
// x86{{{
|
||||
#ifdef __MMX__
|
||||
@ -267,7 +277,7 @@
|
||||
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
|
||||
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
|
||||
|
||||
#if __STRICT_ANSI__ || defined __clang__
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE || __STRICT_ANSI__ || defined __clang__
|
||||
#define _GLIBCXX_SIMD_CONSTEXPR
|
||||
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
|
||||
#else
|
||||
|
@ -652,6 +652,18 @@ template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
|
||||
(*__exp)[0] = __tmp;
|
||||
return __r;
|
||||
}
|
||||
else if constexpr (__is_sve_abi<_Abi>())
|
||||
{
|
||||
simd<_Tp, _Abi> __r;
|
||||
__execute_n_times<simd_size_v<_Tp, _Abi>>(
|
||||
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
|
||||
int __tmp;
|
||||
const auto __ri = std::frexp(__x[__i], &__tmp);
|
||||
(*__exp)[__i] = __tmp;
|
||||
__r[__i] = __ri;
|
||||
});
|
||||
return __r;
|
||||
}
|
||||
else if constexpr (__is_fixed_size_abi_v<_Abi>)
|
||||
return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
|
||||
#if _GLIBCXX_SIMD_X86INTRIN
|
||||
@ -1135,7 +1147,8 @@ _GLIBCXX_SIMD_CVTING2(hypot)
|
||||
_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
|
||||
|
||||
#ifndef __FAST_MATH__
|
||||
if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
|
||||
if constexpr (_V::size() > 1
|
||||
&& __is_neon_abi<_Abi>() && __have_neon && !__have_neon_a32)
|
||||
{ // With ARMv7 NEON, we have no subnormals and must use slightly
|
||||
// different strategy
|
||||
const _V __hi_exp = __hi & __inf;
|
||||
|
1852
libstdc++-v3/include/experimental/bits/simd_sve.h
Normal file
1852
libstdc++-v3/include/experimental/bits/simd_sve.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -80,6 +80,9 @@
|
||||
#include "bits/simd_x86.h"
|
||||
#elif _GLIBCXX_SIMD_HAVE_NEON
|
||||
#include "bits/simd_neon.h"
|
||||
#if _GLIBCXX_SIMD_HAVE_SVE
|
||||
#include "bits/simd_sve.h"
|
||||
#endif
|
||||
#elif __ALTIVEC__
|
||||
#include "bits/simd_ppc.h"
|
||||
#endif
|
||||
|
@ -29,6 +29,9 @@ template <class T>
|
||||
invoke_test<simd<T, simd_abi::scalar>>(int());
|
||||
invoke_test<simd<T, simd_abi::_VecBuiltin<16>>>(int());
|
||||
invoke_test<simd<T, simd_abi::_VecBltnBtmsk<64>>>(int());
|
||||
invoke_test<simd<T, simd_abi::_SveAbi<16>>>(int());
|
||||
invoke_test<simd<T, simd_abi::_SveAbi<32>>>(int());
|
||||
invoke_test<simd<T, simd_abi::_SveAbi<64>>>(int());
|
||||
#elif EXTENDEDTESTS == 0
|
||||
invoke_test<simd<T, simd_abi::_VecBuiltin<8>>>(int());
|
||||
invoke_test<simd<T, simd_abi::_VecBuiltin<12>>>(int());
|
||||
|
Loading…
Reference in New Issue
Block a user