1// Math overloads for simd -*- C++ -*-
2
3// Copyright (C) 2020-2021 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27
28#if __cplusplus >= 201703L
29
30#include <utility>
31#include <iomanip>
32
33_GLIBCXX_SIMD_BEGIN_NAMESPACE
34template <typename _Tp, typename _V>
35 using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36
37// _Math_return_type {{{
38template <typename _DoubleR, typename _Tp, typename _Abi>
39 struct _Math_return_type;
40
41template <typename _DoubleR, typename _Tp, typename _Abi>
42 using _Math_return_type_t =
43 typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;
44
45template <typename _Tp, typename _Abi>
46 struct _Math_return_type<double, _Tp, _Abi>
47 { using type = simd<_Tp, _Abi>; };
48
49template <typename _Tp, typename _Abi>
50 struct _Math_return_type<bool, _Tp, _Abi>
51 { using type = simd_mask<_Tp, _Abi>; };
52
53template <typename _DoubleR, typename _Tp, typename _Abi>
54 struct _Math_return_type
55 { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
56
57//}}}
58// _GLIBCXX_SIMD_MATH_CALL_ {{{
59#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
60template <typename _Tp, typename _Abi, typename..., \
61 typename _R = _Math_return_type_t< \
62 decltype(std::__name(declval<double>())), _Tp, _Abi>> \
63 enable_if_t<is_floating_point_v<_Tp>, _R> \
64 __name(simd<_Tp, _Abi> __x) \
65 { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
66
67// }}}
68//_Extra_argument_type{{{
69template <typename _Up, typename _Tp, typename _Abi>
70 struct _Extra_argument_type;
71
72template <typename _Tp, typename _Abi>
73 struct _Extra_argument_type<_Tp*, _Tp, _Abi>
74 {
75 using type = simd<_Tp, _Abi>*;
76 static constexpr double* declval();
77 static constexpr bool __needs_temporary_scalar = true;
78
79 _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
80 { return &__data(*__x); }
81 };
82
83template <typename _Up, typename _Tp, typename _Abi>
84 struct _Extra_argument_type<_Up*, _Tp, _Abi>
85 {
86 static_assert(is_integral_v<_Up>);
87 using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
88 static constexpr _Up* declval();
89 static constexpr bool __needs_temporary_scalar = true;
90
91 _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
92 { return &__data(*__x); }
93 };
94
95template <typename _Tp, typename _Abi>
96 struct _Extra_argument_type<_Tp, _Tp, _Abi>
97 {
98 using type = simd<_Tp, _Abi>;
99 static constexpr double declval();
100 static constexpr bool __needs_temporary_scalar = false;
101
102 _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
103 _S_data(const type& __x)
104 { return __data(__x); }
105 };
106
107template <typename _Up, typename _Tp, typename _Abi>
108 struct _Extra_argument_type
109 {
110 static_assert(is_integral_v<_Up>);
111 using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
112 static constexpr _Up declval();
113 static constexpr bool __needs_temporary_scalar = false;
114
115 _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
116 _S_data(const type& __x)
117 { return __data(__x); }
118 };
119
120//}}}
121// _GLIBCXX_SIMD_MATH_CALL2_ {{{
122#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_) \
123template < \
124 typename _Tp, typename _Abi, typename..., \
125 typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
126 typename _R = _Math_return_type_t< \
127 decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
128 enable_if_t<is_floating_point_v<_Tp>, _R> \
129 __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
130 { \
131 return {__private_init, \
132 _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
133 } \
134template <typename _Up, typename _Tp, typename _Abi> \
135 _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
136 decltype(std::__name( \
137 declval<double>(), \
138 declval<enable_if_t< \
139 conjunction_v< \
140 is_same<arg2_, _Tp>, \
141 negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
142 is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
143 double>>())), \
144 _Tp, _Abi> \
145 __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
146 { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
147
148// }}}
149// _GLIBCXX_SIMD_MATH_CALL3_ {{{
150#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_) \
151template <typename _Tp, typename _Abi, typename..., \
152 typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
153 typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>, \
154 typename _R = _Math_return_type_t< \
155 decltype(std::__name(declval<double>(), _Arg2::declval(), \
156 _Arg3::declval())), \
157 _Tp, _Abi>> \
158 enable_if_t<is_floating_point_v<_Tp>, _R> \
159 __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
160 const typename _Arg3::type& __z) \
161 { \
162 return {__private_init, \
163 _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
164 _Arg3::_S_data(__z))}; \
165 } \
166template < \
167 typename _T0, typename _T1, typename _T2, typename..., \
168 typename _U0 = __remove_cvref_t<_T0>, \
169 typename _U1 = __remove_cvref_t<_T1>, \
170 typename _U2 = __remove_cvref_t<_T2>, \
171 typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
172 typename = enable_if_t<conjunction_v< \
173 is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
174 is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
175 negation<conjunction< \
176 is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
177 _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
178 declval<const _Simd&>(), \
179 declval<const _Simd&>())) \
180 __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
181 { \
182 return __name(_Simd(static_cast<_T0&&>(__xx)), \
183 _Simd(static_cast<_T1&&>(__yy)), \
184 _Simd(static_cast<_T2&&>(__zz))); \
185 }
186
187// }}}
188// __cosSeries {{{
189template <typename _Abi>
190 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
191 __cosSeries(const simd<float, _Abi>& __x)
192 {
193 const simd<float, _Abi> __x2 = __x * __x;
194 simd<float, _Abi> __y;
195 __y = 0x1.ap-16f; // 1/8!
196 __y = __y * __x2 - 0x1.6c1p-10f; // -1/6!
197 __y = __y * __x2 + 0x1.555556p-5f; // 1/4!
198 return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
199 }
200
201template <typename _Abi>
202 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
203 __cosSeries(const simd<double, _Abi>& __x)
204 {
205 const simd<double, _Abi> __x2 = __x * __x;
206 simd<double, _Abi> __y;
207 __y = 0x1.AC00000000000p-45; // 1/16!
208 __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
209 __y = __y * __x2 + 0x1.1EED8C0000000p-29; // 1/12!
210 __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
211 __y = __y * __x2 + 0x1.A01A01A018000p-16; // 1/8!
212 __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
213 __y = __y * __x2 + 0x1.5555555555554p-5; // 1/4!
214 return (__y * __x2 - .5f) * __x2 + 1.f;
215 }
216
217// }}}
218// __sinSeries {{{
219template <typename _Abi>
220 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
221 __sinSeries(const simd<float, _Abi>& __x)
222 {
223 const simd<float, _Abi> __x2 = __x * __x;
224 simd<float, _Abi> __y;
225 __y = -0x1.9CC000p-13f; // -1/7!
226 __y = __y * __x2 + 0x1.111100p-7f; // 1/5!
227 __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
228 return __y * (__x2 * __x) + __x;
229 }
230
231template <typename _Abi>
232 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
233 __sinSeries(const simd<double, _Abi>& __x)
234 {
235 // __x = [0, 0.7854 = pi/4]
236 // __x² = [0, 0.6169 = pi²/8]
237 const simd<double, _Abi> __x2 = __x * __x;
238 simd<double, _Abi> __y;
239 __y = -0x1.ACF0000000000p-41; // -1/15!
240 __y = __y * __x2 + 0x1.6124400000000p-33; // 1/13!
241 __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
242 __y = __y * __x2 + 0x1.71DE3A5540000p-19; // 1/9!
243 __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
244 __y = __y * __x2 + 0x1.1111111111110p-7; // 1/5!
245 __y = __y * __x2 - 0x1.5555555555555p-3; // -1/3!
246 return __y * (__x2 * __x) + __x;
247 }
248
249// }}}
250// __zero_low_bits {{{
251template <int _Bits, typename _Tp, typename _Abi>
252 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
253 __zero_low_bits(simd<_Tp, _Abi> __x)
254 {
255 const simd<_Tp, _Abi> __bitmask
256 = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
257 return {__private_init,
258 _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
259 }
260
261// }}}
262// __fold_input {{{
263
264/**@internal
265 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
266 * quadrant 0: [-¼π, ¼π]
267 * quadrant 1: [ ¼π, ¾π]
268 * quadrant 2: [ ¾π, 1¼π]
269 * quadrant 3: [1¼π, 1¾π]
270 *
271 * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
272 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
273 * ```
274 * y = trunc(x / ¼π);
275 * y += fmod(y, 2);
276 * ```
277 * This can be simplified by moving the (implicit) division by 2 into the
278 * truncation expression. The `+= fmod` effect can the be achieved by using
279 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
280 * `2/Ï€ * x` is better (faster).
281 */
282template <typename _Tp, typename _Abi>
283 struct _Folded
284 {
285 simd<_Tp, _Abi> _M_x;
286 rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
287 };
288
289namespace __math_float {
290inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
291inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/Ï€
292inline constexpr float __pi_2_5bits0
293 = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
294inline constexpr float __pi_2_5bits0_rem
295 = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
296} // namespace __math_float
297namespace __math_double {
298inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
299inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/Ï€
300inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // π/2
301} // namespace __math_double
302
303template <typename _Abi>
304 _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
305 __fold_input(const simd<float, _Abi>& __x)
306 {
307 using _V = simd<float, _Abi>;
308 using _IV = rebind_simd_t<int, _V>;
309 using namespace __math_float;
310 _Folded<float, _Abi> __r;
311 __r._M_x = abs(__x);
312#if 0
313 // zero most mantissa bits:
314 constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/Ï€
315 const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
316 // split π into 4 parts, the first three with 13 trailing zeros (to make the
317 // following multiplications precise):
318 constexpr float __pi0 = 0x1.920000p1f;
319 constexpr float __pi1 = 0x1.fb4000p-11f;
320 constexpr float __pi2 = 0x1.444000p-23f;
321 constexpr float __pi3 = 0x1.68c234p-38f;
322 __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
323#else
324 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
325 __r._M_quadrant = 0;
326 else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
327 {
328 const _V __y = nearbyint(__r._M_x * __2_over_pi);
329 __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
330 __r._M_x -= __y * __pi_2_5bits0;
331 __r._M_x -= __y * __pi_2_5bits0_rem;
332 }
333 else
334 {
335 using __math_double::__2_over_pi;
336 using __math_double::__pi_2;
337 using _VD = rebind_simd_t<double, _V>;
338 _VD __xd = static_simd_cast<_VD>(__r._M_x);
339 _VD __y = nearbyint(__xd * __2_over_pi);
340 __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
341 __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
342 }
343#endif
344 return __r;
345 }
346
347template <typename _Abi>
348 _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
349 __fold_input(const simd<double, _Abi>& __x)
350 {
351 using _V = simd<double, _Abi>;
352 using _IV = rebind_simd_t<int, _V>;
353 using namespace __math_double;
354
355 _Folded<double, _Abi> __r;
356 __r._M_x = abs(__x);
357 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
358 {
359 __r._M_quadrant = 0;
360 return __r;
361 }
362 const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
363 __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;
364
365 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
366 {
367 // x - y * pi/2, y uses no more than 11 mantissa bits
368 __r._M_x -= __y * 0x1.921FB54443000p0;
369 __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
370 __r._M_x -= __y * 0x1.45C06E0E68948p-86;
371 }
372 else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
373 {
374 // x - y * pi/2, y uses no more than 29 mantissa bits
375 __r._M_x -= __y * 0x1.921FB40000000p0;
376 __r._M_x -= __y * 0x1.4442D00000000p-24;
377 __r._M_x -= __y * 0x1.8469898CC5170p-48;
378 }
379 else
380 {
381 // x - y * pi/2, y may require all mantissa bits
382 const _V __y_hi = __zero_low_bits<26>(__y);
383 const _V __y_lo = __y - __y_hi;
384 const auto __pi_2_1 = 0x1.921FB50000000p0;
385 const auto __pi_2_2 = 0x1.110B460000000p-26;
386 const auto __pi_2_3 = 0x1.1A62630000000p-54;
387 const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
388 __r._M_x = __r._M_x - __y_hi * __pi_2_1
389 - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
390 - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
391 - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
392 - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
393 - max(__y * __pi_2_4, __y_lo * __pi_2_3)
394 - min(__y * __pi_2_4, __y_lo * __pi_2_3);
395 }
396 return __r;
397 }
398
399// }}}
400// __extract_exponent_as_int {{{
401template <typename _Tp, typename _Abi>
402 rebind_simd_t<int, simd<_Tp, _Abi>>
403 __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
404 {
405 using _Vp = simd<_Tp, _Abi>;
406 using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
407 using namespace std::experimental::__float_bitwise_operators;
408 const _Vp __exponent_mask
409 = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
410 return static_simd_cast<rebind_simd_t<int, _Vp>>(
411 __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
412 >> (__digits_v<_Tp> - 1));
413 }
414
415// }}}
416// __impl_or_fallback {{{
417template <typename ImplFun, typename FallbackFun, typename... _Args>
418 _GLIBCXX_SIMD_INTRINSIC auto
419 __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
420 _Args&&... __args)
421 -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
422 { return __impl_fun(static_cast<_Args&&>(__args)...); }
423
424template <typename ImplFun, typename FallbackFun, typename... _Args>
425 inline auto
426 __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
427 _Args&&... __args)
428 -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
429 { return __fallback_fun(static_cast<_Args&&>(__args)...); }
430
431template <typename... _Args>
432 _GLIBCXX_SIMD_INTRINSIC auto
433 __impl_or_fallback(_Args&&... __args)
434 {
435 return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
436 }
437//}}}
438
439// trigonometric functions {{{
440_GLIBCXX_SIMD_MATH_CALL_(acos)
441_GLIBCXX_SIMD_MATH_CALL_(asin)
442_GLIBCXX_SIMD_MATH_CALL_(atan)
443_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
444
445/*
446 * algorithm for sine and cosine:
447 *
448 * The result can be calculated with sine or cosine depending on the π/4 section
449 * the input is in. sine ≈ __x + __x³ cosine ≈ 1 - __x²
450 *
451 * sine:
452 * Map -__x to __x and invert the output
453 * Extend precision of __x - n * π/4 by calculating
454 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
455 *
456 * Calculate Taylor series with tuned coefficients.
457 * Fix sign.
458 */
459// cos{{{
460template <typename _Tp, typename _Abi>
461 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
462 cos(const simd<_Tp, _Abi>& __x)
463 {
464 using _V = simd<_Tp, _Abi>;
465 if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
466 return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
467 else
468 {
469 if constexpr (is_same_v<_Tp, float>)
470 if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
471 return static_simd_cast<_V>(
472 cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));
473
474 const auto __f = __fold_input(__x);
475 // quadrant | effect
476 // 0 | cosSeries, +
477 // 1 | sinSeries, -
478 // 2 | cosSeries, -
479 // 3 | sinSeries, +
480 using namespace std::experimental::__float_bitwise_operators;
481 const _V __sign_flip
482 = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);
483
484 const auto __need_cos = (__f._M_quadrant & 1) == 0;
485 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
486 return __sign_flip ^ __cosSeries(__f._M_x);
487 else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
488 return __sign_flip ^ __sinSeries(__f._M_x);
489 else // some_of(__need_cos)
490 {
491 _V __r = __sinSeries(__f._M_x);
492 where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
493 return __r ^ __sign_flip;
494 }
495 }
496 }
497
498template <typename _Tp>
499 _GLIBCXX_SIMD_ALWAYS_INLINE
500 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
501 cos(simd<_Tp, simd_abi::scalar> __x)
502 { return std::cos(__data(__x)); }
503
504//}}}
505// sin{{{
506template <typename _Tp, typename _Abi>
507 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
508 sin(const simd<_Tp, _Abi>& __x)
509 {
510 using _V = simd<_Tp, _Abi>;
511 if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
512 return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
513 else
514 {
515 if constexpr (is_same_v<_Tp, float>)
516 if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
517 return static_simd_cast<_V>(
518 sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));
519
520 const auto __f = __fold_input(__x);
521 // quadrant | effect
522 // 0 | sinSeries
523 // 1 | cosSeries
524 // 2 | sinSeries, sign flip
525 // 3 | cosSeries, sign flip
526 using namespace std::experimental::__float_bitwise_operators;
527 const auto __sign_flip
528 = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));
529
530 const auto __need_sin = (__f._M_quadrant & 1) == 0;
531 if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
532 return __sign_flip ^ __sinSeries(__f._M_x);
533 else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
534 return __sign_flip ^ __cosSeries(__f._M_x);
535 else // some_of(__need_sin)
536 {
537 _V __r = __cosSeries(__f._M_x);
538 where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
539 return __sign_flip ^ __r;
540 }
541 }
542 }
543
544template <typename _Tp>
545 _GLIBCXX_SIMD_ALWAYS_INLINE
546 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
547 sin(simd<_Tp, simd_abi::scalar> __x)
548 { return std::sin(__data(__x)); }
549
550//}}}
551_GLIBCXX_SIMD_MATH_CALL_(tan)
552_GLIBCXX_SIMD_MATH_CALL_(acosh)
553_GLIBCXX_SIMD_MATH_CALL_(asinh)
554_GLIBCXX_SIMD_MATH_CALL_(atanh)
555_GLIBCXX_SIMD_MATH_CALL_(cosh)
556_GLIBCXX_SIMD_MATH_CALL_(sinh)
557_GLIBCXX_SIMD_MATH_CALL_(tanh)
558// }}}
559// exponential functions {{{
560_GLIBCXX_SIMD_MATH_CALL_(exp)
561_GLIBCXX_SIMD_MATH_CALL_(exp2)
562_GLIBCXX_SIMD_MATH_CALL_(expm1)
563
564// }}}
565// frexp {{{
566#if _GLIBCXX_SIMD_X86INTRIN
567template <typename _Tp, size_t _Np>
568 _SimdWrapper<_Tp, _Np>
569 __getexp(_SimdWrapper<_Tp, _Np> __x)
570 {
571 if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
572 return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
573 else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
574 return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
575 else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
576 return _mm_getexp_pd(__x);
577 else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
578 return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
579 else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
580 return _mm256_getexp_ps(__x);
581 else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
582 return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
583 else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
584 return _mm256_getexp_pd(__x);
585 else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
586 return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
587 else if constexpr (__is_avx512_ps<_Tp, _Np>())
588 return _mm512_getexp_ps(__x);
589 else if constexpr (__is_avx512_pd<_Tp, _Np>())
590 return _mm512_getexp_pd(__x);
591 else
592 __assert_unreachable<_Tp>();
593 }
594
595template <typename _Tp, size_t _Np>
596 _SimdWrapper<_Tp, _Np>
597 __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
598 {
599 if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
600 return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
601 _MM_MANT_SIGN_src));
602 else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
603 return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
604 _MM_MANT_NORM_p5_1,
605 _MM_MANT_SIGN_src));
606 else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
607 return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
608 else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
609 return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
610 _MM_MANT_SIGN_src));
611 else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
612 return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
613 else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
614 return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
615 _MM_MANT_SIGN_src));
616 else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
617 return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
618 else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
619 return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
620 _MM_MANT_SIGN_src));
621 else if constexpr (__is_avx512_ps<_Tp, _Np>())
622 return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
623 else if constexpr (__is_avx512_pd<_Tp, _Np>())
624 return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
625 else
626 __assert_unreachable<_Tp>();
627 }
628#endif // _GLIBCXX_SIMD_X86INTRIN
629
630/**
631 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
632 *
633 * The return value will be in the range [0.5, 1.0[
634 * The @p __e value will be an integer defining the power-of-two exponent
635 */
636template <typename _Tp, typename _Abi>
637 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
638 frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
639 {
640 if constexpr (simd_size_v<_Tp, _Abi> == 1)
641 {
642 int __tmp;
643 const auto __r = std::frexp(__x[0], &__tmp);
644 (*__exp)[0] = __tmp;
645 return __r;
646 }
647 else if constexpr (__is_fixed_size_abi_v<_Abi>)
648 {
649 return {__private_init,
650 _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
651#if _GLIBCXX_SIMD_X86INTRIN
652 }
653 else if constexpr (__have_avx512f)
654 {
655 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
656 constexpr size_t _NI = _Np < 4 ? 4 : _Np;
657 const auto __v = __data(__x);
658 const auto __isnonzero
659 = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
660 const _SimdWrapper<int, _NI> __exp_plus1
661 = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
662 const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
663 _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
664 _SimdWrapper<int, _NI>(), __exp_plus1));
665 simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
666 return {__private_init,
667 _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
668 __isnonzero),
669 __v, __getmant_avx512(__v))};
670#endif // _GLIBCXX_SIMD_X86INTRIN
671 }
672 else
673 {
674 // fallback implementation
675 static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
676 using _V = simd<_Tp, _Abi>;
677 using _IV = rebind_simd_t<int, _V>;
678 using namespace std::experimental::__proposed;
679 using namespace std::experimental::__float_bitwise_operators;
680
681 constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
682 constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
683 constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
684 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
685 = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
686 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
687 = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff
688
689 _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
690 const _IV __exponent_bits = __extract_exponent_as_int(__x);
691 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
692 {
693 *__exp
694 = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
695 return __mant;
696 }
697
698#if __FINITE_MATH_ONLY__
699 // at least one element of __x is 0 or subnormal, the rest is normal
700 // (inf and NaN are excluded by -ffinite-math-only)
701 const auto __iszero_inf_nan = __x == 0;
702#else
703 const auto __as_int
704 = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
705 const auto __inf
706 = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
707 _V(__infinity_v<_Tp>));
708 const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
709 __as_int == 0 || __as_int >= __inf);
710#endif
711
712 const _V __scaled_subnormal = __x * __subnorm_scale;
713 const _V __mant_subnormal
714 = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
715 where(!isnormal(__x), __mant) = __mant_subnormal;
716 where(__iszero_inf_nan, __mant) = __x;
717 _IV __e = __extract_exponent_as_int(__scaled_subnormal);
718 using _MaskType =
719 typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
720 _V, _IV>::mask_type;
721 const _MaskType __value_isnormal = isnormal(__x).__cvt();
722 where(__value_isnormal.__cvt(), __e) = __exponent_bits;
723 static_assert(sizeof(_IV) == sizeof(__value_isnormal));
724 const _IV __offset
725 = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
726 | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
727 & static_simd_cast<_MaskType>(__x != 0))
728 & _IV(__exp_adjust + __exp_offset));
729 *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
730 return __mant;
731 }
732 }
733
734// }}}
735_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
736_GLIBCXX_SIMD_MATH_CALL_(ilogb)
737
738// logarithms {{{
739_GLIBCXX_SIMD_MATH_CALL_(log)
740_GLIBCXX_SIMD_MATH_CALL_(log10)
741_GLIBCXX_SIMD_MATH_CALL_(log1p)
742_GLIBCXX_SIMD_MATH_CALL_(log2)
743
744//}}}
745// logb{{{
746template <typename _Tp, typename _Abi>
747 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
748 logb(const simd<_Tp, _Abi>& __x)
749 {
750 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
751 if constexpr (_Np == 1)
752 return std::logb(__x[0]);
753 else if constexpr (__is_fixed_size_abi_v<_Abi>)
754 {
755 return {__private_init,
756 __data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
757 using _V = typename decltype(__impl)::simd_type;
758 return __data(
759 std::experimental::logb(_V(__private_init, __xx)));
760 })};
761 }
762#if _GLIBCXX_SIMD_X86INTRIN // {{{
763 else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
764 return {__private_init,
765 __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
766 else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
767 return {__private_init, _mm_getexp_pd(__data(__x))};
768 else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
769 return {__private_init, _mm256_getexp_ps(__data(__x))};
770 else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
771 return {__private_init, _mm256_getexp_pd(__data(__x))};
772 else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
773 return {__private_init,
774 __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
775 else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
776 return {__private_init,
777 __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
778 else if constexpr (__is_avx512_ps<_Tp, _Np>())
779 return {__private_init, _mm512_getexp_ps(__data(__x))};
780 else if constexpr (__is_avx512_pd<_Tp, _Np>())
781 return {__private_init, _mm512_getexp_pd(__data(__x))};
782#endif // _GLIBCXX_SIMD_X86INTRIN }}}
783 else
784 {
785 using _V = simd<_Tp, _Abi>;
786 using namespace std::experimental::__proposed;
787 auto __is_normal = isnormal(__x);
788
789 // work on abs(__x) to reflect the return value on Linux for negative
790 // inputs (domain-error => implementation-defined value is returned)
791 const _V abs_x = abs(__x);
792
793 // __exponent(__x) returns the exponent value (bias removed) as
794 // simd<_Up> with integral _Up
795 auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
796 using namespace std::experimental::__proposed;
797 using _IV = rebind_simd_t<
798 conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
799 return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
800 - (__max_exponent_v<_Tp> - 1);
801 };
802 _V __r = static_simd_cast<_V>(__exponent(abs_x));
803 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
804 // without corner cases (nan, inf, subnormal, zero) we have our
805 // answer:
806 return __r;
807 const auto __is_zero = __x == 0;
808 const auto __is_nan = isnan(__x);
809 const auto __is_inf = isinf(__x);
810 where(__is_zero, __r) = -__infinity_v<_Tp>;
811 where(__is_nan, __r) = __x;
812 where(__is_inf, __r) = __infinity_v<_Tp>;
813 __is_normal |= __is_zero || __is_nan || __is_inf;
814 if (all_of(__is_normal))
815 // at this point everything but subnormals is handled
816 return __r;
817 // subnormals repeat the exponent extraction after multiplication of the
818 // input with __a floating point value that has 112 (0x70) in its exponent
819 // (not too big for sp and large enough for dp)
820 const _V __scaled = abs_x * _Tp(0x1p112);
821 _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
822 where(__is_normal, __scaled_exp) = __r;
823 return __scaled_exp;
824 }
825 }
826
827//}}}
828template <typename _Tp, typename _Abi>
829 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
830 modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
831 {
832 if constexpr (__is_scalar_abi<_Abi>()
833 || (__is_fixed_size_abi_v<
834 _Abi> && simd_size_v<_Tp, _Abi> == 1))
835 {
836 _Tp __tmp;
837 _Tp __r = std::modf(__x[0], &__tmp);
838 __iptr[0] = __tmp;
839 return __r;
840 }
841 else
842 {
843 const auto __integral = trunc(__x);
844 *__iptr = __integral;
845 auto __r = __x - __integral;
846#if !__FINITE_MATH_ONLY__
847 where(isinf(__x), __r) = _Tp();
848#endif
849 return copysign(__r, __x);
850 }
851 }
852
853_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
854_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)
855
856_GLIBCXX_SIMD_MATH_CALL_(cbrt)
857
858_GLIBCXX_SIMD_MATH_CALL_(abs)
859_GLIBCXX_SIMD_MATH_CALL_(fabs)
860
861// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
862// allow signed integral _Tp
863template <typename _Tp, typename _Abi>
864 enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
865 abs(const simd<_Tp, _Abi>& __x)
866 { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
867
868template <typename _Tp, typename _Abi>
869 enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
870 fabs(const simd<_Tp, _Abi>& __x)
871 { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
872
873// the following are overloads for functions in <cstdlib> and not covered by
874// [parallel.simd.math]. I don't see much value in making them work, though
875/*
876template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x)
877{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
878
879template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi>
880&__x)
881{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
882*/
883
884#define _GLIBCXX_SIMD_CVTING2(_NAME) \
885template <typename _Tp, typename _Abi> \
886 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
887 const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
888 { \
889 return _NAME(__x, __y); \
890 } \
891 \
892template <typename _Tp, typename _Abi> \
893 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
894 const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
895 { \
896 return _NAME(__x, __y); \
897 }
898
899#define _GLIBCXX_SIMD_CVTING3(_NAME) \
900template <typename _Tp, typename _Abi> \
901 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
902 const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
903 const simd<_Tp, _Abi>& __z) \
904 { \
905 return _NAME(__x, __y, __z); \
906 } \
907 \
908template <typename _Tp, typename _Abi> \
909 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
910 const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
911 const simd<_Tp, _Abi>& __z) \
912 { \
913 return _NAME(__x, __y, __z); \
914 } \
915 \
916template <typename _Tp, typename _Abi> \
917 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
918 const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
919 const __type_identity_t<simd<_Tp, _Abi>>& __z) \
920 { \
921 return _NAME(__x, __y, __z); \
922 } \
923 \
924template <typename _Tp, typename _Abi> \
925 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
926 const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
927 const __type_identity_t<simd<_Tp, _Abi>>& __z) \
928 { \
929 return _NAME(__x, __y, __z); \
930 } \
931 \
932template <typename _Tp, typename _Abi> \
933 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
934 const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
935 const __type_identity_t<simd<_Tp, _Abi>>& __z) \
936 { \
937 return _NAME(__x, __y, __z); \
938 } \
939 \
940template <typename _Tp, typename _Abi> \
941 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
942 const __type_identity_t<simd<_Tp, _Abi>>& __x, \
943 const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
944 { \
945 return _NAME(__x, __y, __z); \
946 }
947
948template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
949 _GLIBCXX_SIMD_INTRINSIC _R
950 __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
951 const _Tps&... __args)
952 {
953 return {__private_init,
954 __data(__arg0)._M_apply_per_chunk(
955 [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
956 using _V = typename decltype(__impl)::simd_type;
957 return __data(__apply(_V(__private_init, __inner)...));
958 },
959 __data(__args)...)};
960 }
961
962template <typename _VV>
963 __remove_cvref_t<_VV>
964 __hypot(_VV __x, _VV __y)
965 {
966 using _V = __remove_cvref_t<_VV>;
967 using _Tp = typename _V::value_type;
968 if constexpr (_V::size() == 1)
969 return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
970 else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
971 {
972 return __fixed_size_apply<_V>([](auto __a,
973 auto __b) { return hypot(__a, __b); },
974 __x, __y);
975 }
976 else
977 {
978 // A simple solution for _Tp == float would be to cast to double and
979 // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
980 // dp. It still needs the Annex F fixups though and isn't faster on
981 // Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
982 // AVX-512).
983 using namespace __float_bitwise_operators;
984 _V __absx = abs(__x); // no error
985 _V __absy = abs(__y); // no error
986 _V __hi = max(__absx, __absy); // no error
987 _V __lo = min(__absy, __absx); // no error
988
989 // round __hi down to the next power-of-2:
990 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
991
992#ifndef __FAST_MATH__
993 if constexpr (__have_neon && !__have_neon_a32)
994 { // With ARMv7 NEON, we have no subnormals and must use slightly
995 // different strategy
996 const _V __hi_exp = __hi & __inf;
997 _V __scale_back = __hi_exp;
998 // For large exponents (max & max/2) the inversion comes too close
999 // to subnormals. Subtract 3 from the exponent:
1000 where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
1001 // Invert and adjust for the off-by-one error of inversion via xor:
1002 const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
1003 const _V __h1 = __hi * __scale;
1004 const _V __l1 = __lo * __scale;
1005 _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
1006 // Fix up hypot(0, 0) to not be NaN:
1007 where(__hi == 0, __r) = 0;
1008 return __r;
1009 }
1010#endif
1011
1012#ifdef __FAST_MATH__
1013 // With fast-math, ignore precision of subnormals and inputs from
1014 // __finite_max_v/2 to __finite_max_v. This removes all
1015 // branching/masking.
1016 if constexpr (true)
1017#else
1018 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
1019 && all_of(isnormal(__y))))
1020#endif
1021 {
1022 const _V __hi_exp = __hi & __inf;
1023 //((__hi + __hi) & __inf) ^ __inf almost works for computing
1024 //__scale,
1025 // except when (__hi + __hi) & __inf == __inf, in which case __scale
1026 // becomes 0 (should be min/2 instead) and thus loses the
1027 // information from __lo.
1028#ifdef __FAST_MATH__
1029 using _Ip = __int_for_sizeof_t<_Tp>;
1030 using _IV = rebind_simd_t<_Ip, _V>;
1031 const auto __as_int = __bit_cast<_IV>(__hi_exp);
1032 const _V __scale
1033 = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
1034#else
1035 const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
1036#endif
1037 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
1038 = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
1039 const _V __h1 = (__hi & __mant_mask) | _V(1);
1040 const _V __l1 = __lo * __scale;
1041 return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
1042 }
1043 else
1044 {
1045 // slower path to support subnormals
1046 // if __hi is subnormal, avoid scaling by inf & final mul by 0
1047 // (which yields NaN) by using min()
1048 _V __scale = _V(1 / __norm_min_v<_Tp>);
1049 // invert exponent w/o error and w/o using the slow divider unit:
1050 // xor inverts the exponent but off by 1. Multiplication with .5
1051 // adjusts for the discrepancy.
1052 where(__hi >= __norm_min_v<_Tp>, __scale)
1053 = ((__hi & __inf) ^ __inf) * _Tp(.5);
1054 // adjust final exponent for subnormal inputs
1055 _V __hi_exp = __norm_min_v<_Tp>;
1056 where(__hi >= __norm_min_v<_Tp>, __hi_exp)
1057 = __hi & __inf; // no error
1058 _V __h1 = __hi * __scale; // no error
1059 _V __l1 = __lo * __scale; // no error
1060
1061 // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
1062 // this ensures no overflow in the argument to sqrt
1063 _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
1064#ifdef __STDC_IEC_559__
1065 // fixup for Annex F requirements
1066 // the naive fixup goes like this:
1067 //
1068 // where(__l1 == 0, __r) = __hi;
1069 // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
1070 // where(isinf(__absx) || isinf(__absy), __r) = __inf;
1071 //
1072 // The fixup can be prepared in parallel with the sqrt, requiring a
1073 // single blend step after hi_exp * sqrt, reducing latency and
1074 // throughput:
1075 _V __fixup = __hi; // __lo == 0
1076 where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
1077 where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
1078 where(!(__lo == 0 || isunordered(__x, __y)
1079 || (isinf(__absx) || isinf(__absy))),
1080 __fixup)
1081 = __r;
1082 __r = __fixup;
1083#endif
1084 return __r;
1085 }
1086 }
1087 }
1088
1089template <typename _Tp, typename _Abi>
1090 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1091 hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1092 {
1093 return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1094 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1095 __y);
1096 }
1097
1098_GLIBCXX_SIMD_CVTING2(hypot)
1099
1100 template <typename _VV>
1101 __remove_cvref_t<_VV>
1102 __hypot(_VV __x, _VV __y, _VV __z)
1103 {
1104 using _V = __remove_cvref_t<_VV>;
1105 using _Abi = typename _V::abi_type;
1106 using _Tp = typename _V::value_type;
1107 /* FIXME: enable after PR77776 is resolved
1108 if constexpr (_V::size() == 1)
1109 return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
1110 else
1111 */
1112 if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
1113 {
1114 return __fixed_size_apply<simd<_Tp, _Abi>>(
1115 [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1116 return hypot(__a, __b, __c);
1117 }, __x, __y, __z);
1118 }
1119 else
1120 {
1121 using namespace __float_bitwise_operators;
1122 const _V __absx = abs(__x); // no error
1123 const _V __absy = abs(__y); // no error
1124 const _V __absz = abs(__z); // no error
1125 _V __hi = max(max(__absx, __absy), __absz); // no error
1126 _V __l0 = min(__absz, max(__absx, __absy)); // no error
1127 _V __l1 = min(__absy, __absx); // no error
1128 if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
1129 && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
1130 { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
1131 // NaN. In this case the bit-tricks don't work, they require IEC559
1132 // binary32 or binary64 format.
1133#ifdef __STDC_IEC_559__
1134 // fixup for Annex F requirements
1135 if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
1136 return __infinity_v<_Tp>;
1137 else if (isunordered(__absx[0], __absy[0] + __absz[0]))
1138 return __quiet_NaN_v<_Tp>;
1139 else if (__l0[0] == 0 && __l1[0] == 0)
1140 return __hi;
1141#endif
1142 _V __hi_exp = __hi;
1143 const _ULLong __tmp = 0x8000'0000'0000'0000ull;
1144 __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
1145 const _V __scale = 1 / __hi_exp;
1146 __hi *= __scale;
1147 __l0 *= __scale;
1148 __l1 *= __scale;
1149 return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
1150 }
1151 else
1152 {
1153 // round __hi down to the next power-of-2:
1154 _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
1155
1156#ifndef __FAST_MATH__
1157 if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
1158 { // With ARMv7 NEON, we have no subnormals and must use slightly
1159 // different strategy
1160 const _V __hi_exp = __hi & __inf;
1161 _V __scale_back = __hi_exp;
1162 // For large exponents (max & max/2) the inversion comes too
1163 // close to subnormals. Subtract 3 from the exponent:
1164 where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
1165 // Invert and adjust for the off-by-one error of inversion via
1166 // xor:
1167 const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
1168 const _V __h1 = __hi * __scale;
1169 __l0 *= __scale;
1170 __l1 *= __scale;
1171 _V __lo = __l0 * __l0
1172 + __l1 * __l1; // add the two smaller values first
1173 asm("" : "+m"(__lo));
1174 _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
1175 // Fix up hypot(0, 0, 0) to not be NaN:
1176 where(__hi == 0, __r) = 0;
1177 return __r;
1178 }
1179#endif
1180
1181#ifdef __FAST_MATH__
1182 // With fast-math, ignore precision of subnormals and inputs from
1183 // __finite_max_v/2 to __finite_max_v. This removes all
1184 // branching/masking.
1185 if constexpr (true)
1186#else
1187 if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
1188 && all_of(isnormal(__y))
1189 && all_of(isnormal(__z))))
1190#endif
1191 {
1192 const _V __hi_exp = __hi & __inf;
1193 //((__hi + __hi) & __inf) ^ __inf almost works for computing
1194 //__scale, except when (__hi + __hi) & __inf == __inf, in which
1195 // case __scale
1196 // becomes 0 (should be min/2 instead) and thus loses the
1197 // information from __lo.
1198#ifdef __FAST_MATH__
1199 using _Ip = __int_for_sizeof_t<_Tp>;
1200 using _IV = rebind_simd_t<_Ip, _V>;
1201 const auto __as_int = __bit_cast<_IV>(__hi_exp);
1202 const _V __scale
1203 = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
1204#else
1205 const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
1206#endif
1207 constexpr _Tp __mant_mask
1208 = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
1209 const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
1210 __l0 *= __scale;
1211 __l1 *= __scale;
1212 const _V __lo
1213 = __l0 * __l0
1214 + __l1 * __l1; // add the two smaller values first
1215 return __hi_exp * sqrt(__lo + __h1 * __h1);
1216 }
1217 else
1218 {
1219 // slower path to support subnormals
1220 // if __hi is subnormal, avoid scaling by inf & final mul by 0
1221 // (which yields NaN) by using min()
1222 _V __scale = _V(1 / __norm_min_v<_Tp>);
1223 // invert exponent w/o error and w/o using the slow divider
1224 // unit: xor inverts the exponent but off by 1. Multiplication
1225 // with .5 adjusts for the discrepancy.
1226 where(__hi >= __norm_min_v<_Tp>, __scale)
1227 = ((__hi & __inf) ^ __inf) * _Tp(.5);
1228 // adjust final exponent for subnormal inputs
1229 _V __hi_exp = __norm_min_v<_Tp>;
1230 where(__hi >= __norm_min_v<_Tp>, __hi_exp)
1231 = __hi & __inf; // no error
1232 _V __h1 = __hi * __scale; // no error
1233 __l0 *= __scale; // no error
1234 __l1 *= __scale; // no error
1235 _V __lo = __l0 * __l0
1236 + __l1 * __l1; // add the two smaller values first
1237 _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
1238#ifdef __STDC_IEC_559__
1239 // fixup for Annex F requirements
1240 _V __fixup = __hi; // __lo == 0
1241 // where(__lo == 0, __fixup) = __hi;
1242 where(isunordered(__x, __y + __z), __fixup)
1243 = __quiet_NaN_v<_Tp>;
1244 where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
1245 = __inf;
1246 // Instead of __lo == 0, the following could depend on __h1² ==
1247 // __h1² + __lo (i.e. __hi is so much larger than the other two
1248 // inputs that the result is exactly __hi). While this may
1249 // improve precision, it is likely to reduce efficiency if the
1250 // ISA has FMAs (because __h1² + __lo is an FMA, but the
1251 // intermediate
1252 // __h1² must be kept)
1253 where(!(__lo == 0 || isunordered(__x, __y + __z)
1254 || isinf(__absx) || isinf(__absy) || isinf(__absz)),
1255 __fixup)
1256 = __r;
1257 __r = __fixup;
1258#endif
1259 return __r;
1260 }
1261 }
1262 }
1263 }
1264
1265 template <typename _Tp, typename _Abi>
1266 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1267 hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
1268 const simd<_Tp, _Abi>& __z)
1269 {
1270 return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1271 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1272 __y,
1273 __z);
1274 }
1275
1276_GLIBCXX_SIMD_CVTING3(hypot)
1277
1278_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)
1279
1280_GLIBCXX_SIMD_MATH_CALL_(sqrt)
1281_GLIBCXX_SIMD_MATH_CALL_(erf)
1282_GLIBCXX_SIMD_MATH_CALL_(erfc)
1283_GLIBCXX_SIMD_MATH_CALL_(lgamma)
1284_GLIBCXX_SIMD_MATH_CALL_(tgamma)
1285_GLIBCXX_SIMD_MATH_CALL_(ceil)
1286_GLIBCXX_SIMD_MATH_CALL_(floor)
1287_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
1288_GLIBCXX_SIMD_MATH_CALL_(rint)
1289_GLIBCXX_SIMD_MATH_CALL_(lrint)
1290_GLIBCXX_SIMD_MATH_CALL_(llrint)
1291
1292_GLIBCXX_SIMD_MATH_CALL_(round)
1293_GLIBCXX_SIMD_MATH_CALL_(lround)
1294_GLIBCXX_SIMD_MATH_CALL_(llround)
1295
1296_GLIBCXX_SIMD_MATH_CALL_(trunc)
1297
1298_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
1299_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
1300_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1301
1302template <typename _Tp, typename _Abi>
1303 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1304 copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1305 {
1306 if constexpr (simd_size_v<_Tp, _Abi> == 1)
1307 return std::copysign(__x[0], __y[0]);
1308 else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
1309 // Remove this case once __bit_cast is implemented via __builtin_bit_cast.
1310 // It is necessary, because __signmask below cannot be computed at compile
1311 // time.
1312 return simd<_Tp, _Abi>(
1313 [&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
1314 else
1315 {
1316 using _V = simd<_Tp, _Abi>;
1317 using namespace std::experimental::__float_bitwise_operators;
1318 _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
1319 return (__x & (__x ^ __signmask)) | (__y & __signmask);
1320 }
1321 }
1322
1323_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
1324// not covered in [parallel.simd.math]:
1325// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
1326_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
1327_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
1328_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)
1329
1330_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
1331_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
1332_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1333
1334// isnan and isinf require special treatment because old glibc may declare
1335// `int isinf(double)`.
1336template <typename _Tp, typename _Abi, typename...,
1337 typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
1338 enable_if_t<is_floating_point_v<_Tp>, _R>
1339 isinf(simd<_Tp, _Abi> __x)
1340 { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
1341
1342template <typename _Tp, typename _Abi, typename...,
1343 typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
1344 enable_if_t<is_floating_point_v<_Tp>, _R>
1345 isnan(simd<_Tp, _Abi> __x)
1346 { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
1347
1348_GLIBCXX_SIMD_MATH_CALL_(isnormal)
1349
1350template <typename..., typename _Tp, typename _Abi>
1351 simd_mask<_Tp, _Abi>
1352 signbit(simd<_Tp, _Abi> __x)
1353 {
1354 if constexpr (is_integral_v<_Tp>)
1355 {
1356 if constexpr (is_unsigned_v<_Tp>)
1357 return simd_mask<_Tp, _Abi>{}; // false
1358 else
1359 return __x < 0;
1360 }
1361 else
1362 return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1363 }
1364
1365_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
1366_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
1367_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
1368_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
1369_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
1370_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1371
1372/* not covered in [parallel.simd.math]
1373template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1374template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1375template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1376
1377template <typename _V> struct simd_div_t {
1378 _V quot, rem;
1379};
1380
1381template <typename _Abi>
1382simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1383 _SCharv<_Abi> denom);
1384template <typename _Abi>
1385simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1386 __shortv<_Abi> denom);
1387template <typename _Abi>
1388simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1389template <typename _Abi>
1390simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1391 __longv<_Abi> denom);
1392template <typename _Abi>
1393simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1394 __llongv<_Abi> denom);
1395*/
1396
1397// special math {{{
1398template <typename _Tp, typename _Abi>
1399 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1400 assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1401 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1402 const simd<_Tp, _Abi>& __x)
1403 {
1404 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1405 return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1406 });
1407 }
1408
1409template <typename _Tp, typename _Abi>
1410 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1411 assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1412 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1413 const simd<_Tp, _Abi>& __x)
1414 {
1415 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1416 return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1417 });
1418 }
1419
1420_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
1421_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
1422_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
1423_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
1424_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
1425_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
1426_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
1427_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
1428_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
1429_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
1430_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
1431_GLIBCXX_SIMD_MATH_CALL_(expint)
1432
1433template <typename _Tp, typename _Abi>
1434 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1435 hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1436 const simd<_Tp, _Abi>& __x)
1437 {
1438 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1439 return std::hermite(__n[__i], __x[__i]);
1440 });
1441 }
1442
1443template <typename _Tp, typename _Abi>
1444 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1445 laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1446 const simd<_Tp, _Abi>& __x)
1447 {
1448 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1449 return std::laguerre(__n[__i], __x[__i]);
1450 });
1451 }
1452
1453template <typename _Tp, typename _Abi>
1454 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1455 legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1456 const simd<_Tp, _Abi>& __x)
1457 {
1458 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1459 return std::legendre(__n[__i], __x[__i]);
1460 });
1461 }
1462
1463_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1464
1465template <typename _Tp, typename _Abi>
1466 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1467 sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1468 const simd<_Tp, _Abi>& __x)
1469 {
1470 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1471 return std::sph_bessel(__n[__i], __x[__i]);
1472 });
1473 }
1474
1475template <typename _Tp, typename _Abi>
1476 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1477 sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1478 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1479 const simd<_Tp, _Abi>& theta)
1480 {
1481 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1482 return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1483 });
1484 }
1485
1486template <typename _Tp, typename _Abi>
1487 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1488 sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1489 const simd<_Tp, _Abi>& __x)
1490 {
1491 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1492 return std::sph_neumann(__n[__i], __x[__i]);
1493 });
1494 }
1495// }}}
1496
1497#undef _GLIBCXX_SIMD_MATH_CALL_
1498#undef _GLIBCXX_SIMD_MATH_CALL2_
1499#undef _GLIBCXX_SIMD_MATH_CALL3_
1500
1501_GLIBCXX_SIMD_END_NAMESPACE
1502
1503#endif // __cplusplus >= 201703L
1504#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1505
1506// vim: foldmethod=marker sw=2 ts=8 noet sts=2
1507

source code of include/c++/11/experimental/bits/simd_math.h