| 1 | // Math overloads for simd -*- C++ -*- |
| 2 | |
| 3 | // Copyright (C) 2020-2021 Free Software Foundation, Inc. |
| 4 | // |
| 5 | // This file is part of the GNU ISO C++ Library. This library is free |
| 6 | // software; you can redistribute it and/or modify it under the |
| 7 | // terms of the GNU General Public License as published by the |
| 8 | // Free Software Foundation; either version 3, or (at your option) |
| 9 | // any later version. |
| 10 | |
| 11 | // This library is distributed in the hope that it will be useful, |
| 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | // GNU General Public License for more details. |
| 15 | |
| 16 | // Under Section 7 of GPL version 3, you are granted additional |
| 17 | // permissions described in the GCC Runtime Library Exception, version |
| 18 | // 3.1, as published by the Free Software Foundation. |
| 19 | |
| 20 | // You should have received a copy of the GNU General Public License and |
| 21 | // a copy of the GCC Runtime Library Exception along with this program; |
| 22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 23 | // <http://www.gnu.org/licenses/>. |
| 24 | |
| 25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ |
| 26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ |
| 27 | |
| 28 | #if __cplusplus >= 201703L |
| 29 | |
| 30 | #include <utility> |
| 31 | #include <iomanip> |
| 32 | |
| 33 | _GLIBCXX_SIMD_BEGIN_NAMESPACE |
| 34 | template <typename _Tp, typename _V> |
| 35 | using _Samesize = fixed_size_simd<_Tp, _V::size()>; |
| 36 | |
| 37 | // _Math_return_type {{{ |
| 38 | template <typename _DoubleR, typename _Tp, typename _Abi> |
| 39 | struct _Math_return_type; |
| 40 | |
| 41 | template <typename _DoubleR, typename _Tp, typename _Abi> |
| 42 | using _Math_return_type_t = |
| 43 | typename _Math_return_type<_DoubleR, _Tp, _Abi>::type; |
| 44 | |
| 45 | template <typename _Tp, typename _Abi> |
| 46 | struct _Math_return_type<double, _Tp, _Abi> |
| 47 | { using type = simd<_Tp, _Abi>; }; |
| 48 | |
| 49 | template <typename _Tp, typename _Abi> |
| 50 | struct _Math_return_type<bool, _Tp, _Abi> |
| 51 | { using type = simd_mask<_Tp, _Abi>; }; |
| 52 | |
| 53 | template <typename _DoubleR, typename _Tp, typename _Abi> |
| 54 | struct _Math_return_type |
| 55 | { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; }; |
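
// Example (illustration only): `std::ilogb(double)` returns int, so the
// macros below map ilogb to `fixed_size_simd<int, simd_size_v<_Tp, _Abi>>`;
// a `double` return type maps back to `simd<_Tp, _Abi>` and `bool` to
// `simd_mask<_Tp, _Abi>` (cf. isnan further down).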
| 56 | |
| 57 | //}}} |
| 58 | // _GLIBCXX_SIMD_MATH_CALL_ {{{ |
| 59 | #define _GLIBCXX_SIMD_MATH_CALL_(__name) \ |
| 60 | template <typename _Tp, typename _Abi, typename..., \ |
| 61 | typename _R = _Math_return_type_t< \ |
| 62 | decltype(std::__name(declval<double>())), _Tp, _Abi>> \ |
| 63 | enable_if_t<is_floating_point_v<_Tp>, _R> \ |
| 64 | __name(simd<_Tp, _Abi> __x) \ |
| 65 | { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; } |
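
// Example (illustration only): `_GLIBCXX_SIMD_MATH_CALL_(acos)` (used below)
// declares `acos(simd<_Tp, _Abi>)` for floating-point _Tp, returning
// `simd<_Tp, _Abi>` (because `std::acos(double)` returns double) and
// forwarding element-wise to `_Abi::_SimdImpl::_S_acos`.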
| 66 | |
| 67 | // }}} |
| 68 | //_Extra_argument_type{{{ |
| 69 | template <typename _Up, typename _Tp, typename _Abi> |
| 70 | struct _Extra_argument_type; |
| 71 | |
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };
| 82 | |
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };
| 94 | |
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };
| 106 | |
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };
| 119 | |
| 120 | //}}} |
| 121 | // _GLIBCXX_SIMD_MATH_CALL2_ {{{ |
| 122 | #define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_) \ |
| 123 | template < \ |
| 124 | typename _Tp, typename _Abi, typename..., \ |
| 125 | typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \ |
| 126 | typename _R = _Math_return_type_t< \ |
| 127 | decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \ |
| 128 | enable_if_t<is_floating_point_v<_Tp>, _R> \ |
| 129 | __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \ |
| 130 | { \ |
| 131 | return {__private_init, \ |
| 132 | _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \ |
| 133 | } \ |
| 134 | template <typename _Up, typename _Tp, typename _Abi> \ |
| 135 | _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \ |
| 136 | decltype(std::__name( \ |
| 137 | declval<double>(), \ |
| 138 | declval<enable_if_t< \ |
| 139 | conjunction_v< \ |
| 140 | is_same<arg2_, _Tp>, \ |
| 141 | negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \ |
| 142 | is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \ |
| 143 | double>>())), \ |
| 144 | _Tp, _Abi> \ |
| 145 | __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \ |
| 146 | { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); } |
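
// Example (illustration only): `_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)` (used
// below) generates `pow(simd, simd)` plus an overload whose first argument
// only needs to be convertible to the simd type, so `pow(2.f, __v)`
// broadcasts the scalar and delegates to the simd-simd overload.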
| 147 | |
| 148 | // }}} |
| 149 | // _GLIBCXX_SIMD_MATH_CALL3_ {{{ |
| 150 | #define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_) \ |
| 151 | template <typename _Tp, typename _Abi, typename..., \ |
| 152 | typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \ |
| 153 | typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>, \ |
| 154 | typename _R = _Math_return_type_t< \ |
| 155 | decltype(std::__name(declval<double>(), _Arg2::declval(), \ |
| 156 | _Arg3::declval())), \ |
| 157 | _Tp, _Abi>> \ |
| 158 | enable_if_t<is_floating_point_v<_Tp>, _R> \ |
| 159 | __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \ |
| 160 | const typename _Arg3::type& __z) \ |
| 161 | { \ |
| 162 | return {__private_init, \ |
| 163 | _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \ |
| 164 | _Arg3::_S_data(__z))}; \ |
| 165 | } \ |
| 166 | template < \ |
| 167 | typename _T0, typename _T1, typename _T2, typename..., \ |
| 168 | typename _U0 = __remove_cvref_t<_T0>, \ |
| 169 | typename _U1 = __remove_cvref_t<_T1>, \ |
| 170 | typename _U2 = __remove_cvref_t<_T2>, \ |
| 171 | typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \ |
| 172 | typename = enable_if_t<conjunction_v< \ |
| 173 | is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \ |
| 174 | is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \ |
| 175 | negation<conjunction< \ |
| 176 | is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \ |
| 177 | _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \ |
| 178 | declval<const _Simd&>(), \ |
| 179 | declval<const _Simd&>())) \ |
| 180 | __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \ |
| 181 | { \ |
| 182 | return __name(_Simd(static_cast<_T0&&>(__xx)), \ |
| 183 | _Simd(static_cast<_T1&&>(__yy)), \ |
| 184 | _Simd(static_cast<_T2&&>(__zz))); \ |
| 185 | } |
| 186 | |
| 187 | // }}} |
| 188 | // __cosSeries {{{ |
| 189 | template <typename _Abi> |
| 190 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi> |
| 191 | __cosSeries(const simd<float, _Abi>& __x) |
| 192 | { |
| 193 | const simd<float, _Abi> __x2 = __x * __x; |
| 194 | simd<float, _Abi> __y; |
| 195 | __y = 0x1.ap-16f; // 1/8! |
| 196 | __y = __y * __x2 - 0x1.6c1p-10f; // -1/6! |
| 197 | __y = __y * __x2 + 0x1.555556p-5f; // 1/4! |
| 198 | return __y * (__x2 * __x2) - .5f * __x2 + 1.f; |
| 199 | } |
| 200 | |
| 201 | template <typename _Abi> |
| 202 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi> |
| 203 | __cosSeries(const simd<double, _Abi>& __x) |
| 204 | { |
| 205 | const simd<double, _Abi> __x2 = __x * __x; |
| 206 | simd<double, _Abi> __y; |
| 207 | __y = 0x1.AC00000000000p-45; // 1/16! |
| 208 | __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14! |
| 209 | __y = __y * __x2 + 0x1.1EED8C0000000p-29; // 1/12! |
| 210 | __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10! |
| 211 | __y = __y * __x2 + 0x1.A01A01A018000p-16; // 1/8! |
| 212 | __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6! |
| 213 | __y = __y * __x2 + 0x1.5555555555554p-5; // 1/4! |
| 214 | return (__y * __x2 - .5f) * __x2 + 1.f; |
| 215 | } |
| 216 | |
| 217 | // }}} |
| 218 | // __sinSeries {{{ |
| 219 | template <typename _Abi> |
| 220 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi> |
| 221 | __sinSeries(const simd<float, _Abi>& __x) |
| 222 | { |
| 223 | const simd<float, _Abi> __x2 = __x * __x; |
| 224 | simd<float, _Abi> __y; |
| 225 | __y = -0x1.9CC000p-13f; // -1/7! |
| 226 | __y = __y * __x2 + 0x1.111100p-7f; // 1/5! |
| 227 | __y = __y * __x2 - 0x1.555556p-3f; // -1/3! |
| 228 | return __y * (__x2 * __x) + __x; |
| 229 | } |
| 230 | |
| 231 | template <typename _Abi> |
| 232 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi> |
| 233 | __sinSeries(const simd<double, _Abi>& __x) |
| 234 | { |
    // __x  = [0, 0.7854 = π/4]
    // __x² = [0, 0.6169 = π²/16]
| 237 | const simd<double, _Abi> __x2 = __x * __x; |
| 238 | simd<double, _Abi> __y; |
| 239 | __y = -0x1.ACF0000000000p-41; // -1/15! |
| 240 | __y = __y * __x2 + 0x1.6124400000000p-33; // 1/13! |
| 241 | __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11! |
| 242 | __y = __y * __x2 + 0x1.71DE3A5540000p-19; // 1/9! |
| 243 | __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7! |
| 244 | __y = __y * __x2 + 0x1.1111111111110p-7; // 1/5! |
| 245 | __y = __y * __x2 - 0x1.5555555555555p-3; // -1/3! |
| 246 | return __y * (__x2 * __x) + __x; |
| 247 | } |
| 248 | |
| 249 | // }}} |
| 250 | // __zero_low_bits {{{ |
| 251 | template <int _Bits, typename _Tp, typename _Abi> |
| 252 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> |
| 253 | __zero_low_bits(simd<_Tp, _Abi> __x) |
| 254 | { |
| 255 | const simd<_Tp, _Abi> __bitmask |
| 256 | = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits); |
| 257 | return {__private_init, |
| 258 | _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))}; |
| 259 | } |
| 260 | |
| 261 | // }}} |
| 262 | // __fold_input {{{ |
| 263 | |
/**@internal
 * Fold @p __x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π, ¼π]
 * quadrant 1: [ ¼π, ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple such that
 * `x - y * ¼π ∈ [-¼π, ¼π]`. Using a bitmask, `y` is reduced to `quadrant`.
 * `y` can be calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision
 * allows, `2/π * x` is better (faster).
 */
| 282 | template <typename _Tp, typename _Abi> |
| 283 | struct _Folded |
| 284 | { |
| 285 | simd<_Tp, _Abi> _M_x; |
| 286 | rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant; |
| 287 | }; |
| 288 | |
| 289 | namespace __math_float { |
| 290 | inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4 |
  inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
| 292 | inline constexpr float __pi_2_5bits0 |
| 293 | = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant) |
| 294 | inline constexpr float __pi_2_5bits0_rem |
| 295 | = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0 |
| 296 | } // namespace __math_float |
| 297 | namespace __math_double { |
| 298 | inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4 |
  inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
| 300 | inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // π/2 |
| 301 | } // namespace __math_double |
| 302 | |
| 303 | template <typename _Abi> |
| 304 | _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi> |
| 305 | __fold_input(const simd<float, _Abi>& __x) |
| 306 | { |
| 307 | using _V = simd<float, _Abi>; |
| 308 | using _IV = rebind_simd_t<int, _V>; |
| 309 | using namespace __math_float; |
| 310 | _Folded<float, _Abi> __r; |
| 311 | __r._M_x = abs(__x); |
| 312 | #if 0 |
| 313 | // zero most mantissa bits: |
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
| 315 | const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f; |
| 316 | // split π into 4 parts, the first three with 13 trailing zeros (to make the |
| 317 | // following multiplications precise): |
| 318 | constexpr float __pi0 = 0x1.920000p1f; |
| 319 | constexpr float __pi1 = 0x1.fb4000p-11f; |
| 320 | constexpr float __pi2 = 0x1.444000p-23f; |
| 321 | constexpr float __pi3 = 0x1.68c234p-38f; |
| 322 | __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3 |
| 323 | #else |
| 324 | if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4))) |
| 325 | __r._M_quadrant = 0; |
| 326 | else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4))) |
| 327 | { |
| 328 | const _V __y = nearbyint(__r._M_x * __2_over_pi); |
| 329 | __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4 |
| 330 | __r._M_x -= __y * __pi_2_5bits0; |
| 331 | __r._M_x -= __y * __pi_2_5bits0_rem; |
| 332 | } |
| 333 | else |
| 334 | { |
| 335 | using __math_double::__2_over_pi; |
| 336 | using __math_double::__pi_2; |
| 337 | using _VD = rebind_simd_t<double, _V>; |
| 338 | _VD __xd = static_simd_cast<_VD>(__r._M_x); |
| 339 | _VD __y = nearbyint(__xd * __2_over_pi); |
| 340 | __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4 |
| 341 | __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2); |
| 342 | } |
| 343 | #endif |
| 344 | return __r; |
| 345 | } |
| 346 | |
| 347 | template <typename _Abi> |
| 348 | _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi> |
| 349 | __fold_input(const simd<double, _Abi>& __x) |
| 350 | { |
| 351 | using _V = simd<double, _Abi>; |
| 352 | using _IV = rebind_simd_t<int, _V>; |
| 353 | using namespace __math_double; |
| 354 | |
| 355 | _Folded<double, _Abi> __r; |
| 356 | __r._M_x = abs(__x); |
| 357 | if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4))) |
| 358 | { |
| 359 | __r._M_quadrant = 0; |
| 360 | return __r; |
| 361 | } |
| 362 | const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4)); |
| 363 | __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; |
| 364 | |
| 365 | if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4))) |
| 366 | { |
| 367 | // x - y * pi/2, y uses no more than 11 mantissa bits |
| 368 | __r._M_x -= __y * 0x1.921FB54443000p0; |
| 369 | __r._M_x -= __y * -0x1.73DCB3B39A000p-43; |
| 370 | __r._M_x -= __y * 0x1.45C06E0E68948p-86; |
| 371 | } |
| 372 | else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30))) |
| 373 | { |
| 374 | // x - y * pi/2, y uses no more than 29 mantissa bits |
| 375 | __r._M_x -= __y * 0x1.921FB40000000p0; |
| 376 | __r._M_x -= __y * 0x1.4442D00000000p-24; |
| 377 | __r._M_x -= __y * 0x1.8469898CC5170p-48; |
| 378 | } |
| 379 | else |
| 380 | { |
| 381 | // x - y * pi/2, y may require all mantissa bits |
| 382 | const _V __y_hi = __zero_low_bits<26>(__y); |
| 383 | const _V __y_lo = __y - __y_hi; |
| 384 | const auto __pi_2_1 = 0x1.921FB50000000p0; |
| 385 | const auto __pi_2_2 = 0x1.110B460000000p-26; |
| 386 | const auto __pi_2_3 = 0x1.1A62630000000p-54; |
| 387 | const auto __pi_2_4 = 0x1.8A2E03707344Ap-81; |
| 388 | __r._M_x = __r._M_x - __y_hi * __pi_2_1 |
| 389 | - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1) |
| 390 | - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1) |
| 391 | - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2) |
| 392 | - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2) |
| 393 | - max(__y * __pi_2_4, __y_lo * __pi_2_3) |
| 394 | - min(__y * __pi_2_4, __y_lo * __pi_2_3); |
| 395 | } |
| 396 | return __r; |
| 397 | } |
| 398 | |
| 399 | // }}} |
| 400 | // __extract_exponent_as_int {{{ |
| 401 | template <typename _Tp, typename _Abi> |
| 402 | rebind_simd_t<int, simd<_Tp, _Abi>> |
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
| 404 | { |
| 405 | using _Vp = simd<_Tp, _Abi>; |
| 406 | using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>; |
| 407 | using namespace std::experimental::__float_bitwise_operators; |
| 408 | const _Vp __exponent_mask |
| 409 | = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000 |
| 410 | return static_simd_cast<rebind_simd_t<int, _Vp>>( |
| 411 | __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask) |
| 412 | >> (__digits_v<_Tp> - 1)); |
| 413 | } |
| 414 | |
| 415 | // }}} |
| 416 | // __impl_or_fallback {{{ |
template <typename _ImplFun, typename _FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, _ImplFun&& __impl_fun, _FallbackFun&&,
                              _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename _ImplFun, typename _FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, _ImplFun&&, _FallbackFun&& __fallback_fun,
                              _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }
| 430 | |
| 431 | template <typename... _Args> |
| 432 | _GLIBCXX_SIMD_INTRINSIC auto |
| 433 | __impl_or_fallback(_Args&&... __args) |
| 434 | { |
| 435 | return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...); |
| 436 | } |
| 437 | //}}} |
| 438 | |
| 439 | // trigonometric functions {{{ |
| 440 | _GLIBCXX_SIMD_MATH_CALL_(acos) |
| 441 | _GLIBCXX_SIMD_MATH_CALL_(asin) |
| 442 | _GLIBCXX_SIMD_MATH_CALL_(atan) |
| 443 | _GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp) |
| 444 | |
| 445 | /* |
| 446 | * algorithm for sine and cosine: |
| 447 | * |
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in.
 * sine   ≈ __x + __x³
 * cosine ≈ 1 - __x²
| 450 | * |
| 451 | * sine: |
| 452 | * Map -__x to __x and invert the output |
| 453 | * Extend precision of __x - n * π/4 by calculating |
| 454 | * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4) |
| 455 | * |
| 456 | * Calculate Taylor series with tuned coefficients. |
| 457 | * Fix sign. |
| 458 | */ |
| 459 | // cos{{{ |
| 460 | template <typename _Tp, typename _Abi> |
| 461 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 462 | cos(const simd<_Tp, _Abi>& __x) |
| 463 | { |
| 464 | using _V = simd<_Tp, _Abi>; |
| 465 | if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>) |
| 466 | return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))}; |
| 467 | else |
| 468 | { |
| 469 | if constexpr (is_same_v<_Tp, float>) |
| 470 | if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382))) |
| 471 | return static_simd_cast<_V>( |
| 472 | cos(static_simd_cast<rebind_simd_t<double, _V>>(__x))); |
| 473 | |
| 474 | const auto __f = __fold_input(__x); |
| 475 | // quadrant | effect |
| 476 | // 0 | cosSeries, + |
| 477 | // 1 | sinSeries, - |
| 478 | // 2 | cosSeries, - |
| 479 | // 3 | sinSeries, + |
| 480 | using namespace std::experimental::__float_bitwise_operators; |
| 481 | const _V __sign_flip |
| 482 | = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30); |
| 483 | |
| 484 | const auto __need_cos = (__f._M_quadrant & 1) == 0; |
| 485 | if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos))) |
| 486 | return __sign_flip ^ __cosSeries(__f._M_x); |
| 487 | else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos))) |
| 488 | return __sign_flip ^ __sinSeries(__f._M_x); |
| 489 | else // some_of(__need_cos) |
| 490 | { |
| 491 | _V __r = __sinSeries(__f._M_x); |
| 492 | where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x); |
| 493 | return __r ^ __sign_flip; |
| 494 | } |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | template <typename _Tp> |
| 499 | _GLIBCXX_SIMD_ALWAYS_INLINE |
| 500 | enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>> |
| 501 | cos(simd<_Tp, simd_abi::scalar> __x) |
| 502 | { return std::cos(__data(__x)); } |
| 503 | |
| 504 | //}}} |
| 505 | // sin{{{ |
| 506 | template <typename _Tp, typename _Abi> |
| 507 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 508 | sin(const simd<_Tp, _Abi>& __x) |
| 509 | { |
| 510 | using _V = simd<_Tp, _Abi>; |
| 511 | if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>) |
| 512 | return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))}; |
| 513 | else |
| 514 | { |
| 515 | if constexpr (is_same_v<_Tp, float>) |
| 516 | if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449))) |
| 517 | return static_simd_cast<_V>( |
| 518 | sin(static_simd_cast<rebind_simd_t<double, _V>>(__x))); |
| 519 | |
| 520 | const auto __f = __fold_input(__x); |
| 521 | // quadrant | effect |
| 522 | // 0 | sinSeries |
| 523 | // 1 | cosSeries |
| 524 | // 2 | sinSeries, sign flip |
| 525 | // 3 | cosSeries, sign flip |
| 526 | using namespace std::experimental::__float_bitwise_operators; |
| 527 | const auto __sign_flip |
| 528 | = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.)); |
| 529 | |
| 530 | const auto __need_sin = (__f._M_quadrant & 1) == 0; |
| 531 | if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin))) |
| 532 | return __sign_flip ^ __sinSeries(__f._M_x); |
| 533 | else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin))) |
| 534 | return __sign_flip ^ __cosSeries(__f._M_x); |
| 535 | else // some_of(__need_sin) |
| 536 | { |
| 537 | _V __r = __cosSeries(__f._M_x); |
| 538 | where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x); |
| 539 | return __sign_flip ^ __r; |
| 540 | } |
| 541 | } |
| 542 | } |
| 543 | |
| 544 | template <typename _Tp> |
| 545 | _GLIBCXX_SIMD_ALWAYS_INLINE |
| 546 | enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>> |
| 547 | sin(simd<_Tp, simd_abi::scalar> __x) |
| 548 | { return std::sin(__data(__x)); } |
| 549 | |
| 550 | //}}} |
| 551 | _GLIBCXX_SIMD_MATH_CALL_(tan) |
| 552 | _GLIBCXX_SIMD_MATH_CALL_(acosh) |
| 553 | _GLIBCXX_SIMD_MATH_CALL_(asinh) |
| 554 | _GLIBCXX_SIMD_MATH_CALL_(atanh) |
| 555 | _GLIBCXX_SIMD_MATH_CALL_(cosh) |
| 556 | _GLIBCXX_SIMD_MATH_CALL_(sinh) |
| 557 | _GLIBCXX_SIMD_MATH_CALL_(tanh) |
| 558 | // }}} |
| 559 | // exponential functions {{{ |
| 560 | _GLIBCXX_SIMD_MATH_CALL_(exp) |
| 561 | _GLIBCXX_SIMD_MATH_CALL_(exp2) |
| 562 | _GLIBCXX_SIMD_MATH_CALL_(expm1) |
| 563 | |
| 564 | // }}} |
| 565 | // frexp {{{ |
| 566 | #if _GLIBCXX_SIMD_X86INTRIN |
| 567 | template <typename _Tp, size_t _Np> |
| 568 | _SimdWrapper<_Tp, _Np> |
| 569 | __getexp(_SimdWrapper<_Tp, _Np> __x) |
| 570 | { |
| 571 | if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>()) |
| 572 | return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x))); |
| 573 | else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>()) |
| 574 | return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x)))); |
| 575 | else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>()) |
| 576 | return _mm_getexp_pd(__x); |
| 577 | else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>()) |
| 578 | return __lo128(_mm512_getexp_pd(__auto_bitcast(__x))); |
| 579 | else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>()) |
| 580 | return _mm256_getexp_ps(__x); |
| 581 | else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>()) |
| 582 | return __lo256(_mm512_getexp_ps(__auto_bitcast(__x))); |
| 583 | else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>()) |
| 584 | return _mm256_getexp_pd(__x); |
| 585 | else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>()) |
| 586 | return __lo256(_mm512_getexp_pd(__auto_bitcast(__x))); |
| 587 | else if constexpr (__is_avx512_ps<_Tp, _Np>()) |
| 588 | return _mm512_getexp_ps(__x); |
| 589 | else if constexpr (__is_avx512_pd<_Tp, _Np>()) |
| 590 | return _mm512_getexp_pd(__x); |
| 591 | else |
| 592 | __assert_unreachable<_Tp>(); |
| 593 | } |
| 594 | |
| 595 | template <typename _Tp, size_t _Np> |
| 596 | _SimdWrapper<_Tp, _Np> |
| 597 | __getmant_avx512(_SimdWrapper<_Tp, _Np> __x) |
| 598 | { |
| 599 | if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>()) |
| 600 | return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1, |
| 601 | _MM_MANT_SIGN_src)); |
| 602 | else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>()) |
| 603 | return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)), |
| 604 | _MM_MANT_NORM_p5_1, |
| 605 | _MM_MANT_SIGN_src)); |
| 606 | else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>()) |
| 607 | return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); |
| 608 | else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>()) |
| 609 | return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1, |
| 610 | _MM_MANT_SIGN_src)); |
| 611 | else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>()) |
| 612 | return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); |
| 613 | else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>()) |
| 614 | return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1, |
| 615 | _MM_MANT_SIGN_src)); |
| 616 | else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>()) |
| 617 | return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); |
| 618 | else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>()) |
| 619 | return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1, |
| 620 | _MM_MANT_SIGN_src)); |
| 621 | else if constexpr (__is_avx512_ps<_Tp, _Np>()) |
| 622 | return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); |
| 623 | else if constexpr (__is_avx512_pd<_Tp, _Np>()) |
| 624 | return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); |
| 625 | else |
| 626 | __assert_unreachable<_Tp>(); |
| 627 | } |
| 628 | #endif // _GLIBCXX_SIMD_X86INTRIN |
| 629 | |
| 630 | /** |
| 631 | * splits @p __v into exponent and mantissa, the sign is kept with the mantissa |
| 632 | * |
| 633 | * The return value will be in the range [0.5, 1.0[ |
| 634 | * The @p __e value will be an integer defining the power-of-two exponent |
| 635 | */ |
| 636 | template <typename _Tp, typename _Abi> |
| 637 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 638 | frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp) |
| 639 | { |
| 640 | if constexpr (simd_size_v<_Tp, _Abi> == 1) |
| 641 | { |
| 642 | int __tmp; |
| 643 | const auto __r = std::frexp(__x[0], &__tmp); |
| 644 | (*__exp)[0] = __tmp; |
| 645 | return __r; |
| 646 | } |
| 647 | else if constexpr (__is_fixed_size_abi_v<_Abi>) |
| 648 | { |
| 649 | return {__private_init, |
| 650 | _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))}; |
| 651 | #if _GLIBCXX_SIMD_X86INTRIN |
| 652 | } |
| 653 | else if constexpr (__have_avx512f) |
| 654 | { |
| 655 | constexpr size_t _Np = simd_size_v<_Tp, _Abi>; |
| 656 | constexpr size_t _NI = _Np < 4 ? 4 : _Np; |
| 657 | const auto __v = __data(__x); |
| 658 | const auto __isnonzero |
| 659 | = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data); |
| 660 | const _SimdWrapper<int, _NI> __exp_plus1 |
| 661 | = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data; |
| 662 | const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>( |
| 663 | _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero), |
| 664 | _SimdWrapper<int, _NI>(), __exp_plus1)); |
| 665 | simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp); |
| 666 | return {__private_init, |
| 667 | _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>( |
| 668 | __isnonzero), |
| 669 | __v, __getmant_avx512(__v))}; |
| 670 | #endif // _GLIBCXX_SIMD_X86INTRIN |
| 671 | } |
| 672 | else |
| 673 | { |
| 674 | // fallback implementation |
| 675 | static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8); |
| 676 | using _V = simd<_Tp, _Abi>; |
| 677 | using _IV = rebind_simd_t<int, _V>; |
| 678 | using namespace std::experimental::__proposed; |
| 679 | using namespace std::experimental::__float_bitwise_operators; |
| 680 | |
| 681 | constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe; |
| 682 | constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200; |
| 683 | constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512; |
| 684 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask |
| 685 | = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000 |
| 686 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent |
| 687 | = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff |
| 688 | |
| 689 | _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1) |
| 690 | const _IV __exponent_bits = __extract_exponent_as_int(__x); |
| 691 | if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x)))) |
| 692 | { |
| 693 | *__exp |
| 694 | = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust); |
| 695 | return __mant; |
| 696 | } |
| 697 | |
| 698 | #if __FINITE_MATH_ONLY__ |
| 699 | // at least one element of __x is 0 or subnormal, the rest is normal |
| 700 | // (inf and NaN are excluded by -ffinite-math-only) |
| 701 | const auto __iszero_inf_nan = __x == 0; |
| 702 | #else |
| 703 | const auto __as_int |
| 704 | = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x)); |
| 705 | const auto __inf |
| 706 | = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>( |
| 707 | _V(__infinity_v<_Tp>)); |
| 708 | const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>( |
| 709 | __as_int == 0 || __as_int >= __inf); |
| 710 | #endif |
| 711 | |
| 712 | const _V __scaled_subnormal = __x * __subnorm_scale; |
| 713 | const _V __mant_subnormal |
| 714 | = __p5_1_exponent & (__exponent_mask | __scaled_subnormal); |
| 715 | where(!isnormal(__x), __mant) = __mant_subnormal; |
| 716 | where(__iszero_inf_nan, __mant) = __x; |
| 717 | _IV __e = __extract_exponent_as_int(__scaled_subnormal); |
| 718 | using _MaskType = |
| 719 | typename conditional_t<sizeof(typename _V::value_type) == sizeof(int), |
| 720 | _V, _IV>::mask_type; |
| 721 | const _MaskType __value_isnormal = isnormal(__x).__cvt(); |
| 722 | where(__value_isnormal.__cvt(), __e) = __exponent_bits; |
| 723 | static_assert(sizeof(_IV) == sizeof(__value_isnormal)); |
| 724 | const _IV __offset |
| 725 | = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust)) |
| 726 | | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0) |
| 727 | & static_simd_cast<_MaskType>(__x != 0)) |
| 728 | & _IV(__exp_adjust + __exp_offset)); |
| 729 | *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset); |
| 730 | return __mant; |
| 731 | } |
| 732 | } |
| 733 | |
| 734 | // }}} |
| 735 | _GLIBCXX_SIMD_MATH_CALL2_(ldexp, int) |
| 736 | _GLIBCXX_SIMD_MATH_CALL_(ilogb) |
| 737 | |
| 738 | // logarithms {{{ |
| 739 | _GLIBCXX_SIMD_MATH_CALL_(log) |
| 740 | _GLIBCXX_SIMD_MATH_CALL_(log10) |
| 741 | _GLIBCXX_SIMD_MATH_CALL_(log1p) |
| 742 | _GLIBCXX_SIMD_MATH_CALL_(log2) |
| 743 | |
| 744 | //}}} |
| 745 | // logb{{{ |
| 746 | template <typename _Tp, typename _Abi> |
| 747 | enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>> |
| 748 | logb(const simd<_Tp, _Abi>& __x) |
| 749 | { |
| 750 | constexpr size_t _Np = simd_size_v<_Tp, _Abi>; |
| 751 | if constexpr (_Np == 1) |
| 752 | return std::logb(__x[0]); |
| 753 | else if constexpr (__is_fixed_size_abi_v<_Abi>) |
| 754 | { |
| 755 | return {__private_init, |
| 756 | __data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) { |
| 757 | using _V = typename decltype(__impl)::simd_type; |
| 758 | return __data( |
| 759 | std::experimental::logb(_V(__private_init, __xx))); |
| 760 | })}; |
| 761 | } |
| 762 | #if _GLIBCXX_SIMD_X86INTRIN // {{{ |
| 763 | else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>()) |
| 764 | return {__private_init, |
| 765 | __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))}; |
| 766 | else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>()) |
| 767 | return {__private_init, _mm_getexp_pd(__data(__x))}; |
| 768 | else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>()) |
| 769 | return {__private_init, _mm256_getexp_ps(__data(__x))}; |
| 770 | else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>()) |
| 771 | return {__private_init, _mm256_getexp_pd(__data(__x))}; |
| 772 | else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>()) |
| 773 | return {__private_init, |
| 774 | __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))}; |
| 775 | else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>()) |
| 776 | return {__private_init, |
| 777 | __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))}; |
| 778 | else if constexpr (__is_avx512_ps<_Tp, _Np>()) |
| 779 | return {__private_init, _mm512_getexp_ps(__data(__x))}; |
| 780 | else if constexpr (__is_avx512_pd<_Tp, _Np>()) |
| 781 | return {__private_init, _mm512_getexp_pd(__data(__x))}; |
| 782 | #endif // _GLIBCXX_SIMD_X86INTRIN }}} |
| 783 | else |
| 784 | { |
| 785 | using _V = simd<_Tp, _Abi>; |
| 786 | using namespace std::experimental::__proposed; |
| 787 | auto __is_normal = isnormal(__x); |
| 788 | |
      // work on abs(__x) to reflect the return value on Linux for negative
      // inputs (domain-error => implementation-defined value is returned)
      const _V __abs_x = abs(__x);
| 792 | |
| 793 | // __exponent(__x) returns the exponent value (bias removed) as |
| 794 | // simd<_Up> with integral _Up |
| 795 | auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 796 | using namespace std::experimental::__proposed; |
| 797 | using _IV = rebind_simd_t< |
| 798 | conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>; |
| 799 | return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1)) |
| 800 | - (__max_exponent_v<_Tp> - 1); |
| 801 | }; |
      _V __r = static_simd_cast<_V>(__exponent(__abs_x));
| 803 | if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal))) |
| 804 | // without corner cases (nan, inf, subnormal, zero) we have our |
| 805 | // answer: |
| 806 | return __r; |
| 807 | const auto __is_zero = __x == 0; |
| 808 | const auto __is_nan = isnan(__x); |
| 809 | const auto __is_inf = isinf(__x); |
| 810 | where(__is_zero, __r) = -__infinity_v<_Tp>; |
| 811 | where(__is_nan, __r) = __x; |
| 812 | where(__is_inf, __r) = __infinity_v<_Tp>; |
| 813 | __is_normal |= __is_zero || __is_nan || __is_inf; |
| 814 | if (all_of(__is_normal)) |
| 815 | // at this point everything but subnormals is handled |
| 816 | return __r; |
      // subnormals repeat the exponent extraction after multiplying the
      // input with a floating-point value that has 112 (0x70) in its exponent
      // (not too big for sp and large enough for dp)
      const _V __scaled = __abs_x * _Tp(0x1p112);
| 821 | _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112); |
| 822 | where(__is_normal, __scaled_exp) = __r; |
| 823 | return __scaled_exp; |
| 824 | } |
| 825 | } |
| 826 | |
| 827 | //}}} |
| 828 | template <typename _Tp, typename _Abi> |
| 829 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 830 | modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr) |
| 831 | { |
| 832 | if constexpr (__is_scalar_abi<_Abi>() |
| 833 | || (__is_fixed_size_abi_v< |
| 834 | _Abi> && simd_size_v<_Tp, _Abi> == 1)) |
| 835 | { |
| 836 | _Tp __tmp; |
| 837 | _Tp __r = std::modf(__x[0], &__tmp); |
| 838 | __iptr[0] = __tmp; |
| 839 | return __r; |
| 840 | } |
| 841 | else |
| 842 | { |
| 843 | const auto __integral = trunc(__x); |
| 844 | *__iptr = __integral; |
| 845 | auto __r = __x - __integral; |
| 846 | #if !__FINITE_MATH_ONLY__ |
| 847 | where(isinf(__x), __r) = _Tp(); |
| 848 | #endif |
| 849 | return copysign(__r, __x); |
| 850 | } |
| 851 | } |
| 852 | |
| 853 | _GLIBCXX_SIMD_MATH_CALL2_(scalbn, int) |
| 854 | _GLIBCXX_SIMD_MATH_CALL2_(scalbln, long) |
| 855 | |
| 856 | _GLIBCXX_SIMD_MATH_CALL_(cbrt) |
| 857 | |
| 858 | _GLIBCXX_SIMD_MATH_CALL_(abs) |
| 859 | _GLIBCXX_SIMD_MATH_CALL_(fabs) |
| 860 | |
| 861 | // [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to |
| 862 | // allow signed integral _Tp |
| 863 | template <typename _Tp, typename _Abi> |
| 864 | enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>> |
| 865 | abs(const simd<_Tp, _Abi>& __x) |
| 866 | { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; } |
| 867 | |
| 868 | template <typename _Tp, typename _Abi> |
| 869 | enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>> |
| 870 | fabs(const simd<_Tp, _Abi>& __x) |
| 871 | { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; } |
| 872 | |
| 873 | // the following are overloads for functions in <cstdlib> and not covered by |
| 874 | // [parallel.simd.math]. I don't see much value in making them work, though |
| 875 | /* |
| 876 | template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x) |
| 877 | { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; } |
| 878 | |
| 879 | template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi> |
| 880 | &__x) |
| 881 | { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; } |
| 882 | */ |
| 883 | |
| 884 | #define _GLIBCXX_SIMD_CVTING2(_NAME) \ |
| 885 | template <typename _Tp, typename _Abi> \ |
| 886 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 887 | const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \ |
| 888 | { \ |
| 889 | return _NAME(__x, __y); \ |
| 890 | } \ |
| 891 | \ |
| 892 | template <typename _Tp, typename _Abi> \ |
| 893 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 894 | const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \ |
| 895 | { \ |
| 896 | return _NAME(__x, __y); \ |
| 897 | } |
| 898 | |
| 899 | #define _GLIBCXX_SIMD_CVTING3(_NAME) \ |
| 900 | template <typename _Tp, typename _Abi> \ |
| 901 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 902 | const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \ |
| 903 | const simd<_Tp, _Abi>& __z) \ |
| 904 | { \ |
| 905 | return _NAME(__x, __y, __z); \ |
| 906 | } \ |
| 907 | \ |
| 908 | template <typename _Tp, typename _Abi> \ |
| 909 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 910 | const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \ |
| 911 | const simd<_Tp, _Abi>& __z) \ |
| 912 | { \ |
| 913 | return _NAME(__x, __y, __z); \ |
| 914 | } \ |
| 915 | \ |
| 916 | template <typename _Tp, typename _Abi> \ |
| 917 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 918 | const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \ |
| 919 | const __type_identity_t<simd<_Tp, _Abi>>& __z) \ |
| 920 | { \ |
| 921 | return _NAME(__x, __y, __z); \ |
| 922 | } \ |
| 923 | \ |
| 924 | template <typename _Tp, typename _Abi> \ |
| 925 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 926 | const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \ |
| 927 | const __type_identity_t<simd<_Tp, _Abi>>& __z) \ |
| 928 | { \ |
| 929 | return _NAME(__x, __y, __z); \ |
| 930 | } \ |
| 931 | \ |
| 932 | template <typename _Tp, typename _Abi> \ |
| 933 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 934 | const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \ |
| 935 | const __type_identity_t<simd<_Tp, _Abi>>& __z) \ |
| 936 | { \ |
| 937 | return _NAME(__x, __y, __z); \ |
| 938 | } \ |
| 939 | \ |
| 940 | template <typename _Tp, typename _Abi> \ |
| 941 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \ |
| 942 | const __type_identity_t<simd<_Tp, _Abi>>& __x, \ |
| 943 | const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \ |
| 944 | { \ |
| 945 | return _NAME(__x, __y, __z); \ |
| 946 | } |
| 947 | |
| 948 | template <typename _R, typename _ToApply, typename _Tp, typename... _Tps> |
| 949 | _GLIBCXX_SIMD_INTRINSIC _R |
| 950 | __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0, |
| 951 | const _Tps&... __args) |
| 952 | { |
| 953 | return {__private_init, |
| 954 | __data(__arg0)._M_apply_per_chunk( |
| 955 | [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 956 | using _V = typename decltype(__impl)::simd_type; |
| 957 | return __data(__apply(_V(__private_init, __inner)...)); |
| 958 | }, |
| 959 | __data(__args)...)}; |
| 960 | } |
| 961 | |
| 962 | template <typename _VV> |
| 963 | __remove_cvref_t<_VV> |
| 964 | __hypot(_VV __x, _VV __y) |
| 965 | { |
| 966 | using _V = __remove_cvref_t<_VV>; |
| 967 | using _Tp = typename _V::value_type; |
| 968 | if constexpr (_V::size() == 1) |
| 969 | return std::hypot(_Tp(__x[0]), _Tp(__y[0])); |
| 970 | else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>) |
| 971 | { |
| 972 | return __fixed_size_apply<_V>([](auto __a, |
| 973 | auto __b) { return hypot(__a, __b); }, |
| 974 | __x, __y); |
| 975 | } |
| 976 | else |
| 977 | { |
| 978 | // A simple solution for _Tp == float would be to cast to double and |
| 979 | // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with |
| 980 | // dp. It still needs the Annex F fixups though and isn't faster on |
| 981 | // Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for |
| 982 | // AVX-512). |
| 983 | using namespace __float_bitwise_operators; |
| 984 | _V __absx = abs(__x); // no error |
| 985 | _V __absy = abs(__y); // no error |
| 986 | _V __hi = max(__absx, __absy); // no error |
| 987 | _V __lo = min(__absy, __absx); // no error |
| 988 | |
| 989 | // round __hi down to the next power-of-2: |
| 990 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>); |
| 991 | |
| 992 | #ifndef __FAST_MATH__ |
| 993 | if constexpr (__have_neon && !__have_neon_a32) |
| 994 | { // With ARMv7 NEON, we have no subnormals and must use slightly |
| 995 | // different strategy |
| 996 | const _V __hi_exp = __hi & __inf; |
| 997 | _V __scale_back = __hi_exp; |
| 998 | // For large exponents (max & max/2) the inversion comes too close |
| 999 | // to subnormals. Subtract 3 from the exponent: |
| 1000 | where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125); |
| 1001 | // Invert and adjust for the off-by-one error of inversion via xor: |
| 1002 | const _V __scale = (__scale_back ^ __inf) * _Tp(.5); |
| 1003 | const _V __h1 = __hi * __scale; |
| 1004 | const _V __l1 = __lo * __scale; |
| 1005 | _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1); |
| 1006 | // Fix up hypot(0, 0) to not be NaN: |
| 1007 | where(__hi == 0, __r) = 0; |
| 1008 | return __r; |
| 1009 | } |
| 1010 | #endif |
| 1011 | |
| 1012 | #ifdef __FAST_MATH__ |
| 1013 | // With fast-math, ignore precision of subnormals and inputs from |
| 1014 | // __finite_max_v/2 to __finite_max_v. This removes all |
| 1015 | // branching/masking. |
| 1016 | if constexpr (true) |
| 1017 | #else |
| 1018 | if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x)) |
| 1019 | && all_of(isnormal(__y)))) |
| 1020 | #endif |
| 1021 | { |
| 1022 | const _V __hi_exp = __hi & __inf; |
          // ((__hi + __hi) & __inf) ^ __inf almost works for computing
          // __scale, except when (__hi + __hi) & __inf == __inf, in which
          // case __scale becomes 0 (should be min/2 instead) and thus loses
          // the information from __lo.
| 1028 | #ifdef __FAST_MATH__ |
| 1029 | using _Ip = __int_for_sizeof_t<_Tp>; |
| 1030 | using _IV = rebind_simd_t<_Ip, _V>; |
| 1031 | const auto __as_int = __bit_cast<_IV>(__hi_exp); |
| 1032 | const _V __scale |
| 1033 | = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int); |
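          // For a power of two, 2 * __bit_cast<_Ip>(_Tp(1)) - __as_int
          // negates the unbiased exponent in the integer representation,
          // computing 1 / __hi_exp without a division (__hi_exp has zero
          // mantissa and sign bits).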
| 1034 | #else |
| 1035 | const _V __scale = (__hi_exp ^ __inf) * _Tp(.5); |
| 1036 | #endif |
| 1037 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask |
| 1038 | = __norm_min_v<_Tp> - __denorm_min_v<_Tp>; |
| 1039 | const _V __h1 = (__hi & __mant_mask) | _V(1); |
| 1040 | const _V __l1 = __lo * __scale; |
| 1041 | return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1); |
| 1042 | } |
| 1043 | else |
| 1044 | { |
          // slower path to support subnormals
          // if __hi is subnormal, avoid scaling by inf & a final mul by 0
          // (which yields NaN): such elements keep the defaults below
          // (__scale = 1 / __norm_min_v, __hi_exp = __norm_min_v)
| 1048 | _V __scale = _V(1 / __norm_min_v<_Tp>); |
| 1049 | // invert exponent w/o error and w/o using the slow divider unit: |
| 1050 | // xor inverts the exponent but off by 1. Multiplication with .5 |
| 1051 | // adjusts for the discrepancy. |
| 1052 | where(__hi >= __norm_min_v<_Tp>, __scale) |
| 1053 | = ((__hi & __inf) ^ __inf) * _Tp(.5); |
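          // E.g. binary32: __hi_exp = 2^(e-127) has exponent field e; XOR
          // with __inf (field 0xFF) gives field 255 - e, i.e. the value
          // 2^(128-e), which is twice 1 / __hi_exp, hence the factor .5.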
| 1054 | // adjust final exponent for subnormal inputs |
| 1055 | _V __hi_exp = __norm_min_v<_Tp>; |
| 1056 | where(__hi >= __norm_min_v<_Tp>, __hi_exp) |
| 1057 | = __hi & __inf; // no error |
| 1058 | _V __h1 = __hi * __scale; // no error |
| 1059 | _V __l1 = __lo * __scale; // no error |
| 1060 | |
| 1061 | // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²): |
| 1062 | // this ensures no overflow in the argument to sqrt |
| 1063 | _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1); |
| 1064 | #ifdef __STDC_IEC_559__ |
| 1065 | // fixup for Annex F requirements |
| 1066 | // the naive fixup goes like this: |
| 1067 | // |
| 1068 | // where(__l1 == 0, __r) = __hi; |
| 1069 | // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>; |
| 1070 | // where(isinf(__absx) || isinf(__absy), __r) = __inf; |
| 1071 | // |
| 1072 | // The fixup can be prepared in parallel with the sqrt, requiring a |
| 1073 | // single blend step after hi_exp * sqrt, reducing latency and |
| 1074 | // throughput: |
| 1075 | _V __fixup = __hi; // __lo == 0 |
| 1076 | where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>; |
| 1077 | where(isinf(__absx) || isinf(__absy), __fixup) = __inf; |
| 1078 | where(!(__lo == 0 || isunordered(__x, __y) |
| 1079 | || (isinf(__absx) || isinf(__absy))), |
| 1080 | __fixup) |
| 1081 | = __r; |
| 1082 | __r = __fixup; |
| 1083 | #endif |
| 1084 | return __r; |
| 1085 | } |
| 1086 | } |
| 1087 | } |
| 1088 | |
| 1089 | template <typename _Tp, typename _Abi> |
| 1090 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> |
| 1091 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) |
| 1092 | { |
| 1093 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>, |
| 1094 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x, |
| 1095 | __y); |
| 1096 | } |
| 1097 | |
| 1098 | _GLIBCXX_SIMD_CVTING2(hypot) |
| 1099 | |
| 1100 | template <typename _VV> |
| 1101 | __remove_cvref_t<_VV> |
| 1102 | __hypot(_VV __x, _VV __y, _VV __z) |
| 1103 | { |
| 1104 | using _V = __remove_cvref_t<_VV>; |
| 1105 | using _Abi = typename _V::abi_type; |
| 1106 | using _Tp = typename _V::value_type; |
| 1107 | /* FIXME: enable after PR77776 is resolved |
| 1108 | if constexpr (_V::size() == 1) |
| 1109 | return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0])); |
| 1110 | else |
| 1111 | */ |
| 1112 | if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1) |
| 1113 | { |
| 1114 | return __fixed_size_apply<simd<_Tp, _Abi>>( |
| 1115 | [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1116 | return hypot(__a, __b, __c); |
| 1117 | }, __x, __y, __z); |
| 1118 | } |
| 1119 | else |
| 1120 | { |
| 1121 | using namespace __float_bitwise_operators; |
| 1122 | const _V __absx = abs(__x); // no error |
| 1123 | const _V __absy = abs(__y); // no error |
| 1124 | const _V __absz = abs(__z); // no error |
| 1125 | _V __hi = max(max(__absx, __absy), __absz); // no error |
| 1126 | _V __l0 = min(__absz, max(__absx, __absy)); // no error |
| 1127 | _V __l1 = min(__absy, __absx); // no error |
| 1128 | if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000 |
| 1129 | && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1) |
| 1130 | { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or |
| 1131 | // NaN. In this case the bit-tricks don't work, they require IEC559 |
| 1132 | // binary32 or binary64 format. |
| 1133 | #ifdef __STDC_IEC_559__ |
| 1134 | // fixup for Annex F requirements |
| 1135 | if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0])) |
| 1136 | return __infinity_v<_Tp>; |
| 1137 | else if (isunordered(__absx[0], __absy[0] + __absz[0])) |
| 1138 | return __quiet_NaN_v<_Tp>; |
| 1139 | else if (__l0[0] == 0 && __l1[0] == 0) |
| 1140 | return __hi; |
| 1141 | #endif |
| 1142 | _V __hi_exp = __hi; |
| 1143 | const _ULLong __tmp = 0x8000'0000'0000'0000ull; |
| 1144 | __builtin_memcpy(&__data(__hi_exp), &__tmp, 8); |
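          // Overwriting the low 8 bytes sets the significand to just the
          // explicit integer bit while keeping sign and exponent (x86 is
          // little-endian), i.e. __hi_exp is __hi rounded down to a power
          // of two.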
| 1145 | const _V __scale = 1 / __hi_exp; |
| 1146 | __hi *= __scale; |
| 1147 | __l0 *= __scale; |
| 1148 | __l1 *= __scale; |
| 1149 | return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi); |
| 1150 | } |
| 1151 | else |
| 1152 | { |
| 1153 | // round __hi down to the next power-of-2: |
| 1154 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>); |
| 1155 | |
| 1156 | #ifndef __FAST_MATH__ |
| 1157 | if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32) |
| 1158 | { // With ARMv7 NEON, we have no subnormals and must use slightly |
| 1159 | // different strategy |
| 1160 | const _V __hi_exp = __hi & __inf; |
| 1161 | _V __scale_back = __hi_exp; |
| 1162 | // For large exponents (max & max/2) the inversion comes too |
| 1163 | // close to subnormals. Subtract 3 from the exponent: |
| 1164 | where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125); |
| 1165 | // Invert and adjust for the off-by-one error of inversion via |
| 1166 | // xor: |
| 1167 | const _V __scale = (__scale_back ^ __inf) * _Tp(.5); |
| 1168 | const _V __h1 = __hi * __scale; |
| 1169 | __l0 *= __scale; |
| 1170 | __l1 *= __scale; |
| 1171 | _V __lo = __l0 * __l0 |
| 1172 | + __l1 * __l1; // add the two smaller values first |
| 1173 | asm("" : "+m" (__lo)); |
| 1174 | _V __r = __scale_back * sqrt(__h1 * __h1 + __lo); |
| 1175 | // Fix up hypot(0, 0, 0) to not be NaN: |
| 1176 | where(__hi == 0, __r) = 0; |
| 1177 | return __r; |
| 1178 | } |
| 1179 | #endif |
| 1180 | |
| 1181 | #ifdef __FAST_MATH__ |
| 1182 | // With fast-math, ignore precision of subnormals and inputs from |
| 1183 | // __finite_max_v/2 to __finite_max_v. This removes all |
| 1184 | // branching/masking. |
| 1185 | if constexpr (true) |
| 1186 | #else |
| 1187 | if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x)) |
| 1188 | && all_of(isnormal(__y)) |
| 1189 | && all_of(isnormal(__z)))) |
| 1190 | #endif |
| 1191 | { |
| 1192 | const _V __hi_exp = __hi & __inf; |
              // ((__hi + __hi) & __inf) ^ __inf almost works for computing
              // __scale, except when (__hi + __hi) & __inf == __inf, in
              // which case __scale becomes 0 (should be min/2 instead) and
              // thus loses the information from __lo.
| 1198 | #ifdef __FAST_MATH__ |
| 1199 | using _Ip = __int_for_sizeof_t<_Tp>; |
| 1200 | using _IV = rebind_simd_t<_Ip, _V>; |
| 1201 | const auto __as_int = __bit_cast<_IV>(__hi_exp); |
| 1202 | const _V __scale |
| 1203 | = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int); |
| 1204 | #else |
| 1205 | const _V __scale = (__hi_exp ^ __inf) * _Tp(.5); |
| 1206 | #endif |
| 1207 | constexpr _Tp __mant_mask |
| 1208 | = __norm_min_v<_Tp> - __denorm_min_v<_Tp>; |
| 1209 | const _V __h1 = (__hi & _V(__mant_mask)) | _V(1); |
| 1210 | __l0 *= __scale; |
| 1211 | __l1 *= __scale; |
| 1212 | const _V __lo |
| 1213 | = __l0 * __l0 |
| 1214 | + __l1 * __l1; // add the two smaller values first |
| 1215 | return __hi_exp * sqrt(__lo + __h1 * __h1); |
| 1216 | } |
| 1217 | else |
| 1218 | { |
              // slower path to support subnormals
              // if __hi is subnormal, avoid scaling by inf & a final mul
              // by 0 (which yields NaN): such elements keep the defaults
              // below (__scale = 1 / __norm_min_v, __hi_exp = __norm_min_v)
| 1222 | _V __scale = _V(1 / __norm_min_v<_Tp>); |
| 1223 | // invert exponent w/o error and w/o using the slow divider |
| 1224 | // unit: xor inverts the exponent but off by 1. Multiplication |
| 1225 | // with .5 adjusts for the discrepancy. |
| 1226 | where(__hi >= __norm_min_v<_Tp>, __scale) |
| 1227 | = ((__hi & __inf) ^ __inf) * _Tp(.5); |
| 1228 | // adjust final exponent for subnormal inputs |
| 1229 | _V __hi_exp = __norm_min_v<_Tp>; |
| 1230 | where(__hi >= __norm_min_v<_Tp>, __hi_exp) |
| 1231 | = __hi & __inf; // no error |
| 1232 | _V __h1 = __hi * __scale; // no error |
| 1233 | __l0 *= __scale; // no error |
| 1234 | __l1 *= __scale; // no error |
| 1235 | _V __lo = __l0 * __l0 |
| 1236 | + __l1 * __l1; // add the two smaller values first |
| 1237 | _V __r = __hi_exp * sqrt(__lo + __h1 * __h1); |
| 1238 | #ifdef __STDC_IEC_559__ |
| 1239 | // fixup for Annex F requirements |
| 1240 | _V __fixup = __hi; // __lo == 0 |
| 1241 | // where(__lo == 0, __fixup) = __hi; |
| 1242 | where(isunordered(__x, __y + __z), __fixup) |
| 1243 | = __quiet_NaN_v<_Tp>; |
| 1244 | where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup) |
| 1245 | = __inf; |
              // Instead of __lo == 0, the following could depend on
              // __h1² == __h1² + __lo (i.e. __hi is so much larger than the
              // other two inputs that the result is exactly __hi). While
              // this may improve precision, it is likely to reduce
              // efficiency if the ISA has FMAs (because __h1² + __lo is an
              // FMA, but the intermediate __h1² must be kept).
| 1253 | where(!(__lo == 0 || isunordered(__x, __y + __z) |
| 1254 | || isinf(__absx) || isinf(__absy) || isinf(__absz)), |
| 1255 | __fixup) |
| 1256 | = __r; |
| 1257 | __r = __fixup; |
| 1258 | #endif |
| 1259 | return __r; |
| 1260 | } |
| 1261 | } |
| 1262 | } |
| 1263 | } |
| 1264 | |
| 1265 | template <typename _Tp, typename _Abi> |
| 1266 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> |
| 1267 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, |
| 1268 | const simd<_Tp, _Abi>& __z) |
| 1269 | { |
| 1270 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>, |
| 1271 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x, |
| 1272 | __y, |
| 1273 | __z); |
| 1274 | } |
| 1275 | |
| 1276 | _GLIBCXX_SIMD_CVTING3(hypot) |
| 1277 | |
| 1278 | _GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp) |
| 1279 | |
| 1280 | _GLIBCXX_SIMD_MATH_CALL_(sqrt) |
| 1281 | _GLIBCXX_SIMD_MATH_CALL_(erf) |
| 1282 | _GLIBCXX_SIMD_MATH_CALL_(erfc) |
| 1283 | _GLIBCXX_SIMD_MATH_CALL_(lgamma) |
| 1284 | _GLIBCXX_SIMD_MATH_CALL_(tgamma) |
| 1285 | _GLIBCXX_SIMD_MATH_CALL_(ceil) |
| 1286 | _GLIBCXX_SIMD_MATH_CALL_(floor) |
| 1287 | _GLIBCXX_SIMD_MATH_CALL_(nearbyint) |
| 1288 | _GLIBCXX_SIMD_MATH_CALL_(rint) |
| 1289 | _GLIBCXX_SIMD_MATH_CALL_(lrint) |
| 1290 | _GLIBCXX_SIMD_MATH_CALL_(llrint) |
| 1291 | |
| 1292 | _GLIBCXX_SIMD_MATH_CALL_(round) |
| 1293 | _GLIBCXX_SIMD_MATH_CALL_(lround) |
| 1294 | _GLIBCXX_SIMD_MATH_CALL_(llround) |
| 1295 | |
| 1296 | _GLIBCXX_SIMD_MATH_CALL_(trunc) |
| 1297 | |
| 1298 | _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp) |
| 1299 | _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp) |
| 1300 | _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*) |
| 1301 | |
| 1302 | template <typename _Tp, typename _Abi> |
| 1303 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1304 | copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) |
| 1305 | { |
| 1306 | if constexpr (simd_size_v<_Tp, _Abi> == 1) |
| 1307 | return std::copysign(__x[0], __y[0]); |
| 1308 | else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12) |
| 1309 | // Remove this case once __bit_cast is implemented via __builtin_bit_cast. |
| 1310 | // It is necessary, because __signmask below cannot be computed at compile |
| 1311 | // time. |
| 1312 | return simd<_Tp, _Abi>( |
| 1313 | [&](auto __i) { return std::copysign(__x[__i], __y[__i]); }); |
| 1314 | else |
| 1315 | { |
| 1316 | using _V = simd<_Tp, _Abi>; |
| 1317 | using namespace std::experimental::__float_bitwise_operators; |
| 1318 | _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1); |
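      // _V(1) and _V(-1) differ only in the sign bit, so __signmask selects
      // exactly that bit. __x & (__x ^ __signmask) clears it from __x
      // (equivalent to __x & ~__signmask); __y & __signmask supplies the sign.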
| 1319 | return (__x & (__x ^ __signmask)) | (__y & __signmask); |
| 1320 | } |
| 1321 | } |
| 1322 | |
| 1323 | _GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp) |
| 1324 | // not covered in [parallel.simd.math]: |
| 1325 | // _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double) |
| 1326 | _GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp) |
| 1327 | _GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp) |
| 1328 | _GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp) |
| 1329 | |
| 1330 | _GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp) |
| 1331 | _GLIBCXX_SIMD_MATH_CALL_(fpclassify) |
| 1332 | _GLIBCXX_SIMD_MATH_CALL_(isfinite) |
| 1333 | |
| 1334 | // isnan and isinf require special treatment because old glibc may declare |
| 1335 | // `int isinf(double)`. |
| 1336 | template <typename _Tp, typename _Abi, typename..., |
| 1337 | typename _R = _Math_return_type_t<bool, _Tp, _Abi>> |
| 1338 | enable_if_t<is_floating_point_v<_Tp>, _R> |
| 1339 | isinf(simd<_Tp, _Abi> __x) |
| 1340 | { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; } |
| 1341 | |
| 1342 | template <typename _Tp, typename _Abi, typename..., |
| 1343 | typename _R = _Math_return_type_t<bool, _Tp, _Abi>> |
| 1344 | enable_if_t<is_floating_point_v<_Tp>, _R> |
| 1345 | isnan(simd<_Tp, _Abi> __x) |
| 1346 | { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; } |
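
// Illustrative comment (not part of this header): the simd_mask returned by
// isnan/isinf composes with where() for branch-free blending. A minimal
// sketch, assuming the stdx alias:
//
//   #include <experimental/simd>
//   namespace stdx = std::experimental;
//
//   template <typename T, typename Abi>
//   stdx::simd<T, Abi> zero_nans(stdx::simd<T, Abi> v)
//   {
//     where(isnan(v), v) = T(); // assign 0 only in NaN elements
//     return v;
//   }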
| 1347 | |
| 1348 | _GLIBCXX_SIMD_MATH_CALL_(isnormal) |
| 1349 | |
| 1350 | template <typename..., typename _Tp, typename _Abi> |
| 1351 | simd_mask<_Tp, _Abi> |
| 1352 | signbit(simd<_Tp, _Abi> __x) |
| 1353 | { |
| 1354 | if constexpr (is_integral_v<_Tp>) |
| 1355 | { |
| 1356 | if constexpr (is_unsigned_v<_Tp>) |
| 1357 | return simd_mask<_Tp, _Abi>{}; // false |
| 1358 | else |
| 1359 | return __x < 0; |
| 1360 | } |
| 1361 | else |
| 1362 | return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))}; |
| 1363 | } |
| 1364 | |
| 1365 | _GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp) |
| 1366 | _GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp) |
| 1367 | _GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp) |
| 1368 | _GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp) |
| 1369 | _GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp) |
| 1370 | _GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp) |
| 1371 | |
| 1372 | /* not covered in [parallel.simd.math] |
| 1373 | template <typename _Abi> __doublev<_Abi> nan(const char* tagp); |
| 1374 | template <typename _Abi> __floatv<_Abi> nanf(const char* tagp); |
| 1375 | template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp); |
| 1376 | |
| 1377 | template <typename _V> struct simd_div_t { |
| 1378 | _V quot, rem; |
| 1379 | }; |
| 1380 | |
| 1381 | template <typename _Abi> |
simd_div_t<__scharv<_Abi>> div(__scharv<_Abi> numer,
                               __scharv<_Abi> denom);
| 1384 | template <typename _Abi> |
| 1385 | simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, |
| 1386 | __shortv<_Abi> denom); |
| 1387 | template <typename _Abi> |
| 1388 | simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom); |
| 1389 | template <typename _Abi> |
| 1390 | simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, |
| 1391 | __longv<_Abi> denom); |
| 1392 | template <typename _Abi> |
| 1393 | simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, |
| 1394 | __llongv<_Abi> denom); |
| 1395 | */ |
| 1396 | |
| 1397 | // special math {{{ |
| 1398 | template <typename _Tp, typename _Abi> |
| 1399 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1400 | assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1401 | const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, |
| 1402 | const simd<_Tp, _Abi>& __x) |
| 1403 | { |
| 1404 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1405 | return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]); |
| 1406 | }); |
| 1407 | } |
| 1408 | |
| 1409 | template <typename _Tp, typename _Abi> |
| 1410 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1411 | assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1412 | const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, |
| 1413 | const simd<_Tp, _Abi>& __x) |
| 1414 | { |
| 1415 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1416 | return std::assoc_legendre(__n[__i], __m[__i], __x[__i]); |
| 1417 | }); |
| 1418 | } |
| 1419 | |
| 1420 | _GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp) |
| 1421 | _GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1) |
| 1422 | _GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2) |
| 1423 | _GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp) |
| 1424 | _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp) |
| 1425 | _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp) |
| 1426 | _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp) |
| 1427 | _GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp) |
| 1428 | _GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp) |
| 1429 | _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp) |
| 1430 | _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp) |
| 1431 | _GLIBCXX_SIMD_MATH_CALL_(expint) |
| 1432 | |
| 1433 | template <typename _Tp, typename _Abi> |
| 1434 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1435 | hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1436 | const simd<_Tp, _Abi>& __x) |
| 1437 | { |
| 1438 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1439 | return std::hermite(__n[__i], __x[__i]); |
| 1440 | }); |
| 1441 | } |
| 1442 | |
| 1443 | template <typename _Tp, typename _Abi> |
| 1444 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1445 | laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1446 | const simd<_Tp, _Abi>& __x) |
| 1447 | { |
| 1448 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1449 | return std::laguerre(__n[__i], __x[__i]); |
| 1450 | }); |
| 1451 | } |
| 1452 | |
| 1453 | template <typename _Tp, typename _Abi> |
| 1454 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1455 | legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1456 | const simd<_Tp, _Abi>& __x) |
| 1457 | { |
| 1458 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1459 | return std::legendre(__n[__i], __x[__i]); |
| 1460 | }); |
| 1461 | } |
| 1462 | |
| 1463 | _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta) |
| 1464 | |
| 1465 | template <typename _Tp, typename _Abi> |
| 1466 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1467 | sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1468 | const simd<_Tp, _Abi>& __x) |
| 1469 | { |
| 1470 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1471 | return std::sph_bessel(__n[__i], __x[__i]); |
| 1472 | }); |
| 1473 | } |
| 1474 | |
| 1475 | template <typename _Tp, typename _Abi> |
| 1476 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1477 | sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l, |
| 1478 | const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, |
const simd<_Tp, _Abi>& __theta)
| 1480 | { |
| 1481 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
return std::sph_legendre(__l[__i], __m[__i], __theta[__i]);
| 1483 | }); |
| 1484 | } |
| 1485 | |
| 1486 | template <typename _Tp, typename _Abi> |
| 1487 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> |
| 1488 | sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, |
| 1489 | const simd<_Tp, _Abi>& __x) |
| 1490 | { |
| 1491 | return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
| 1492 | return std::sph_neumann(__n[__i], __x[__i]); |
| 1493 | }); |
| 1494 | } |
| 1495 | // }}} |
| 1496 | |
| 1497 | #undef _GLIBCXX_SIMD_MATH_CALL_ |
| 1498 | #undef _GLIBCXX_SIMD_MATH_CALL2_ |
| 1499 | #undef _GLIBCXX_SIMD_MATH_CALL3_ |
| 1500 | |
| 1501 | _GLIBCXX_SIMD_END_NAMESPACE |
| 1502 | |
| 1503 | #endif // __cplusplus >= 201703L |
| 1504 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ |
| 1505 | |
| 1506 | // vim: foldmethod=marker sw=2 ts=8 noet sts=2 |
| 1507 | |