logf_advsimd.c source code [glibc/sysdeps/aarch64/fpu/logf_advsimd.c]

/* Single-precision vector (Advanced SIMD) log function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "v_math.h"
21
22	static const struct data
23	{
24	uint32x4_t min_norm;
25	uint16x8_t special_bound;
26	float32x4_t poly[`7`];
27	float32x4_t ln2, tiny_bound;
28	uint32x4_t off, mantissa_mask;
29	} data = {
30	/ 3.34 ulp error. /
31	.poly = { V4 (-`0x1.3e737cp-3f`), V4 (`0x1.5a9aa2p-3f`), V4 (-`0x1.4f9934p-3f`),
32	V4 (`0x1.961348p-3f`), V4 (-`0x1.00187cp-2f`), V4 (`0x1.555d7cp-2f`),
33	V4 (-`0x1.ffffc8p-2f`) },
34	.ln2 = V4 (`0x1.62e43p-1f`),
35	.tiny_bound = V4 (`0x1p-126`),
36	.min_norm = V4 (`0x00800000`),
37	.special_bound = V8 (`0x7f00`), / asuint32(inf) - min_norm. /
38	.off = V4 (`0x3f2aaaab`), / 0.666667. /
39	.mantissa_mask = V4 (`0x007fffff`)
40	};
41
/* Coefficient of degree I (1..7) of the log polynomial.  The table is
   stored highest degree first, so P (7) is poly[0] and P (1) is poly[6].
   Expects a pointer named `d' to the coefficient table to be in scope.
   The argument and the whole expansion are parenthesised so that
   expressions such as P (a + b) index the intended element.  */
#define P(i) (d->poly[7 - (i)])
43
44	static float32x4_t VPCS_ATTR NOINLINE
45	special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p,
46	uint16x4_t cmp)
47	{
48	/ Fall back to scalar code. /
49	return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp));
50	}
51
52	float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log) (float32x4_t x)
53	{
54	const struct data *d = ptr_barrier (&data);
55	float32x4_t n, p, q, r, r2, y;
56	uint32x4_t u;
57	uint16x4_t cmp;
58
59	u = vreinterpretq_u32_f32 (x);
60	cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm),
61	vget_low_u16 (d->special_bound));
62
63	/ x = 2^n * (1+r), where 2/3 < 1+r < 4/3. /
64	u = vsubq_u32 (u, d->off);
65	n = vcvtq_f32_s32 (
66	vshrq_n_s32 (vreinterpretq_s32_u32 (u), `23`)); / signextend. /
67	u = vandq_u32 (u, d->mantissa_mask);
68	u = vaddq_u32 (u, d->off);
69	r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (`1.0f`));
70
71	/ y = log(1+r) + nln2. /*
72	r2 = vmulq_f32 (r, r);
73	/ nln2 + r + r2(P1 + rP2 + r2(P3 + rP4 + r2(P5 + rP6 + r2P7))). /
74	p = vfmaq_f32 (P (`5`), P (`6`), r);
75	q = vfmaq_f32 (P (`3`), P (`4`), r);
76	y = vfmaq_f32 (P (`1`), P (`2`), r);
77	p = vfmaq_f32 (p, P (`7`), r2);
78	q = vfmaq_f32 (q, p, r2);
79	y = vfmaq_f32 (y, q, r2);
80	p = vfmaq_f32 (r, d->ln2, n);
81
82	if (__glibc_unlikely (v_any_u16h (cmp)))
83	return special_case (x, y, r2, p, cmp);
84	return vfmaq_f32 (p, y, r2);
85	}
86	libmvec_hidden_def (V_NAME_F1 (log))
87	HALF_WIDTH_ALIAS_F1 (log)
88

source code of glibc/sysdeps/aarch64/fpu/logf_advsimd.c