sqrt.h source code [libc/src/__support/fixed_point/sqrt.h]

1	//===-- Calculate square root of fixed point numbers. ------ C++ --=========//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_SQRT_H
10	#define LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_SQRT_H
11
12	#include "include/llvm-libc-macros/stdfix-macros.h"
13	#include "src/__support/CPP/bit.h"
14	#include "src/__support/CPP/limits.h" // CHAR_BIT
15	#include "src/__support/CPP/type_traits.h"
16	#include "src/__support/macros/attributes.h" // LIBC_INLINE
17	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18
19	#include "fx_rep.h"
20
21	#ifdef LIBC_COMPILER_HAS_FIXED_POINT
22
23	namespace LIBC_NAMESPACE::fixed_point {
24
25	namespace internal {
26
27	template <typename T> struct SqrtConfig;
28
29	template <> struct SqrtConfig<unsigned short fract> {
30	using Type = unsigned short fract;
31	static constexpr int EXTRA_STEPS = `0`;
32
33	// Linear approximation for the initial values, with errors bounded by:
34	// max(1.5 2^-11, eps)*
35	// Generated with Sollya:
36	// > for i from 4 to 15 do {
37	// P = fpminimax(sqrt(x), 1, [\|8, 8\|], [i 2^-4, (i + 1)2^-4],
38	// fixed, absolute);
39	// print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},");
40	// };
41	static constexpr Type FIRST_APPROX[`12`][`2`] = {
42	{`0x1.e8p-1uhr`, `0x1.0cp-2uhr`}, {`0x1.bap-1uhr`, `0x1.28p-2uhr`},
43	{`0x1.94p-1uhr`, `0x1.44p-2uhr`}, {`0x1.74p-1uhr`, `0x1.6p-2uhr`},
44	{`0x1.6p-1uhr`, `0x1.74p-2uhr`}, {`0x1.4ep-1uhr`, `0x1.88p-2uhr`},
45	{`0x1.3ep-1uhr`, `0x1.9cp-2uhr`}, {`0x1.32p-1uhr`, `0x1.acp-2uhr`},
46	{`0x1.22p-1uhr`, `0x1.c4p-2uhr`}, {`0x1.18p-1uhr`, `0x1.d4p-2uhr`},
47	{`0x1.08p-1uhr`, `0x1.fp-2uhr`}, {`0x1.04p-1uhr`, `0x1.f8p-2uhr`},
48	};
49	};
50
51	template <> struct SqrtConfig<unsigned fract> {
52	using Type = unsigned fract;
53	static constexpr int EXTRA_STEPS = `1`;
54
55	// Linear approximation for the initial values, with errors bounded by:
56	// max(1.5 2^-11, eps)*
57	// Generated with Sollya:
58	// > for i from 4 to 15 do {
59	// P = fpminimax(sqrt(x), 1, [\|16, 16\|], [i 2^-4, (i + 1)2^-4],
60	// fixed, absolute);
61	// print("{", coeff(P, 1), "ur,", coeff(P, 0), "ur},");
62	// };
63	static constexpr Type FIRST_APPROX[`12`][`2`] = {
64	{`0x1.e378p-1ur`, `0x1.0ebp-2ur`}, {`0x1.b512p-1ur`, `0x1.2b94p-2ur`},
65	{`0x1.91fp-1ur`, `0x1.45dcp-2ur`}, {`0x1.7622p-1ur`, `0x1.5e24p-2ur`},
66	{`0x1.5f5ap-1ur`, `0x1.74e4p-2ur`}, {`0x1.4c58p-1ur`, `0x1.8a4p-2ur`},
67	{`0x1.3c1ep-1ur`, `0x1.9e84p-2ur`}, {`0x1.2e0cp-1ur`, `0x1.b1d8p-2ur`},
68	{`0x1.21aap-1ur`, `0x1.c468p-2ur`}, {`0x1.16bap-1ur`, `0x1.d62cp-2ur`},
69	{`0x1.0cfp-1ur`, `0x1.e74cp-2ur`}, {`0x1.0418p-1ur`, `0x1.f7ep-2ur`},
70	};
71	};
72
73	template <> struct SqrtConfig<unsigned long fract> {
74	using Type = unsigned long fract;
75	static constexpr int EXTRA_STEPS = `2`;
76
77	// Linear approximation for the initial values, with errors bounded by:
78	// max(1.5 2^-11, eps)*
79	// Generated with Sollya:
80	// > for i from 4 to 15 do {
81	// P = fpminimax(sqrt(x), 1, [\|32, 32\|], [i 2^-4, (i + 1)2^-4],
82	// fixed, absolute);
83	// print("{", coeff(P, 1), "ulr,", coeff(P, 0), "ulr},");
84	// };
85	static constexpr Type FIRST_APPROX[`12`][`2`] = {
86	{`0x1.e3779b98p-1ulr`, `0x1.0eaff788p-2ulr`},
87	{`0x1.b5167872p-1ulr`, `0x1.2b908ad4p-2ulr`},
88	{`0x1.91f195cap-1ulr`, `0x1.45da800cp-2ulr`},
89	{`0x1.761ebcb4p-1ulr`, `0x1.5e27004cp-2ulr`},
90	{`0x1.5f619986p-1ulr`, `0x1.74db933cp-2ulr`},
91	{`0x1.4c583adep-1ulr`, `0x1.8a3fbfccp-2ulr`},
92	{`0x1.3c1a591cp-1ulr`, `0x1.9e88373cp-2ulr`},
93	{`0x1.2e08545ap-1ulr`, `0x1.b1dd2534p-2ulr`},
94	{`0x1.21b05c0ap-1ulr`, `0x1.c45e023p-2ulr`},
95	{`0x1.16becd02p-1ulr`, `0x1.d624031p-2ulr`},
96	{`0x1.0cf49fep-1ulr`, `0x1.e743b844p-2ulr`},
97	{`0x1.04214e9cp-1ulr`, `0x1.f7ce2c3cp-2ulr`},
98	};
99	};
100
101	template <>
102	struct SqrtConfig<unsigned short accum> : SqrtConfig<unsigned fract> {};
103
104	template <>
105	struct SqrtConfig<unsigned accum> : SqrtConfig<unsigned long fract> {};
106
107	// Integer square root
108	template <> struct SqrtConfig<unsigned short> {
109	using OutType = unsigned short accum;
110	using FracType = unsigned fract;
111	// For fast-but-less-accurate version
112	using FastFracType = unsigned short fract;
113	using HalfType = unsigned char;
114	};
115
116	template <> struct SqrtConfig<unsigned int> {
117	using OutType = unsigned accum;
118	using FracType = unsigned long fract;
119	// For fast-but-less-accurate version
120	using FastFracType = unsigned fract;
121	using HalfType = unsigned short;
122	};
123
124	// TODO: unsigned long accum type is 64-bit, and will need 64-bit fract type.
125	// Probably we will use DyadicFloat<64> for intermediate computations instead.
126
127	} // namespace internal
128
129	// Core computation for sqrt with normalized inputs (0.25 <= x < 1).
130	template <typename Config>
131	LIBC_INLINE constexpr typename Config::Type
132	sqrt_core(typename Config::Type x_frac) {
133	using FracType = typename Config::Type;
134	using FXRep = FXRep<FracType>;
135	using StorageType = typename FXRep::StorageType;
136	// Exact case:
137	if (x_frac == FXRep::ONE_FOURTH())
138	return FXRep::ONE_HALF();
139
140	// Use use Newton method to approximate sqrt(a):
141	// x_{n + 1} = 1/2 (x_n + a / x_n)
142	// For the initial values, we choose x_0
143
144	// Use the leading 4 bits to do look up for sqrt(x).
145	// After normalization, 0.25 <= x_frac < 1, so the leading 4 bits of x_frac
146	// are between 0b0100 and 0b1111. Hence the lookup table only needs 12
147	// entries, and we can get the index by subtracting the leading 4 bits of
148	// x_frac by 4 = 0b0100.
149	StorageType x_bit = cpp::bit_cast<StorageType>(x_frac);
150	int index = (static_cast<int>(x_bit >> (FXRep::TOTAL_LEN - `4`))) - `4`;
151	FracType a = Config::FIRST_APPROX[index][`0`];
152	FracType b = Config::FIRST_APPROX[index][`1`];
153
154	// Initial approximation step.
155	// Estimated error bounds: \| r - sqrt(x_frac) \| < max(1.5 2^-11, eps).*
156	FracType r = a * x_frac + b;
157
158	// Further Newton-method iterations for square-root:
159	// x_{n + 1} = 0.5 (x_n + a / x_n)*
160	// We distribute and do the multiplication by 0.5 first to avoid overflow.
161	// TODO: Investigate the performance and accuracy of using division-free
162	// iterations from:
163	// Blanchard, J. D. and Chamberland, M., "Newton's Method Without Division",
164	// The American Mathematical Monthly (2023).
165	// https://chamberland.math.grinnell.edu/papers/newton.pdf
166	for (int i = `0`; i < Config::EXTRA_STEPS; ++i)
167	r = (r >> `1`) + (x_frac >> `1`) / r;
168
169	return r;
170	}
171
172	template <typename T>
173	LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
174	using BitType = typename FXRep<T>::StorageType;
175	BitType x_bit = cpp::bit_cast<BitType>(x);
176
177	if (LIBC_UNLIKELY(x_bit == `0`))
178	return FXRep<T>::ZERO();
179
180	int leading_zeros = cpp::countl_zero(x_bit);
181	constexpr int STORAGE_LENGTH = sizeof(BitType) * CHAR_BIT;
182	constexpr int EXP_ADJUSTMENT = STORAGE_LENGTH - FXRep<T>::FRACTION_LEN - `1`;
183	// x_exp is the real exponent of the leading bit of x.
184	int x_exp = EXP_ADJUSTMENT - leading_zeros;
185	int shift = EXP_ADJUSTMENT - `1` - (x_exp & (~`1`));
186	// Normalize.
187	x_bit <<= shift;
188	using FracType = typename internal::SqrtConfig<T>::Type;
189	FracType x_frac = cpp::bit_cast<FracType>(x_bit);
190
191	// Compute sqrt(x_frac) using Newton-method.
192	FracType r = sqrt_core<internal::SqrtConfig<T>>(x_frac);
193
194	// Re-scaling
195	r >>= EXP_ADJUSTMENT - (x_exp >> `1`);
196
197	// Return result.
198	return cpp::bit_cast<T>(r);
199	}
200
201	// Integer square root - Accurate version:
202	// Absolute errors < 2^(-fraction length).
203	template <typename T>
204	LIBC_INLINE constexpr typename internal::SqrtConfig<T>::OutType isqrt(T x) {
205	using OutType = typename internal::SqrtConfig<T>::OutType;
206	using FracType = typename internal::SqrtConfig<T>::FracType;
207
208	if (x == `0`)
209	return FXRep<OutType>::ZERO();
210
211	// Normalize the leading bits to the first two bits.
212	// Shift and then Bit cast x to x_frac gives us:
213	// x = 2^(FRACTION_LEN + 1 - shift) x_frac;*
214	int leading_zeros = cpp::countl_zero(x);
215	int shift = ((leading_zeros >> `1`) << `1`);
216	x <<= shift;
217	// Convert to frac type and compute square root.
218	FracType x_frac = cpp::bit_cast<FracType>(x);
219	FracType r = sqrt_core<internal::SqrtConfig<FracType>>(x_frac);
220	// To rescale back to the OutType (Accum)
221	r >>= (shift >> `1`);
222
223	return cpp::bit_cast<OutType>(r);
224	}
225
226	// Integer square root - Fast but less accurate version:
227	// Relative errors < 2^(-fraction length).
228	template <typename T>
229	LIBC_INLINE constexpr typename internal::SqrtConfig<T>::OutType
230	isqrt_fast(T x) {
231	using OutType = typename internal::SqrtConfig<T>::OutType;
232	using FracType = typename internal::SqrtConfig<T>::FastFracType;
233	using StorageType = typename FXRep<FracType>::StorageType;
234
235	if (x == `0`)
236	return FXRep<OutType>::ZERO();
237
238	// Normalize the leading bits to the first two bits.
239	// Shift and then Bit cast x to x_frac gives us:
240	// x = 2^(FRACTION_LEN + 1 - shift) x_frac;*
241	int leading_zeros = cpp::countl_zero(x);
242	int shift = (leading_zeros & (~`1`));
243	x <<= shift;
244	// Convert to frac type and compute square root.
245	FracType x_frac = cpp::bit_cast<FracType>(
246	static_cast<StorageType>(x >> FXRep<FracType>::FRACTION_LEN));
247	OutType r =
248	static_cast<OutType>(sqrt_core<internal::SqrtConfig<FracType>>(x_frac));
249	// To rescale back to the OutType (Accum)
250	r <<= (FXRep<OutType>::INTEGRAL_LEN - (shift >> `1`));
251	return cpp::bit_cast<OutType>(r);
252	}
253
254	} // namespace LIBC_NAMESPACE::fixed_point
255
256	#endif // LIBC_COMPILER_HAS_FIXED_POINT
257
258	#endif // LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_SQRT_H
259

Provided by KDAB

Update your C++ knowledge – Modern C++11/14/17 Training

Find out more

source code of libc/src/__support/fixed_point/sqrt.h