double_double.h source code [libc/src/__support/FPUtil/double_double.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	//===-- Utilities for double-double data type. ------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
10	#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
11
12	#include "multiply_add.h"
13	#include "src/__support/common.h"
14	#include "src/__support/macros/config.h"
15	#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
16	#include "src/__support/number_pair.h"
17
18	namespace LIBC_NAMESPACE_DECL {
19	namespace fputil {
20
21	template <typename T> struct DefaultSplit;
22	template <> struct DefaultSplit<float> {
23	static constexpr size_t VALUE = 12;
24	};
25	template <> struct DefaultSplit<double> {
26	static constexpr size_t VALUE = 27;
27	};
28
29	using DoubleDouble = NumberPair<double>;
30	using FloatFloat = NumberPair<float>;
31
32	// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
33	// r.hi + r.lo = a + b exactly
34	// and \|r.lo\| < eps(r.lo)
35	// Assumption: \|a\| >= \|b\|, or a = 0.
36	template <bool FAST2SUM = true, typename T = double>
37	LIBC_INLINE constexpr NumberPair<T> exact_add(T a, T b) {
38	NumberPair<T> r{0.0, 0.0};
39	if constexpr (FAST2SUM) {
40	r.hi = a + b;
41	T t = r.hi - a;
42	r.lo = b - t;
43	} else {
44	r.hi = a + b;
45	T t1 = r.hi - a;
46	T t2 = r.hi - t1;
47	T t3 = b - t1;
48	T t4 = a - t2;
49	r.lo = t3 + t4;
50	}
51	return r;
52	}
53
54	// Assumption: \|a.hi\| >= \|b.hi\|
55	template <typename T>
56	LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a,
57	const NumberPair<T> &b) {
58	NumberPair<T> r = exact_add(a.hi, b.hi);
59	T lo = a.lo + b.lo;
60	return exact_add(r.hi, r.lo + lo);
61	}
62
63	// Assumption: \|a.hi\| >= \|b\|
64	template <typename T>
65	LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a, T b) {
66	NumberPair<T> r = exact_add<false>(a.hi, b);
67	return exact_add(r.hi, r.lo + a.lo);
68	}
69
70	// Veltkamp's Splitting for double precision.
71	// Note: This is proved to be correct for all rounding modes:
72	// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
73	// Roundings," https://inria.hal.science/hal-04480440.
74	// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
75	template <typename T = double, size_t N = DefaultSplit<T>::VALUE>
76	LIBC_INLINE constexpr NumberPair<T> split(T a) {
77	NumberPair<T> r{0.0, 0.0};
78	// CN = 2^N.
79	constexpr T CN = static_cast<T>(1 << N);
80	constexpr T C = CN + 1.0;
81	double t1 = C * a;
82	double t2 = a - t1;
83	r.hi = t1 + t2;
84	r.lo = a - r.hi;
85	return r;
86	}
87
88	// Helper for non-fma exact mult where the first number is already split.
89	template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
90	LIBC_INLINE NumberPair<T> exact_mult(const NumberPair<T> &as, T a, T b) {
91	NumberPair<T> bs = split<T, SPLIT_B>(b);
92	NumberPair<T> r{0.0, 0.0};
93
94	r.hi = a * b;
95	T t1 = as.hi * bs.hi - r.hi;
96	T t2 = as.hi * bs.lo + t1;
97	T t3 = as.lo * bs.hi + t2;
98	r.lo = as.lo * bs.lo + t3;
99
100	return r;
101	}
102
103	// The templated exact multiplication needs template version of
104	// LIBC_TARGET_CPU_HAS_FMA_* macro to correctly select the implementation.
105	// These can be moved to "src/__support/macros/properties/cpu_features.h" if
106	// other part of libc needed.
107	template <typename T> struct TargetHasFmaInstruction {
108	static constexpr bool VALUE = false;
109	};
110
111	#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
112	template <> struct TargetHasFmaInstruction<float> {
113	static constexpr bool VALUE = true;
114	};
115	#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
116
117	#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
118	template <> struct TargetHasFmaInstruction<double> {
119	static constexpr bool VALUE = true;
120	};
121	#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
122
123	// Note: When FMA instruction is not available, the `exact_mult` function is
124	// only correct for round-to-nearest mode. See:
125	// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
126	// Roundings," https://inria.hal.science/hal-04480440.
127	// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
128	// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
129	// then a * b = r.hi + r.lo is exact for all rounding modes.
130	template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
131	LIBC_INLINE NumberPair<T> exact_mult(T a, T b) {
132	NumberPair<T> r{0.0, 0.0};
133
134	if constexpr (TargetHasFmaInstruction<T>::VALUE) {
135	r.hi = a * b;
136	r.lo = fputil::multiply_add(a, b, -r.hi);
137	} else {
138	// Dekker's Product.
139	NumberPair<T> as = split(a);
140
141	r = exact_mult<T, SPLIT_B>(as, a, b);
142	}
143
144	return r;
145	}
146
147	LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
148	DoubleDouble r = exact_mult(a, b.hi);
149	r.lo = multiply_add(a, b.lo, r.lo);
150	return r;
151	}
152
153	template <size_t SPLIT_B = 27>
154	LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
155	const DoubleDouble &b) {
156	DoubleDouble r = exact_mult<double, SPLIT_B>(a.hi, b.hi);
157	double t1 = multiply_add(a.hi, b.lo, r.lo);
158	double t2 = multiply_add(a.lo, b.hi, t1);
159	r.lo = t2;
160	return r;
161	}
162
163	// Assuming \|c\| >= \|a * b\|.
164	template <>
165	LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
166	const DoubleDouble &b,
167	const DoubleDouble &c) {
168	return add(c, quick_mult(a, b));
169	}
170
171	// Accurate double-double division, following Karp-Markstein's trick for
172	// division, implemented in the CORE-MATH project at:
173	// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/tan/tan.c#L1855
174	//
175	// Error bounds:
176	// Let a = ah + al, b = bh + bl.
177	// Let r = rh + rl be the approximation of (ah + al) / (bh + bl).
178	// Then:
179	// (ah + al) / (bh + bl) - rh =
180	// = ((ah - bh * rh) + (al - bl * rh)) / (bh + bl)
181	// = (1 + O(bl/bh)) * ((ah - bh * rh) + (al - bl * rh)) / bh
182	// Let q = round(1/bh), then the above expressions are approximately:
183	// = (1 + O(bl / bh)) * (1 + O(2^-52)) * q * ((ah - bh * rh) + (al - bl * rh))
184	// So we can compute:
185	// rl = q * (ah - bh * rh) + q * (al - bl * rh)
186	// as accurate as possible, then the error is bounded by:
187	// \|(ah + al) / (bh + bl) - (rh + rl)\| < O(bl/bh) * (2^-52 + al/ah + bl/bh)
188	template <typename T>
189	LIBC_INLINE NumberPair<T> div(const NumberPair<T> &a, const NumberPair<T> &b) {
190	NumberPair<T> r;
191	T q = T(1) / b.hi;
192	r.hi = a.hi * q;
193
194	#ifdef LIBC_TARGET_CPU_HAS_FMA
195	T e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
196	T e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
197	#else
198	NumberPair<T> b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
199	NumberPair<T> b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
200	T e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
201	T e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
202	#endif // LIBC_TARGET_CPU_HAS_FMA
203
204	r.lo = q * (e_hi + e_lo);
205	return r;
206	}
207
208	} // namespace fputil
209	} // namespace LIBC_NAMESPACE_DECL
210
211	#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
212

Warning: This file is not a C or C++ file. It does not have highlighting.

Provided by KDAB

Update your C++ knowledge – Modern C++11/14/17 Training

Find out more

source code of libc/src/__support/FPUtil/double_double.h