cbrt.rs source code [crates/libm/src/math/cbrt.rs]

/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 * Optimized by Bruce D. Evans.
 */
/* cbrt(x)
 * Return cube root of x
 */
17
18	use core::f64;
19
/* Biases added to the divided-by-three high word to form the rough estimate. */
const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */
const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */

/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */
const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */
const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */
const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */
const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */

/// Cube root (f64)
///
/// Computes the cube root of the argument.
///
/// NaN and infinite inputs are returned as `x + x`, and a zero input is
/// returned unchanged (so the sign of zero is preserved). For every other
/// input the result carries the sign of `x`: a rough estimate built from the
/// exponent bits is refined by a degree-4 polynomial and then by one Newton
/// step.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn cbrt(x: f64) -> f64 {
    let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54

    let mut ui: u64 = x.to_bits();
    /* High 32 bits of |x|: the exponent and the top of the mantissa. */
    let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff;

    if hx >= 0x7ff00000 {
        /* cbrt(NaN,INF) is itself */
        return x + x;
    }

    /*
     * Rough cbrt to 5 bits:
     *    cbrt(2**e*(1+m)) ~= 2**(e/3)*(1+(e%3+m)/3)
     * where e is integral and >= 0, m is real and in [0, 1), and "/" and
     * "%" are integer division and modulus with rounding towards minus
     * infinity.  The RHS is always >= the LHS and has a maximum relative
     * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
     * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
     * floating point representation, for finite positive normal values,
     * ordinary integer division of the value in bits magically gives
     * almost exactly the RHS of the above provided we first subtract the
     * exponent bias (1023 for doubles) and later add it back.  We do the
     * subtraction virtually to keep e >= 0 so that ordinary integer
     * division rounds towards minus infinity; this is also efficient.
     */
    if hx < 0x00100000 {
        /* zero or subnormal? */
        /* Scale by 2**54 so the value is normal; B2 compensates with -54/3. */
        ui = (x * x1p54).to_bits();
        hx = (ui >> 32) as u32 & 0x7fffffff;
        if hx == 0 {
            return x; /* cbrt(0) is itself */
        }
        hx = hx / 3 + B2;
    } else {
        hx = hx / 3 + B1;
    }
    /* Keep only the sign of x and install the estimated high word. */
    ui &= 1 << 63;
    ui |= (hx as u64) << 32;
    let mut t = f64::from_bits(ui);

    /*
     * New cbrt to 23 bits:
     *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
     * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
     * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
     * has produced t such that |t/cbrt(x) - 1| ~< 1/32, and cubing this
     * gives us bounds for r = t**3/x.
     *
     * Try to optimize for parallel evaluation as in __tanf.c.
     */
    let mut r = (t * t) * (t / x);
    t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));

    /*
     * Round t away from zero to 23 bits (sloppily except for ensuring that
     * the result is larger in magnitude than cbrt(x) but not much more than
     * 2 23-bit ulps larger).  With rounding towards zero, the error bound
     * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
     * in the rounded t, the infinite-precision error in the Newton
     * approximation barely affects third digit in the final error
     * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
     * before the final error is larger than 0.667 ulps.
     */
    ui = t.to_bits();
    ui = (ui + 0x80000000) & 0xffffffffc0000000;
    t = f64::from_bits(ui);

    /* one step Newton iteration to 53 bits with error < 0.667 ulps */
    let s = t * t; /* t*t is exact */
    r = x / s; /* error <= 0.5 ulps; |r| < |t| */
    let w = t + t; /* t+t is exact */
    r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
    t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
    t
}
114