1// Copyright 2018 the Kurbo Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Common mathematical operations
5
6#![allow(missing_docs)]
7
8use arrayvec::ArrayVec;
9
10/// Defines a trait that chooses between libstd or libm implementations of float methods.
11macro_rules! define_float_funcs {
12 ($(
13 fn $name:ident(self $(,$arg:ident: $arg_ty:ty)*) -> $ret:ty
14 => $lname:ident/$lfname:ident;
15 )+) => {
16 #[cfg(not(feature = "std"))]
17 pub(crate) trait FloatFuncs : Sized {
18 /// Special implementation for signum, because libm doesn't have it.
19 fn signum(self) -> Self;
20
21 $(fn $name(self $(,$arg: $arg_ty)*) -> $ret;)+
22 }
23
24 #[cfg(not(feature = "std"))]
25 impl FloatFuncs for f32 {
26 #[inline]
27 fn signum(self) -> f32 {
28 if self.is_nan() {
29 f32::NAN
30 } else {
31 1.0_f32.copysign(self)
32 }
33 }
34
35 $(fn $name(self $(,$arg: $arg_ty)*) -> $ret {
36 #[cfg(feature = "libm")]
37 return libm::$lfname(self $(,$arg as _)*);
38
39 #[cfg(not(feature = "libm"))]
40 compile_error!("kurbo requires either the `std` or `libm` feature")
41 })+
42 }
43
44 #[cfg(not(feature = "std"))]
45 impl FloatFuncs for f64 {
46 #[inline]
47 fn signum(self) -> f64 {
48 if self.is_nan() {
49 f64::NAN
50 } else {
51 1.0_f64.copysign(self)
52 }
53 }
54
55 $(fn $name(self $(,$arg: $arg_ty)*) -> $ret {
56 #[cfg(feature = "libm")]
57 return libm::$lname(self $(,$arg as _)*);
58
59 #[cfg(not(feature = "libm"))]
60 compile_error!("kurbo requires either the `std` or `libm` feature")
61 })+
62 }
63 }
64}
65
66define_float_funcs! {
67 fn abs(self) -> Self => fabs/fabsf;
68 fn acos(self) -> Self => acos/acosf;
69 fn atan2(self, other: Self) -> Self => atan2/atan2f;
70 fn cbrt(self) -> Self => cbrt/cbrtf;
71 fn ceil(self) -> Self => ceil/ceilf;
72 fn cos(self) -> Self => cos/cosf;
73 fn copysign(self, sign: Self) -> Self => copysign/copysignf;
74 fn floor(self) -> Self => floor/floorf;
75 fn hypot(self, other: Self) -> Self => hypot/hypotf;
76 fn ln(self) -> Self => log/logf;
77 fn log2(self) -> Self => log2/log2f;
78 fn mul_add(self, a: Self, b: Self) -> Self => fma/fmaf;
79 fn powi(self, n: i32) -> Self => pow/powf;
80 fn powf(self, n: Self) -> Self => pow/powf;
81 fn round(self) -> Self => round/roundf;
82 fn sin_cos(self) -> (Self, Self) => sincos/sincosf;
83 fn sqrt(self) -> Self => sqrt/sqrtf;
84 fn tan(self) -> Self => tan/tanf;
85 fn trunc(self) -> Self => trunc/truncf;
86}
87
88/// Adds convenience methods to `f32` and `f64`.
89pub trait FloatExt<T> {
90 /// Rounds to the nearest integer away from zero,
91 /// unless the provided value is already an integer.
92 ///
93 /// It is to `ceil` what `trunc` is to `floor`.
94 ///
95 /// # Examples
96 ///
97 /// ```
98 /// use kurbo::common::FloatExt;
99 ///
100 /// let f = 3.7_f64;
101 /// let g = 3.0_f64;
102 /// let h = -3.7_f64;
103 /// let i = -5.1_f32;
104 ///
105 /// assert_eq!(f.expand(), 4.0);
106 /// assert_eq!(g.expand(), 3.0);
107 /// assert_eq!(h.expand(), -4.0);
108 /// assert_eq!(i.expand(), -6.0);
109 /// ```
110 fn expand(&self) -> T;
111}
112
113impl FloatExt<f64> for f64 {
114 #[inline]
115 fn expand(&self) -> f64 {
116 self.abs().ceil().copysign(*self)
117 }
118}
119
120impl FloatExt<f32> for f32 {
121 #[inline]
122 fn expand(&self) -> f32 {
123 self.abs().ceil().copysign(*self)
124 }
125}
126
127/// Find real roots of cubic equation.
128///
129/// The implementation is not (yet) fully robust, but it does handle the case
130/// where `c3` is zero (in that case, solving the quadratic equation).
131///
132/// See: <https://momentsingraphics.de/CubicRoots.html>
133///
134/// That implementation is in turn based on Jim Blinn's "How to Solve a Cubic
135/// Equation", which is masterful.
136///
137/// Return values of x for which c0 + c1 x + c2 x² + c3 x³ = 0.
138pub fn solve_cubic(c0: f64, c1: f64, c2: f64, c3: f64) -> ArrayVec<f64, 3> {
139 let mut result = ArrayVec::new();
140 let c3_recip = c3.recip();
141 const ONETHIRD: f64 = 1. / 3.;
142 let scaled_c2 = c2 * (ONETHIRD * c3_recip);
143 let scaled_c1 = c1 * (ONETHIRD * c3_recip);
144 let scaled_c0 = c0 * c3_recip;
145 if !(scaled_c0.is_finite() && scaled_c1.is_finite() && scaled_c2.is_finite()) {
146 // cubic coefficient is zero or nearly so.
147 return solve_quadratic(c0, c1, c2).iter().copied().collect();
148 }
149 let (c0, c1, c2) = (scaled_c0, scaled_c1, scaled_c2);
150 // (d0, d1, d2) is called "Delta" in article
151 let d0 = (-c2).mul_add(c2, c1);
152 let d1 = (-c1).mul_add(c2, c0);
153 let d2 = c2 * c0 - c1 * c1;
154 // d is called "Discriminant"
155 let d = 4.0 * d0 * d2 - d1 * d1;
156 // de is called "Depressed.x", Depressed.y = d0
157 let de = (-2.0 * c2).mul_add(d0, d1);
158 // TODO: handle the cases where these intermediate results overflow.
159 if d < 0.0 {
160 let sq = (-0.25 * d).sqrt();
161 let r = -0.5 * de;
162 let t1 = (r + sq).cbrt() + (r - sq).cbrt();
163 result.push(t1 - c2);
164 } else if d == 0.0 {
165 let t1 = (-d0).sqrt().copysign(de);
166 result.push(t1 - c2);
167 result.push(-2.0 * t1 - c2);
168 } else {
169 let th = d.sqrt().atan2(-de) * ONETHIRD;
170 // (th_cos, th_sin) is called "CubicRoot"
171 let (th_sin, th_cos) = th.sin_cos();
172 // (r0, r1, r2) is called "Root"
173 let r0 = th_cos;
174 let ss3 = th_sin * 3.0f64.sqrt();
175 let r1 = 0.5 * (-th_cos + ss3);
176 let r2 = 0.5 * (-th_cos - ss3);
177 let t = 2.0 * (-d0).sqrt();
178 result.push(t.mul_add(r0, -c2));
179 result.push(t.mul_add(r1, -c2));
180 result.push(t.mul_add(r2, -c2));
181 }
182 result
183}
184
185/// Find real roots of quadratic equation.
186///
187/// Return values of x for which c0 + c1 x + c2 x² = 0.
188///
189/// This function tries to be quite numerically robust. If the equation
190/// is nearly linear, it will return the root ignoring the quadratic term;
191/// the other root might be out of representable range. In the degenerate
192/// case where all coefficients are zero, so that all values of x satisfy
193/// the equation, a single `0.0` is returned.
194pub fn solve_quadratic(c0: f64, c1: f64, c2: f64) -> ArrayVec<f64, 2> {
195 let mut result = ArrayVec::new();
196 let sc0 = c0 * c2.recip();
197 let sc1 = c1 * c2.recip();
198 if !sc0.is_finite() || !sc1.is_finite() {
199 // c2 is zero or very small, treat as linear eqn
200 let root = -c0 / c1;
201 if root.is_finite() {
202 result.push(root);
203 } else if c0 == 0.0 && c1 == 0.0 {
204 // Degenerate case
205 result.push(0.0);
206 }
207 return result;
208 }
209 let arg = sc1 * sc1 - 4. * sc0;
210 let root1 = if !arg.is_finite() {
211 // Likely, calculation of sc1 * sc1 overflowed. Find one root
212 // using sc1 x + x² = 0, other root as sc0 / root1.
213 -sc1
214 } else {
215 if arg < 0.0 {
216 return result;
217 } else if arg == 0.0 {
218 result.push(-0.5 * sc1);
219 return result;
220 }
221 // See https://math.stackexchange.com/questions/866331
222 -0.5 * (sc1 + arg.sqrt().copysign(sc1))
223 };
224 let root2 = sc0 / root1;
225 if root2.is_finite() {
226 // Sort just to be friendly and make results deterministic.
227 if root2 > root1 {
228 result.push(root1);
229 result.push(root2);
230 } else {
231 result.push(root2);
232 result.push(root1);
233 }
234 } else {
235 result.push(root1);
236 }
237 result
238}
239
240/// Compute epsilon relative to coefficient.
241///
242/// A helper function from the Orellana and De Michele paper.
243fn eps_rel(raw: f64, a: f64) -> f64 {
244 if a == 0.0 {
245 raw.abs()
246 } else {
247 ((raw - a) / a).abs()
248 }
249}
250
251/// Find real roots of a quartic equation.
252///
253/// This is a fairly literal implementation of the method described in:
254/// Algorithm 1010: Boosting Efficiency in Solving Quartic Equations with
255/// No Compromise in Accuracy, Orellana and De Michele, ACM
256/// Transactions on Mathematical Software, Vol. 46, No. 2, May 2020.
257pub fn solve_quartic(c0: f64, c1: f64, c2: f64, c3: f64, c4: f64) -> ArrayVec<f64, 4> {
258 if c4 == 0.0 {
259 return solve_cubic(c0, c1, c2, c3).iter().copied().collect();
260 }
261 if c0 == 0.0 {
262 // Note: appends 0 root at end, doesn't sort. We might want to do that.
263 return solve_cubic(c1, c2, c3, c4)
264 .iter()
265 .copied()
266 .chain(Some(0.0))
267 .collect();
268 }
269 let a = c3 / c4;
270 let b = c2 / c4;
271 let c = c1 / c4;
272 let d = c0 / c4;
273 if let Some(result) = solve_quartic_inner(a, b, c, d, false) {
274 return result;
275 }
276 // Do polynomial rescaling
277 const K_Q: f64 = 7.16e76;
278 for rescale in [false, true] {
279 if let Some(result) = solve_quartic_inner(
280 a / K_Q,
281 b / K_Q.powi(2),
282 c / K_Q.powi(3),
283 d / K_Q.powi(4),
284 rescale,
285 ) {
286 return result.iter().map(|x| x * K_Q).collect();
287 }
288 }
289 // Overflow happened, just return no roots.
290 //println!("overflow, no roots returned");
291 Default::default()
292}
293
294fn solve_quartic_inner(a: f64, b: f64, c: f64, d: f64, rescale: bool) -> Option<ArrayVec<f64, 4>> {
295 factor_quartic_inner(a, b, c, d, rescale).map(|quadratics: ArrayVec<(f64, f64), 2>| {
296 quadraticsimpl Iterator
297 .iter()
298 .flat_map(|(a: &f64, b: &f64)| solve_quadratic(*b, *a, c2:1.0))
299 .collect()
300 })
301}
302
303/// Factor a quartic into two quadratics.
304///
305/// Attempt to factor a quartic equation into two quadratic equations. Returns `None` either if there
306/// is overflow (in which case rescaling might succeed) or the factorization would result in
307/// complex coefficients.
308///
309/// Discussion question: distinguish the two cases in return value?
310pub fn factor_quartic_inner(
311 a: f64,
312 b: f64,
313 c: f64,
314 d: f64,
315 rescale: bool,
316) -> Option<ArrayVec<(f64, f64), 2>> {
317 let calc_eps_q = |a1, b1, a2, b2| {
318 let eps_a = eps_rel(a1 + a2, a);
319 let eps_b = eps_rel(b1 + a1 * a2 + b2, b);
320 let eps_c = eps_rel(b1 * a2 + a1 * b2, c);
321 eps_a + eps_b + eps_c
322 };
323 let calc_eps_t = |a1, b1, a2, b2| calc_eps_q(a1, b1, a2, b2) + eps_rel(b1 * b2, d);
324 let disc = 9. * a * a - 24. * b;
325 let s = if disc >= 0.0 {
326 -2. * b / (3. * a + disc.sqrt().copysign(a))
327 } else {
328 -0.25 * a
329 };
330 let a_prime = a + 4. * s;
331 let b_prime = b + 3. * s * (a + 2. * s);
332 let c_prime = c + s * (2. * b + s * (3. * a + 4. * s));
333 let d_prime = d + s * (c + s * (b + s * (a + s)));
334 let g_prime;
335 let h_prime;
336 const K_C: f64 = 3.49e102;
337 if rescale {
338 let a_prime_s = a_prime / K_C;
339 let b_prime_s = b_prime / K_C;
340 let c_prime_s = c_prime / K_C;
341 let d_prime_s = d_prime / K_C;
342 g_prime = a_prime_s * c_prime_s - (4. / K_C) * d_prime_s - (1. / 3.) * b_prime_s.powi(2);
343 h_prime = (a_prime_s * c_prime_s + (8. / K_C) * d_prime_s - (2. / 9.) * b_prime_s.powi(2))
344 * (1. / 3.)
345 * b_prime_s
346 - c_prime_s * (c_prime_s / K_C)
347 - a_prime_s.powi(2) * d_prime_s;
348 } else {
349 g_prime = a_prime * c_prime - 4. * d_prime - (1. / 3.) * b_prime.powi(2);
350 h_prime =
351 (a_prime * c_prime + 8. * d_prime - (2. / 9.) * b_prime.powi(2)) * (1. / 3.) * b_prime
352 - c_prime.powi(2)
353 - a_prime.powi(2) * d_prime;
354 }
355 if !(g_prime.is_finite() && h_prime.is_finite()) {
356 return None;
357 }
358 let phi = depressed_cubic_dominant(g_prime, h_prime);
359 let phi = if rescale { phi * K_C } else { phi };
360 let l_1 = a * 0.5;
361 let l_3 = (1. / 6.) * b + 0.5 * phi;
362 let delt_2 = c - a * l_3;
363 let d_2_cand_1 = (2. / 3.) * b - phi - l_1 * l_1;
364 let l_2_cand_1 = 0.5 * delt_2 / d_2_cand_1;
365 let l_2_cand_2 = 2. * (d - l_3 * l_3) / delt_2;
366 let d_2_cand_2 = 0.5 * delt_2 / l_2_cand_2;
367 let d_2_cand_3 = d_2_cand_1;
368 let l_2_cand_3 = l_2_cand_2;
369 let mut d_2_best = 0.0;
370 let mut l_2_best = 0.0;
371 let mut eps_l_best = 0.0;
372 for (i, (d_2, l_2)) in [
373 (d_2_cand_1, l_2_cand_1),
374 (d_2_cand_2, l_2_cand_2),
375 (d_2_cand_3, l_2_cand_3),
376 ]
377 .iter()
378 .enumerate()
379 {
380 let eps_0 = eps_rel(d_2 + l_1 * l_1 + 2. * l_3, b);
381 let eps_1 = eps_rel(2. * (d_2 * l_2 + l_1 * l_3), c);
382 let eps_2 = eps_rel(d_2 * l_2 * l_2 + l_3 * l_3, d);
383 let eps_l = eps_0 + eps_1 + eps_2;
384 if i == 0 || eps_l < eps_l_best {
385 d_2_best = *d_2;
386 l_2_best = *l_2;
387 eps_l_best = eps_l;
388 }
389 }
390 let d_2 = d_2_best;
391 let l_2 = l_2_best;
392 let mut alpha_1;
393 let mut beta_1;
394 let mut alpha_2;
395 let mut beta_2;
396 //println!("phi = {}, d_2 = {}", phi, d_2);
397 if d_2 < 0.0 {
398 let sq = (-d_2).sqrt();
399 alpha_1 = l_1 + sq;
400 beta_1 = l_3 + sq * l_2;
401 alpha_2 = l_1 - sq;
402 beta_2 = l_3 - sq * l_2;
403 if beta_2.abs() < beta_1.abs() {
404 beta_2 = d / beta_1;
405 } else if beta_2.abs() > beta_1.abs() {
406 beta_1 = d / beta_2;
407 }
408 let cands;
409 if alpha_1.abs() != alpha_2.abs() {
410 if alpha_1.abs() < alpha_2.abs() {
411 let a1_cand_1 = (c - beta_1 * alpha_2) / beta_2;
412 let a1_cand_2 = (b - beta_2 - beta_1) / alpha_2;
413 let a1_cand_3 = a - alpha_2;
414 // Note: cand 3 is first because it is infallible, simplifying logic
415 cands = [
416 (a1_cand_3, alpha_2),
417 (a1_cand_1, alpha_2),
418 (a1_cand_2, alpha_2),
419 ];
420 } else {
421 let a2_cand_1 = (c - alpha_1 * beta_2) / beta_1;
422 let a2_cand_2 = (b - beta_2 - beta_1) / alpha_1;
423 let a2_cand_3 = a - alpha_1;
424 cands = [
425 (alpha_1, a2_cand_3),
426 (alpha_1, a2_cand_1),
427 (alpha_1, a2_cand_2),
428 ];
429 }
430 let mut eps_q_best = 0.0;
431 for (i, (a1, a2)) in cands.iter().enumerate() {
432 if a1.is_finite() && a2.is_finite() {
433 let eps_q = calc_eps_q(*a1, beta_1, *a2, beta_2);
434 if i == 0 || eps_q < eps_q_best {
435 alpha_1 = *a1;
436 alpha_2 = *a2;
437 eps_q_best = eps_q;
438 }
439 }
440 }
441 }
442 } else if d_2 == 0.0 {
443 let d_3 = d - l_3 * l_3;
444 alpha_1 = l_1;
445 beta_1 = l_3 + (-d_3).sqrt();
446 alpha_2 = l_1;
447 beta_2 = l_3 - (-d_3).sqrt();
448 if beta_1.abs() > beta_2.abs() {
449 beta_2 = d / beta_1;
450 } else if beta_2.abs() > beta_1.abs() {
451 beta_1 = d / beta_2;
452 }
453 // TODO: handle case d_2 is very small?
454 } else {
455 // This case means no real roots; in the most general case we might want
456 // to factor into quadratic equations with complex coefficients.
457 return None;
458 }
459 // Newton-Raphson iteration on alpha/beta coeff's.
460 let mut eps_t = calc_eps_t(alpha_1, beta_1, alpha_2, beta_2);
461 for _ in 0..8 {
462 //println!("a1 {} b1 {} a2 {} b2 {}", alpha_1, beta_1, alpha_2, beta_2);
463 //println!("eps_t = {:e}", eps_t);
464 if eps_t == 0.0 {
465 break;
466 }
467 let f_0 = beta_1 * beta_2 - d;
468 let f_1 = beta_1 * alpha_2 + alpha_1 * beta_2 - c;
469 let f_2 = beta_1 + alpha_1 * alpha_2 + beta_2 - b;
470 let f_3 = alpha_1 + alpha_2 - a;
471 let c_1 = alpha_1 - alpha_2;
472 let det_j = beta_1 * beta_1 - beta_1 * (alpha_2 * c_1 + 2. * beta_2)
473 + beta_2 * (alpha_1 * c_1 + beta_2);
474 if det_j == 0.0 {
475 break;
476 }
477 let inv = det_j.recip();
478 let c_2 = beta_2 - beta_1;
479 let c_3 = beta_1 * alpha_2 - alpha_1 * beta_2;
480 let dz_0 = c_1 * f_0 + c_2 * f_1 + c_3 * f_2 - (beta_1 * c_2 + alpha_1 * c_3) * f_3;
481 let dz_1 = (alpha_1 * c_1 + c_2) * f_0
482 - beta_1 * c_1 * f_1
483 - beta_1 * c_2 * f_2
484 - beta_1 * c_3 * f_3;
485 let dz_2 = -c_1 * f_0 - c_2 * f_1 - c_3 * f_2 + (alpha_2 * c_3 + beta_2 * c_2) * f_3;
486 let dz_3 = -(alpha_2 * c_1 + c_2) * f_0
487 + beta_2 * c_1 * f_1
488 + beta_2 * c_2 * f_2
489 + beta_2 * c_3 * f_3;
490 let a1 = alpha_1 - inv * dz_0;
491 let b1 = beta_1 - inv * dz_1;
492 let a2 = alpha_2 - inv * dz_2;
493 let b2 = beta_2 - inv * dz_3;
494 let new_eps_t = calc_eps_t(a1, b1, a2, b2);
495 // We break if the new eps is equal, paper keeps going
496 if new_eps_t < eps_t {
497 alpha_1 = a1;
498 beta_1 = b1;
499 alpha_2 = a2;
500 beta_2 = b2;
501 eps_t = new_eps_t;
502 } else {
503 //println!("new_eps_t got worse: {:e}", new_eps_t);
504 break;
505 }
506 }
507 Some([(alpha_1, beta_1), (alpha_2, beta_2)].into())
508}
509
510/// Dominant root of depressed cubic x^3 + gx + h = 0.
511///
512/// Section 2.2 of Orellana and De Michele.
513// Note: some of the techniques in here might be useful to improve the
514// cubic solver, and vice versa.
515fn depressed_cubic_dominant(g: f64, h: f64) -> f64 {
516 let q = (-1. / 3.) * g;
517 let r = 0.5 * h;
518 let phi_0;
519 let k = if q.abs() < 1e102 && r.abs() < 1e154 {
520 None
521 } else if q.abs() < r.abs() {
522 Some(1. - q * (q / r).powi(2))
523 } else {
524 Some(q.signum() * ((r / q).powi(2) / q - 1.0))
525 };
526 if k.is_some() && r == 0.0 {
527 if g > 0.0 {
528 phi_0 = 0.0;
529 } else {
530 phi_0 = (-g).sqrt();
531 }
532 } else if k.map(|k| k < 0.0).unwrap_or_else(|| r * r < q.powi(3)) {
533 let t = if k.is_some() {
534 r / q / q.sqrt()
535 } else {
536 r / q.powi(3).sqrt()
537 };
538 phi_0 = -2. * q.sqrt() * (t.abs().acos() * (1. / 3.)).cos().copysign(t);
539 } else {
540 let a = if let Some(k) = k {
541 if q.abs() < r.abs() {
542 -r * (1. + k.sqrt())
543 } else {
544 -r - (q.abs().sqrt() * q * k.sqrt()).copysign(r)
545 }
546 } else {
547 -r - (r * r - q.powi(3)).sqrt().copysign(r)
548 }
549 .cbrt();
550 let b = if a == 0.0 { 0.0 } else { q / a };
551 phi_0 = a + b;
552 }
553 // Refine with Newton-Raphson iteration
554 let mut x = phi_0;
555 let mut f = (x * x + g) * x + h;
556 //println!("g = {:e}, h = {:e}, x = {:e}, f = {:e}", g, h, x, f);
557 const EPS_M: f64 = 2.22045e-16;
558 if f.abs() < EPS_M * x.powi(3).max(g * x).max(h) {
559 return x;
560 }
561 for _ in 0..8 {
562 let delt_f = 3. * x * x + g;
563 if delt_f == 0.0 {
564 break;
565 }
566 let new_x = x - f / delt_f;
567 let new_f = (new_x * new_x + g) * new_x + h;
568 //println!("delt_f = {:e}, new_f = {:e}", delt_f, new_f);
569 if new_f == 0.0 {
570 return new_x;
571 }
572 if new_f.abs() >= f.abs() {
573 break;
574 }
575 x = new_x;
576 f = new_f;
577 }
578 x
579}
580
581/// Solve an arbitrary function for a zero-crossing.
582///
583/// This uses the [ITP method], as described in the paper
584/// [An Enhancement of the Bisection Method Average Performance Preserving Minmax Optimality].
585///
586/// The values of `ya` and `yb` are given as arguments rather than
587/// computed from `f`, as the values may already be known, or they may
588/// be less expensive to compute as special cases.
589///
590/// It is assumed that `ya < 0.0` and `yb > 0.0`, otherwise unexpected
591/// results may occur.
592///
593/// The value of `epsilon` must be larger than 2^-63 times `b - a`,
594/// otherwise integer overflow may occur. The `a` and `b` parameters
595/// represent the lower and upper bounds of the bracket searched for a
596/// solution.
597///
598/// The ITP method has tuning parameters. This implementation hardwires
599/// k2 to 2, both because it avoids an expensive floating point
600/// exponentiation, and because this value has been tested to work well
601/// with curve fitting problems.
602///
603/// The `n0` parameter controls the relative impact of the bisection and
604/// secant components. When it is 0, the number of iterations is
605/// guaranteed to be no more than the number required by bisection (thus,
606/// this method is strictly superior to bisection). However, when the
607/// function is smooth, a value of 1 gives the secant method more of a
608/// chance to engage, so the average number of iterations is likely
609/// lower, though there can be one more iteration than bisection in the
610/// worst case.
611///
612/// The `k1` parameter is harder to characterize, and interested users
613/// are referred to the paper, as well as encouraged to do empirical
614/// testing. To match the paper, a value of `0.2 / (b - a)` is
615/// suggested, and this is confirmed to give good results.
616///
617/// When the function is monotonic, the returned result is guaranteed to
618/// be within `epsilon` of the zero crossing. For more detailed analysis,
619/// again see the paper.
620///
621/// [ITP method]: https://en.wikipedia.org/wiki/ITP_Method
622/// [An Enhancement of the Bisection Method Average Performance Preserving Minmax Optimality]: https://dl.acm.org/doi/10.1145/3423597
623#[allow(clippy::too_many_arguments)]
624pub fn solve_itp(
625 mut f: impl FnMut(f64) -> f64,
626 mut a: f64,
627 mut b: f64,
628 epsilon: f64,
629 n0: usize,
630 k1: f64,
631 mut ya: f64,
632 mut yb: f64,
633) -> f64 {
634 let n1_2 = (((b - a) / epsilon).log2().ceil() - 1.0).max(0.0) as usize;
635 let nmax = n0 + n1_2;
636 let mut scaled_epsilon = epsilon * (1u64 << nmax) as f64;
637 while b - a > 2.0 * epsilon {
638 let x1_2 = 0.5 * (a + b);
639 let r = scaled_epsilon - 0.5 * (b - a);
640 let xf = (yb * a - ya * b) / (yb - ya);
641 let sigma = x1_2 - xf;
642 // This has k2 = 2 hardwired for efficiency.
643 let delta = k1 * (b - a).powi(2);
644 let xt = if delta <= (x1_2 - xf).abs() {
645 xf + delta.copysign(sigma)
646 } else {
647 x1_2
648 };
649 let xitp = if (xt - x1_2).abs() <= r {
650 xt
651 } else {
652 x1_2 - r.copysign(sigma)
653 };
654 let yitp = f(xitp);
655 if yitp > 0.0 {
656 b = xitp;
657 yb = yitp;
658 } else if yitp < 0.0 {
659 a = xitp;
660 ya = yitp;
661 } else {
662 return xitp;
663 }
664 scaled_epsilon *= 0.5;
665 }
666 0.5 * (a + b)
667}
668
669/// A variant ITP solver that allows fallible functions.
670///
671/// Another difference: it returns the bracket that contains the root,
672/// which may be important if the function has a discontinuity.
673#[allow(clippy::too_many_arguments)]
674pub(crate) fn solve_itp_fallible<E>(
675 mut f: impl FnMut(f64) -> Result<f64, E>,
676 mut a: f64,
677 mut b: f64,
678 epsilon: f64,
679 n0: usize,
680 k1: f64,
681 mut ya: f64,
682 mut yb: f64,
683) -> Result<(f64, f64), E> {
684 let n1_2 = (((b - a) / epsilon).log2().ceil() - 1.0).max(0.0) as usize;
685 let nmax = n0 + n1_2;
686 let mut scaled_epsilon = epsilon * (1u64 << nmax) as f64;
687 while b - a > 2.0 * epsilon {
688 let x1_2 = 0.5 * (a + b);
689 let r = scaled_epsilon - 0.5 * (b - a);
690 let xf = (yb * a - ya * b) / (yb - ya);
691 let sigma = x1_2 - xf;
692 // This has k2 = 2 hardwired for efficiency.
693 let delta = k1 * (b - a).powi(2);
694 let xt = if delta <= (x1_2 - xf).abs() {
695 xf + delta.copysign(sigma)
696 } else {
697 x1_2
698 };
699 let xitp = if (xt - x1_2).abs() <= r {
700 xt
701 } else {
702 x1_2 - r.copysign(sigma)
703 };
704 let yitp = f(xitp)?;
705 if yitp > 0.0 {
706 b = xitp;
707 yb = yitp;
708 } else if yitp < 0.0 {
709 a = xitp;
710 ya = yitp;
711 } else {
712 return Ok((xitp, xitp));
713 }
714 scaled_epsilon *= 0.5;
715 }
716 Ok((a, b))
717}
718
719// Tables of Legendre-Gauss quadrature coefficients, adapted from:
720// <https://pomax.github.io/bezierinfo/legendre-gauss.html>
721
722pub const GAUSS_LEGENDRE_COEFFS_3: &[(f64, f64)] = &[
723 (0.8888888888888888, 0.0000000000000000),
724 (0.5555555555555556, -0.7745966692414834),
725 (0.5555555555555556, 0.7745966692414834),
726];
727
728pub const GAUSS_LEGENDRE_COEFFS_4: &[(f64, f64)] = &[
729 (0.6521451548625461, -0.3399810435848563),
730 (0.6521451548625461, 0.3399810435848563),
731 (0.3478548451374538, -0.8611363115940526),
732 (0.3478548451374538, 0.8611363115940526),
733];
734
735pub const GAUSS_LEGENDRE_COEFFS_5: &[(f64, f64)] = &[
736 (0.5688888888888889, 0.0000000000000000),
737 (0.4786286704993665, -0.5384693101056831),
738 (0.4786286704993665, 0.5384693101056831),
739 (0.2369268850561891, -0.9061798459386640),
740 (0.2369268850561891, 0.9061798459386640),
741];
742
743pub const GAUSS_LEGENDRE_COEFFS_6: &[(f64, f64)] = &[
744 (0.3607615730481386, 0.6612093864662645),
745 (0.3607615730481386, -0.6612093864662645),
746 (0.4679139345726910, -0.2386191860831969),
747 (0.4679139345726910, 0.2386191860831969),
748 (0.1713244923791704, -0.9324695142031521),
749 (0.1713244923791704, 0.9324695142031521),
750];
751
752pub const GAUSS_LEGENDRE_COEFFS_7: &[(f64, f64)] = &[
753 (0.4179591836734694, 0.0000000000000000),
754 (0.3818300505051189, 0.4058451513773972),
755 (0.3818300505051189, -0.4058451513773972),
756 (0.2797053914892766, -0.7415311855993945),
757 (0.2797053914892766, 0.7415311855993945),
758 (0.1294849661688697, -0.9491079123427585),
759 (0.1294849661688697, 0.9491079123427585),
760];
761
762pub const GAUSS_LEGENDRE_COEFFS_8: &[(f64, f64)] = &[
763 (0.3626837833783620, -0.1834346424956498),
764 (0.3626837833783620, 0.1834346424956498),
765 (0.3137066458778873, -0.5255324099163290),
766 (0.3137066458778873, 0.5255324099163290),
767 (0.2223810344533745, -0.7966664774136267),
768 (0.2223810344533745, 0.7966664774136267),
769 (0.1012285362903763, -0.9602898564975363),
770 (0.1012285362903763, 0.9602898564975363),
771];
772
773pub const GAUSS_LEGENDRE_COEFFS_8_HALF: &[(f64, f64)] = &[
774 (0.3626837833783620, 0.1834346424956498),
775 (0.3137066458778873, 0.5255324099163290),
776 (0.2223810344533745, 0.7966664774136267),
777 (0.1012285362903763, 0.9602898564975363),
778];
779
780pub const GAUSS_LEGENDRE_COEFFS_9: &[(f64, f64)] = &[
781 (0.3302393550012598, 0.0000000000000000),
782 (0.1806481606948574, -0.8360311073266358),
783 (0.1806481606948574, 0.8360311073266358),
784 (0.0812743883615744, -0.9681602395076261),
785 (0.0812743883615744, 0.9681602395076261),
786 (0.3123470770400029, -0.3242534234038089),
787 (0.3123470770400029, 0.3242534234038089),
788 (0.2606106964029354, -0.6133714327005904),
789 (0.2606106964029354, 0.6133714327005904),
790];
791
792pub const GAUSS_LEGENDRE_COEFFS_11: &[(f64, f64)] = &[
793 (0.2729250867779006, 0.0000000000000000),
794 (0.2628045445102467, -0.2695431559523450),
795 (0.2628045445102467, 0.2695431559523450),
796 (0.2331937645919905, -0.5190961292068118),
797 (0.2331937645919905, 0.5190961292068118),
798 (0.1862902109277343, -0.7301520055740494),
799 (0.1862902109277343, 0.7301520055740494),
800 (0.1255803694649046, -0.8870625997680953),
801 (0.1255803694649046, 0.8870625997680953),
802 (0.0556685671161737, -0.9782286581460570),
803 (0.0556685671161737, 0.9782286581460570),
804];
805
806pub const GAUSS_LEGENDRE_COEFFS_16: &[(f64, f64)] = &[
807 (0.1894506104550685, -0.0950125098376374),
808 (0.1894506104550685, 0.0950125098376374),
809 (0.1826034150449236, -0.2816035507792589),
810 (0.1826034150449236, 0.2816035507792589),
811 (0.1691565193950025, -0.4580167776572274),
812 (0.1691565193950025, 0.4580167776572274),
813 (0.1495959888165767, -0.6178762444026438),
814 (0.1495959888165767, 0.6178762444026438),
815 (0.1246289712555339, -0.7554044083550030),
816 (0.1246289712555339, 0.7554044083550030),
817 (0.0951585116824928, -0.8656312023878318),
818 (0.0951585116824928, 0.8656312023878318),
819 (0.0622535239386479, -0.9445750230732326),
820 (0.0622535239386479, 0.9445750230732326),
821 (0.0271524594117541, -0.9894009349916499),
822 (0.0271524594117541, 0.9894009349916499),
823];
824
825// Just the positive x_i values.
826pub const GAUSS_LEGENDRE_COEFFS_16_HALF: &[(f64, f64)] = &[
827 (0.1894506104550685, 0.0950125098376374),
828 (0.1826034150449236, 0.2816035507792589),
829 (0.1691565193950025, 0.4580167776572274),
830 (0.1495959888165767, 0.6178762444026438),
831 (0.1246289712555339, 0.7554044083550030),
832 (0.0951585116824928, 0.8656312023878318),
833 (0.0622535239386479, 0.9445750230732326),
834 (0.0271524594117541, 0.9894009349916499),
835];
836
837pub const GAUSS_LEGENDRE_COEFFS_24: &[(f64, f64)] = &[
838 (0.1279381953467522, -0.0640568928626056),
839 (0.1279381953467522, 0.0640568928626056),
840 (0.1258374563468283, -0.1911188674736163),
841 (0.1258374563468283, 0.1911188674736163),
842 (0.1216704729278034, -0.3150426796961634),
843 (0.1216704729278034, 0.3150426796961634),
844 (0.1155056680537256, -0.4337935076260451),
845 (0.1155056680537256, 0.4337935076260451),
846 (0.1074442701159656, -0.5454214713888396),
847 (0.1074442701159656, 0.5454214713888396),
848 (0.0976186521041139, -0.6480936519369755),
849 (0.0976186521041139, 0.6480936519369755),
850 (0.0861901615319533, -0.7401241915785544),
851 (0.0861901615319533, 0.7401241915785544),
852 (0.0733464814110803, -0.8200019859739029),
853 (0.0733464814110803, 0.8200019859739029),
854 (0.0592985849154368, -0.8864155270044011),
855 (0.0592985849154368, 0.8864155270044011),
856 (0.0442774388174198, -0.9382745520027328),
857 (0.0442774388174198, 0.9382745520027328),
858 (0.0285313886289337, -0.9747285559713095),
859 (0.0285313886289337, 0.9747285559713095),
860 (0.0123412297999872, -0.9951872199970213),
861 (0.0123412297999872, 0.9951872199970213),
862];
863
864pub const GAUSS_LEGENDRE_COEFFS_24_HALF: &[(f64, f64)] = &[
865 (0.1279381953467522, 0.0640568928626056),
866 (0.1258374563468283, 0.1911188674736163),
867 (0.1216704729278034, 0.3150426796961634),
868 (0.1155056680537256, 0.4337935076260451),
869 (0.1074442701159656, 0.5454214713888396),
870 (0.0976186521041139, 0.6480936519369755),
871 (0.0861901615319533, 0.7401241915785544),
872 (0.0733464814110803, 0.8200019859739029),
873 (0.0592985849154368, 0.8864155270044011),
874 (0.0442774388174198, 0.9382745520027328),
875 (0.0285313886289337, 0.9747285559713095),
876 (0.0123412297999872, 0.9951872199970213),
877];
878
879pub const GAUSS_LEGENDRE_COEFFS_32: &[(f64, f64)] = &[
880 (0.0965400885147278, -0.0483076656877383),
881 (0.0965400885147278, 0.0483076656877383),
882 (0.0956387200792749, -0.1444719615827965),
883 (0.0956387200792749, 0.1444719615827965),
884 (0.0938443990808046, -0.2392873622521371),
885 (0.0938443990808046, 0.2392873622521371),
886 (0.0911738786957639, -0.3318686022821277),
887 (0.0911738786957639, 0.3318686022821277),
888 (0.0876520930044038, -0.4213512761306353),
889 (0.0876520930044038, 0.4213512761306353),
890 (0.0833119242269467, -0.5068999089322294),
891 (0.0833119242269467, 0.5068999089322294),
892 (0.0781938957870703, -0.5877157572407623),
893 (0.0781938957870703, 0.5877157572407623),
894 (0.0723457941088485, -0.6630442669302152),
895 (0.0723457941088485, 0.6630442669302152),
896 (0.0658222227763618, -0.7321821187402897),
897 (0.0658222227763618, 0.7321821187402897),
898 (0.0586840934785355, -0.7944837959679424),
899 (0.0586840934785355, 0.7944837959679424),
900 (0.0509980592623762, -0.8493676137325700),
901 (0.0509980592623762, 0.8493676137325700),
902 (0.0428358980222267, -0.8963211557660521),
903 (0.0428358980222267, 0.8963211557660521),
904 (0.0342738629130214, -0.9349060759377397),
905 (0.0342738629130214, 0.9349060759377397),
906 (0.0253920653092621, -0.9647622555875064),
907 (0.0253920653092621, 0.9647622555875064),
908 (0.0162743947309057, -0.9856115115452684),
909 (0.0162743947309057, 0.9856115115452684),
910 (0.0070186100094701, -0.9972638618494816),
911 (0.0070186100094701, 0.9972638618494816),
912];
913
914pub const GAUSS_LEGENDRE_COEFFS_32_HALF: &[(f64, f64)] = &[
915 (0.0965400885147278, 0.0483076656877383),
916 (0.0956387200792749, 0.1444719615827965),
917 (0.0938443990808046, 0.2392873622521371),
918 (0.0911738786957639, 0.3318686022821277),
919 (0.0876520930044038, 0.4213512761306353),
920 (0.0833119242269467, 0.5068999089322294),
921 (0.0781938957870703, 0.5877157572407623),
922 (0.0723457941088485, 0.6630442669302152),
923 (0.0658222227763618, 0.7321821187402897),
924 (0.0586840934785355, 0.7944837959679424),
925 (0.0509980592623762, 0.8493676137325700),
926 (0.0428358980222267, 0.8963211557660521),
927 (0.0342738629130214, 0.9349060759377397),
928 (0.0253920653092621, 0.9647622555875064),
929 (0.0162743947309057, 0.9856115115452684),
930 (0.0070186100094701, 0.9972638618494816),
931];
932
933#[cfg(test)]
934mod tests {
935 use crate::common::*;
936 use arrayvec::ArrayVec;
937
938 fn verify<const N: usize>(mut roots: ArrayVec<f64, N>, expected: &[f64]) {
939 assert_eq!(expected.len(), roots.len());
940 let epsilon = 1e-12;
941 roots.sort_by(|a, b| a.partial_cmp(b).unwrap());
942 for i in 0..expected.len() {
943 assert!((roots[i] - expected[i]).abs() < epsilon);
944 }
945 }
946
947 #[test]
948 fn test_solve_cubic() {
949 verify(solve_cubic(-5.0, 0.0, 0.0, 1.0), &[5.0f64.cbrt()]);
950 verify(solve_cubic(-5.0, -1.0, 0.0, 1.0), &[1.90416085913492]);
951 verify(solve_cubic(0.0, -1.0, 0.0, 1.0), &[-1.0, 0.0, 1.0]);
952 verify(solve_cubic(-2.0, -3.0, 0.0, 1.0), &[-1.0, 2.0]);
953 verify(solve_cubic(2.0, -3.0, 0.0, 1.0), &[-2.0, 1.0]);
954 verify(
955 solve_cubic(2.0 - 1e-12, 5.0, 4.0, 1.0),
956 &[
957 -1.9999999999989995,
958 -1.0000010000848456,
959 -0.9999989999161546,
960 ],
961 );
962 verify(solve_cubic(2.0 + 1e-12, 5.0, 4.0, 1.0), &[-2.0]);
963 }
964
965 #[test]
966 fn test_solve_quadratic() {
967 verify(
968 solve_quadratic(-5.0, 0.0, 1.0),
969 &[-(5.0f64.sqrt()), 5.0f64.sqrt()],
970 );
971 verify(solve_quadratic(5.0, 0.0, 1.0), &[]);
972 verify(solve_quadratic(5.0, 1.0, 0.0), &[-5.0]);
973 verify(solve_quadratic(1.0, 2.0, 1.0), &[-1.0]);
974 }
975
976 #[test]
977 fn test_solve_quartic() {
978 // These test cases are taken from Orellana and De Michele paper (Table 1).
979 fn test_with_roots(coeffs: [f64; 4], roots: &[f64], rel_err: f64) {
980 // Note: in paper, coefficients are in decreasing order.
981 let mut actual = solve_quartic(coeffs[3], coeffs[2], coeffs[1], coeffs[0], 1.0);
982 actual.sort_by(f64::total_cmp);
983 assert_eq!(actual.len(), roots.len());
984 for (actual, expected) in actual.iter().zip(roots) {
985 assert!(
986 (actual - expected).abs() < rel_err * expected.abs(),
987 "actual {:e}, expected {:e}, err {:e}",
988 actual,
989 expected,
990 actual - expected
991 );
992 }
993 }
994
995 fn test_vieta_roots(x1: f64, x2: f64, x3: f64, x4: f64, roots: &[f64], rel_err: f64) {
996 let a = -(x1 + x2 + x3 + x4);
997 let b = x1 * (x2 + x3) + x2 * (x3 + x4) + x4 * (x1 + x3);
998 let c = -x1 * x2 * (x3 + x4) - x3 * x4 * (x1 + x2);
999 let d = x1 * x2 * x3 * x4;
1000 test_with_roots([a, b, c, d], roots, rel_err);
1001 }
1002
1003 fn test_vieta(x1: f64, x2: f64, x3: f64, x4: f64, rel_err: f64) {
1004 test_vieta_roots(x1, x2, x3, x4, &[x1, x2, x3, x4], rel_err);
1005 }
1006
1007 // case 1
1008 test_vieta(1., 1e3, 1e6, 1e9, 1e-16);
1009 // case 2
1010 test_vieta(2., 2.001, 2.002, 2.003, 1e-6);
1011 // case 3
1012 test_vieta(1e47, 1e49, 1e50, 1e53, 2e-16);
1013 // case 4
1014 test_vieta(-1., 1., 2., 1e14, 1e-16);
1015 // case 5
1016 test_vieta(-2e7, -1., 1., 1e7, 1e-16);
1017 // case 6
1018 test_with_roots(
1019 [-9000002.0, -9999981999998.0, 19999982e6, -2e13],
1020 &[-1e6, 1e7],
1021 1e-16,
1022 );
1023 // case 7
1024 test_with_roots(
1025 [2000011.0, 1010022000028.0, 11110056e6, 2828e10],
1026 &[-7., -4.],
1027 1e-16,
1028 );
1029 // case 8
1030 test_with_roots(
1031 [-100002011.0, 201101022001.0, -102200111000011.0, 11000011e8],
1032 &[11., 1e8],
1033 1e-16,
1034 );
1035 // cases 9-13 have no real roots
1036 // case 14
1037 test_vieta_roots(1000., 1000., 1000., 1000., &[1000., 1000.], 1e-16);
1038 // case 15
1039 test_vieta_roots(1e-15, 1000., 1000., 1000., &[1e-15, 1000., 1000.], 1e-15);
1040 // case 16 no real roots
1041 // case 17
1042 test_vieta(10000., 10001., 10010., 10100., 1e-6);
1043 // case 19
1044 test_vieta_roots(1., 1e30, 1e30, 1e44, &[1., 1e30, 1e44], 1e-16);
1045 // case 20
1046 // FAILS, error too big
1047 test_vieta(1., 1e7, 1e7, 1e14, 1e-7);
1048 // case 21 doesn't pick up double root
1049 // case 22
1050 test_vieta(1., 10., 1e152, 1e154, 3e-16);
1051 // case 23
1052 test_with_roots(
1053 [1., 1., 3. / 8., 1e-3],
1054 &[-0.497314148060048, -0.00268585193995149],
1055 2e-15,
1056 );
1057 // case 24
1058 const S: f64 = 1e30;
1059 test_with_roots(
1060 [-(1. + 1. / S), 1. / S - S * S, S * S + S, -S],
1061 &[-S, 1e-30, 1., S],
1062 2e-16,
1063 );
1064 }
1065
1066 #[test]
1067 fn test_solve_itp() {
1068 let f = |x: f64| x.powi(3) - x - 2.0;
1069 let x = solve_itp(f, 1., 2., 1e-12, 0, 0.2, f(1.), f(2.));
1070 assert!(f(x).abs() < 6e-12);
1071 }
1072
1073 #[test]
1074 fn test_inv_arclen() {
1075 use crate::{ParamCurve, ParamCurveArclen};
1076 let c = crate::CubicBez::new(
1077 (0.0, 0.0),
1078 (100.0 / 3.0, 0.0),
1079 (200.0 / 3.0, 100.0 / 3.0),
1080 (100.0, 100.0),
1081 );
1082 let target = 100.0;
1083 let _ = solve_itp(
1084 |t| c.subsegment(0.0..t).arclen(1e-9) - target,
1085 0.,
1086 1.,
1087 1e-6,
1088 1,
1089 0.2,
1090 -target,
1091 c.arclen(1e-9) - target,
1092 );
1093 }
1094}
1095