| 1 | /* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ |
| 2 | /* |
| 3 | * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. |
| 4 | */ |
| 5 | /* |
| 6 | * ==================================================== |
| 7 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 8 | * |
| 9 | * Developed at SunPro, a Sun Microsystems, Inc. business. |
| 10 | * Permission to use, copy, modify, and distribute this |
| 11 | * software is freely granted, provided that this notice |
| 12 | * is preserved. |
| 13 | * ==================================================== |
| 14 | */ |
| 15 | |
| 16 | /// The square root of `x` (f32). |
| 17 | #[cfg_attr (all(test, assert_no_panic), no_panic::no_panic)] |
| 18 | pub fn sqrtf(x: f32) -> f32 { |
| 19 | // On wasm32 we know that LLVM's intrinsic will compile to an optimized |
| 20 | // `f32.sqrt` native instruction, so we can leverage this for both code size |
| 21 | // and speed. |
| 22 | llvm_intrinsically_optimized! { |
| 23 | #[cfg(target_arch = "wasm32" )] { |
| 24 | return if x < 0.0 { |
| 25 | ::core::f32::NAN |
| 26 | } else { |
| 27 | unsafe { ::core::intrinsics::sqrtf32(x) } |
| 28 | } |
| 29 | } |
| 30 | } |
| 31 | #[cfg (all(target_feature = "sse" , not(feature = "force-soft-floats" )))] |
| 32 | { |
| 33 | // Note: This path is unlikely since LLVM will usually have already |
| 34 | // optimized sqrt calls into hardware instructions if sse is available, |
| 35 | // but if someone does end up here they'll appreciate the speed increase. |
| 36 | #[cfg (target_arch = "x86" )] |
| 37 | use core::arch::x86::*; |
| 38 | #[cfg (target_arch = "x86_64" )] |
| 39 | use core::arch::x86_64::*; |
| 40 | unsafe { |
| 41 | let m = _mm_set_ss(x); |
| 42 | let m_sqrt = _mm_sqrt_ss(m); |
| 43 | _mm_cvtss_f32(m_sqrt) |
| 44 | } |
| 45 | } |
| 46 | #[cfg (any(not(target_feature = "sse" ), feature = "force-soft-floats" ))] |
| 47 | { |
| 48 | const TINY: f32 = 1.0e-30; |
| 49 | |
| 50 | let mut z: f32; |
| 51 | let sign: i32 = 0x80000000u32 as i32; |
| 52 | let mut ix: i32; |
| 53 | let mut s: i32; |
| 54 | let mut q: i32; |
| 55 | let mut m: i32; |
| 56 | let mut t: i32; |
| 57 | let mut i: i32; |
| 58 | let mut r: u32; |
| 59 | |
| 60 | ix = x.to_bits() as i32; |
| 61 | |
| 62 | /* take care of Inf and NaN */ |
| 63 | if (ix as u32 & 0x7f800000) == 0x7f800000 { |
| 64 | return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ |
| 65 | } |
| 66 | |
| 67 | /* take care of zero */ |
| 68 | if ix <= 0 { |
| 69 | if (ix & !sign) == 0 { |
| 70 | return x; /* sqrt(+-0) = +-0 */ |
| 71 | } |
| 72 | if ix < 0 { |
| 73 | return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | /* normalize x */ |
| 78 | m = ix >> 23; |
| 79 | if m == 0 { |
| 80 | /* subnormal x */ |
| 81 | i = 0; |
| 82 | while ix & 0x00800000 == 0 { |
| 83 | ix <<= 1; |
| 84 | i = i + 1; |
| 85 | } |
| 86 | m -= i - 1; |
| 87 | } |
| 88 | m -= 127; /* unbias exponent */ |
| 89 | ix = (ix & 0x007fffff) | 0x00800000; |
| 90 | if m & 1 == 1 { |
| 91 | /* odd m, double x to make it even */ |
| 92 | ix += ix; |
| 93 | } |
| 94 | m >>= 1; /* m = [m/2] */ |
| 95 | |
| 96 | /* generate sqrt(x) bit by bit */ |
| 97 | ix += ix; |
| 98 | q = 0; |
| 99 | s = 0; |
| 100 | r = 0x01000000; /* r = moving bit from right to left */ |
| 101 | |
| 102 | while r != 0 { |
| 103 | t = s + r as i32; |
| 104 | if t <= ix { |
| 105 | s = t + r as i32; |
| 106 | ix -= t; |
| 107 | q += r as i32; |
| 108 | } |
| 109 | ix += ix; |
| 110 | r >>= 1; |
| 111 | } |
| 112 | |
| 113 | /* use floating add to find out rounding direction */ |
| 114 | if ix != 0 { |
| 115 | z = 1.0 - TINY; /* raise inexact flag */ |
| 116 | if z >= 1.0 { |
| 117 | z = 1.0 + TINY; |
| 118 | if z > 1.0 { |
| 119 | q += 2; |
| 120 | } else { |
| 121 | q += q & 1; |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | ix = (q >> 1) + 0x3f000000; |
| 127 | ix += m << 23; |
| 128 | f32::from_bits(ix as u32) |
| 129 | } |
| 130 | } |
| 131 | |
// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
#[cfg(not(target_arch = "powerpc64"))]
#[cfg(test)]
mod tests {
    use core::f32::*;

    use super::*;

    /// Perfect squares must produce their exact roots.
    #[test]
    fn sanity_check() {
        let cases = [(100.0f32, 10.0f32), (4.0, 2.0)];
        for &(input, expected) in cases.iter() {
            assert_eq!(sqrtf(input), expected);
        }
    }

    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
    #[test]
    fn spec_tests() {
        // Not Asserted: FE_INVALID exception is raised if argument is negative.
        assert!(sqrtf(-1.0).is_nan());
        assert!(sqrtf(NAN).is_nan());
        // Zeros and positive infinity compare equal to their own square root.
        for &fixed_point in &[0.0, -0.0, INFINITY] {
            assert_eq!(sqrtf(fixed_point), fixed_point);
        }
    }

    /// Compares results bit-for-bit against known-good encodings.
    #[test]
    fn conformance_tests() {
        // (input, expected raw bits of the result)
        let cases = [
            (3.14159265359f32, 1071833029u32),
            (10000.0f32, 1120403456u32),
            (f32::from_bits(0x0000000f), 456082799u32),
            (INFINITY, 2139095040u32),
        ];

        for &(input, expected_bits) in cases.iter() {
            assert_eq!(expected_bits, sqrtf(input).to_bits());
        }
    }
}
| 168 | |