| 1 | // [See license/rust-lang/libm] Copyright (c) 2018 Jorge Aparicio |
| 2 | #[cfg (not(all(any(target_arch = "x86" , target_arch = "x86_64" ), feature = "simd" )))] |
| 3 | pub fn sqrt(x: f32) -> f32 { |
| 4 | const TINY: f32 = 1.0e-30; |
| 5 | |
| 6 | let mut z: f32; |
| 7 | let sign: i32 = 0x80000000u32 as i32; |
| 8 | let mut ix: i32; |
| 9 | let mut s: i32; |
| 10 | let mut q: i32; |
| 11 | let mut m: i32; |
| 12 | let mut t: i32; |
| 13 | let mut i: i32; |
| 14 | let mut r: u32; |
| 15 | |
| 16 | ix = x.to_bits() as i32; |
| 17 | |
| 18 | /* take care of Inf and NaN */ |
| 19 | if (ix as u32 & 0x7f800000) == 0x7f800000 { |
| 20 | return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ |
| 21 | } |
| 22 | |
| 23 | /* take care of zero */ |
| 24 | if ix <= 0 { |
| 25 | if (ix & !sign) == 0 { |
| 26 | return x; /* sqrt(+-0) = +-0 */ |
| 27 | } |
| 28 | if ix < 0 { |
| 29 | #[allow (clippy::eq_op)] // This has special semantics and is not wrong. |
| 30 | return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | /* normalize x */ |
| 35 | m = ix >> 23; |
| 36 | if m == 0 { |
| 37 | /* subnormal x */ |
| 38 | i = 0; |
| 39 | while ix & 0x00800000 == 0 { |
| 40 | ix <<= 1; |
| 41 | i = i + 1; |
| 42 | } |
| 43 | m -= i - 1; |
| 44 | } |
| 45 | m -= 127; /* unbias exponent */ |
| 46 | ix = (ix & 0x007fffff) | 0x00800000; |
| 47 | if m & 1 == 1 { |
| 48 | /* odd m, double x to make it even */ |
| 49 | ix += ix; |
| 50 | } |
| 51 | m >>= 1; /* m = [m/2] */ |
| 52 | |
| 53 | /* generate sqrt(x) bit by bit */ |
| 54 | ix += ix; |
| 55 | q = 0; |
| 56 | s = 0; |
| 57 | r = 0x01000000; /* r = moving bit from right to left */ |
| 58 | |
| 59 | while r != 0 { |
| 60 | t = s + r as i32; |
| 61 | if t <= ix { |
| 62 | s = t + r as i32; |
| 63 | ix -= t; |
| 64 | q += r as i32; |
| 65 | } |
| 66 | ix += ix; |
| 67 | r >>= 1; |
| 68 | } |
| 69 | |
| 70 | /* use floating add to find out rounding direction */ |
| 71 | if ix != 0 { |
| 72 | z = 1.0 - TINY; /* raise inexact flag */ |
| 73 | if z >= 1.0 { |
| 74 | z = 1.0 + TINY; |
| 75 | if z > 1.0 { |
| 76 | q += 2; |
| 77 | } else { |
| 78 | q += q & 1; |
| 79 | } |
| 80 | } |
| 81 | } |
| 82 | |
| 83 | ix = (q >> 1) + 0x3f000000; |
| 84 | ix += m << 23; |
| 85 | f32::from_bits(ix as u32) |
| 86 | } |
| 87 | |
| 88 | #[cfg (all(any(target_arch = "x86" , target_arch = "x86_64" ), feature = "simd" ))] |
| 89 | #[inline (always)] |
| 90 | pub fn sqrt(value: f32) -> f32 { |
| 91 | #[cfg (target_arch = "x86" )] |
| 92 | use core::arch::x86::*; |
| 93 | #[cfg (target_arch = "x86_64" )] |
| 94 | use core::arch::x86_64::*; |
| 95 | |
| 96 | unsafe { _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(value))) } |
| 97 | } |
| 98 | |