1 | // [See license/rust-lang/libm] Copyright (c) 2018 Jorge Aparicio |
2 | #[cfg (not(all(any(target_arch = "x86" , target_arch = "x86_64" ), feature = "simd" )))] |
3 | pub fn sqrt(x: f32) -> f32 { |
4 | const TINY: f32 = 1.0e-30; |
5 | |
6 | let mut z: f32; |
7 | let sign: i32 = 0x80000000u32 as i32; |
8 | let mut ix: i32; |
9 | let mut s: i32; |
10 | let mut q: i32; |
11 | let mut m: i32; |
12 | let mut t: i32; |
13 | let mut i: i32; |
14 | let mut r: u32; |
15 | |
16 | ix = x.to_bits() as i32; |
17 | |
18 | /* take care of Inf and NaN */ |
19 | if (ix as u32 & 0x7f800000) == 0x7f800000 { |
20 | return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ |
21 | } |
22 | |
23 | /* take care of zero */ |
24 | if ix <= 0 { |
25 | if (ix & !sign) == 0 { |
26 | return x; /* sqrt(+-0) = +-0 */ |
27 | } |
28 | if ix < 0 { |
29 | #[allow (clippy::eq_op)] // This has special semantics and is not wrong. |
30 | return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ |
31 | } |
32 | } |
33 | |
34 | /* normalize x */ |
35 | m = ix >> 23; |
36 | if m == 0 { |
37 | /* subnormal x */ |
38 | i = 0; |
39 | while ix & 0x00800000 == 0 { |
40 | ix <<= 1; |
41 | i = i + 1; |
42 | } |
43 | m -= i - 1; |
44 | } |
45 | m -= 127; /* unbias exponent */ |
46 | ix = (ix & 0x007fffff) | 0x00800000; |
47 | if m & 1 == 1 { |
48 | /* odd m, double x to make it even */ |
49 | ix += ix; |
50 | } |
51 | m >>= 1; /* m = [m/2] */ |
52 | |
53 | /* generate sqrt(x) bit by bit */ |
54 | ix += ix; |
55 | q = 0; |
56 | s = 0; |
57 | r = 0x01000000; /* r = moving bit from right to left */ |
58 | |
59 | while r != 0 { |
60 | t = s + r as i32; |
61 | if t <= ix { |
62 | s = t + r as i32; |
63 | ix -= t; |
64 | q += r as i32; |
65 | } |
66 | ix += ix; |
67 | r >>= 1; |
68 | } |
69 | |
70 | /* use floating add to find out rounding direction */ |
71 | if ix != 0 { |
72 | z = 1.0 - TINY; /* raise inexact flag */ |
73 | if z >= 1.0 { |
74 | z = 1.0 + TINY; |
75 | if z > 1.0 { |
76 | q += 2; |
77 | } else { |
78 | q += q & 1; |
79 | } |
80 | } |
81 | } |
82 | |
83 | ix = (q >> 1) + 0x3f000000; |
84 | ix += m << 23; |
85 | f32::from_bits(ix as u32) |
86 | } |
87 | |
88 | #[cfg (all(any(target_arch = "x86" , target_arch = "x86_64" ), feature = "simd" ))] |
89 | #[inline (always)] |
90 | pub fn sqrt(value: f32) -> f32 { |
91 | #[cfg (target_arch = "x86" )] |
92 | use core::arch::x86::*; |
93 | #[cfg (target_arch = "x86_64" )] |
94 | use core::arch::x86_64::*; |
95 | |
96 | unsafe { _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(value))) } |
97 | } |
98 | |