Warning: This file is not a C or C++ file. It does not have highlighting.
| 1 | //===-- Common header for fmod implementations ------------------*- C++ -*-===// |
|---|---|
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H |
| 11 | |
| 12 | #include "src/__support/CPP/bit.h" |
| 13 | #include "src/__support/CPP/limits.h" |
| 14 | #include "src/__support/CPP/type_traits.h" |
| 15 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 16 | #include "src/__support/FPUtil/FPBits.h" |
| 17 | #include "src/__support/macros/config.h" |
| 18 | #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
| 19 | |
| 20 | namespace LIBC_NAMESPACE_DECL { |
| 21 | namespace fputil { |
| 22 | namespace generic { |
| 23 | |
| 24 | // Objective: |
| 25 | // The algorithm uses integer arithmetic (max uint64_t) for general case. |
| 26 | // Some common cases, like abs(x) < abs(y) or abs(x) < 1000 * abs(y) are |
| 27 | // treated specially to increase performance. The part of checking special |
| 28 | // cases, numbers NaN, INF etc. treated separately. |
| 29 | // |
| 30 | // Objective: |
| 31 | // 1) FMod definition (https://cplusplus.com/reference/cmath/fmod/): |
| 32 | // fmod = numer - tquot * denom, where tquot is the truncated |
| 33 | // (i.e., rounded towards zero) result of: numer/denom. |
| 34 | // 2) FMod with negative x and/or y can be trivially converted to fmod for |
| 35 | // positive x and y. Therefore the algorithm below works only with |
| 36 | // positive numbers. |
| 37 | // 3) All positive floating point numbers can be represented as m * 2^e, |
| 38 | // where "m" is positive integer and "e" is signed. |
| 39 | // 4) FMod function can be calculated in integer numbers (x > y): |
| 40 | // fmod = m_x * 2^e_x - tquot * m_y * 2^e_y |
| 41 | // = 2^e_y * (m_x * 2^(e_x - e^y) - tquot * m_y). |
| 42 | // All variables in parentheses are unsigned integers. |
| 43 | // |
| 44 | // Mathematical background: |
| 45 | // Input x,y in the algorithm is represented (mathematically) like m_x*2^e_x |
| 46 | // and m_y*2^e_y. This is an ambiguous number representation. For example: |
| 47 | // m * 2^e = (2 * m) * 2^(e-1) |
| 48 | // The algorithm uses the facts that |
| 49 | // r = a % b = (a % (N * b)) % b, |
| 50 | // (a * c) % (b * c) = (a % b) * c |
| 51 | // where N is positive integer number. a, b and c - positive. Let's adopt |
| 52 | // the formula for representation above. |
| 53 | // a = m_x * 2^e_x, b = m_y * 2^e_y, N = 2^k |
| 54 | // r(k) = a % b = (m_x * 2^e_x) % (2^k * m_y * 2^e_y) |
| 55 | // = 2^(e_y + k) * (m_x * 2^(e_x - e_y - k) % m_y) |
| 56 | // r(k) = m_r * 2^e_r = (m_x % m_y) * 2^(m_y + k) |
| 57 | // = (2^p * (m_x % m_y) * 2^(e_y + k - p)) |
| 58 | // m_r = 2^p * (m_x % m_y), e_r = m_y + k - p |
| 59 | // |
| 60 | // Algorithm description: |
| 61 | // First, let write x = m_x * 2^e_x and y = m_y * 2^e_y with m_x, m_y, e_x, e_y |
| 62 | // are integers (m_x amd m_y positive). |
| 63 | // Then the naive implementation of the fmod function with a simple |
| 64 | // for/while loop: |
| 65 | // while (e_x > e_y) { |
| 66 | // m_x *= 2; --e_x; // m_x * 2^e_x == 2 * m_x * 2^(e_x - 1) |
| 67 | // m_x %= m_y; |
| 68 | // } |
| 69 | // On the other hand, the algorithm exploits the fact that m_x, m_y are the |
| 70 | // mantissas of floating point numbers, which use less bits than the storage |
| 71 | // integers: 24 / 32 for floats and 53 / 64 for doubles, so if in each step of |
| 72 | // the iteration, we can left shift m_x as many bits as the storage integer |
| 73 | // type can hold, the exponent reduction per step will be at least 32 - 24 = 8 |
| 74 | // for floats and 64 - 53 = 11 for doubles (double example below): |
| 75 | // while (e_x > e_y) { |
| 76 | // m_x <<= 11; e_x -= 11; // m_x * 2^e_x == 2^11 * m_x * 2^(e_x - 11) |
| 77 | // m_x %= m_y; |
| 78 | // } |
| 79 | // Some extra improvements are done: |
| 80 | // 1) Shift m_y maximum to the right, which can significantly improve |
| 81 | // performance for small integer numbers (y = 3 for example). |
| 82 | // The m_x shift in the loop can be 62 instead of 11 for double. |
| 83 | // 2) For some architectures with very slow division, it can be better to |
| 84 | // calculate inverse value ones, and after do multiplication in the loop. |
| 85 | // 3) "likely" special cases are treated specially to improve performance. |
| 86 | // |
| 87 | // Simple example: |
| 88 | // The examples below use byte for simplicity. |
| 89 | // 1) Shift hy maximum to right without losing bits and increase iy value |
| 90 | // m_y = 0b00101100 e_y = 20 after shift m_y = 0b00001011 e_y = 22. |
| 91 | // 2) m_x = m_x % m_y. |
| 92 | // 3) Move m_x maximum to left. Note that after (m_x = m_x % m_y) CLZ in m_x |
| 93 | // is not lower than CLZ in m_y. m_x=0b00001001 e_x = 100, m_x=0b10010000, |
| 94 | // e_x = 100-4 = 96. |
| 95 | // 4) Repeat (2) until e_x == e_y. |
| 96 | // |
| 97 | // Complexity analysis (double): |
| 98 | // Converting x,y to (m_x,e_x),(m_y, e_y): CTZ/shift/AND/OR/if. Loop count: |
| 99 | // (m_x - m_y) / (64 - "length of m_y"). |
| 100 | // max("length of m_y") = 53, |
| 101 | // max(e_x - e_y) = 2048 |
| 102 | // Maximum operation is 186. For rare "unrealistic" cases. |
| 103 | // |
| 104 | // Special cases (double): |
| 105 | // Supposing that case where |y| > 1e-292 and |x/y|<2000 is very common |
| 106 | // special processing is implemented. No m_y alignment, no loop: |
| 107 | // result = (m_x * 2^(e_x - e_y)) % m_y. |
| 108 | // When x and y are both subnormal (rare case but...) the |
| 109 | // result = m_x % m_y. |
| 110 | // Simplified conversion back to double. |
| 111 | |
| 112 | // Exceptional cases handler according to cppreference.com |
| 113 | // https://en.cppreference.com/w/cpp/numeric/math/fmod |
| 114 | // and POSIX standard described in Linux man |
| 115 | // https://man7.org/linux/man-pages/man3/fmod.3p.html |
| 116 | // C standard for the function is not full, so not by default (although it can |
| 117 | // be implemented in another handler. |
| 118 | // Signaling NaN converted to quiet NaN with FE_INVALID exception. |
| 119 | // https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1011.htm |
| 120 | template <typename T> struct FModDivisionSimpleHelper { |
| 121 | LIBC_INLINE constexpr static T execute(int exp_diff, int sides_zeroes_count, |
| 122 | T m_x, T m_y) { |
| 123 | while (exp_diff > sides_zeroes_count) { |
| 124 | exp_diff -= sides_zeroes_count; |
| 125 | m_x <<= sides_zeroes_count; |
| 126 | m_x %= m_y; |
| 127 | } |
| 128 | m_x <<= exp_diff; |
| 129 | m_x %= m_y; |
| 130 | return m_x; |
| 131 | } |
| 132 | }; |
| 133 | |
| 134 | template <typename T> struct FModDivisionInvMultHelper { |
| 135 | LIBC_INLINE constexpr static T execute(int exp_diff, int sides_zeroes_count, |
| 136 | T m_x, T m_y) { |
| 137 | constexpr int LENGTH = sizeof(T) * CHAR_BIT; |
| 138 | if (exp_diff > sides_zeroes_count) { |
| 139 | T inv_hy = (cpp::numeric_limits<T>::max() / m_y); |
| 140 | while (exp_diff > sides_zeroes_count) { |
| 141 | exp_diff -= sides_zeroes_count; |
| 142 | T hd = (m_x * inv_hy) >> (LENGTH - sides_zeroes_count); |
| 143 | m_x <<= sides_zeroes_count; |
| 144 | m_x -= hd * m_y; |
| 145 | while (LIBC_UNLIKELY(m_x > m_y)) |
| 146 | m_x -= m_y; |
| 147 | } |
| 148 | T hd = (m_x * inv_hy) >> (LENGTH - exp_diff); |
| 149 | m_x <<= exp_diff; |
| 150 | m_x -= hd * m_y; |
| 151 | while (LIBC_UNLIKELY(m_x > m_y)) |
| 152 | m_x -= m_y; |
| 153 | } else { |
| 154 | m_x <<= exp_diff; |
| 155 | m_x %= m_y; |
| 156 | } |
| 157 | return m_x; |
| 158 | } |
| 159 | }; |
| 160 | |
| 161 | template <typename T, typename U = typename FPBits<T>::StorageType, |
| 162 | typename DivisionHelper = FModDivisionSimpleHelper<U>> |
| 163 | class FMod { |
| 164 | static_assert(cpp::is_floating_point_v<T> && |
| 165 | is_unsigned_integral_or_big_int_v<U> && |
| 166 | (sizeof(U) * CHAR_BIT > FPBits<T>::FRACTION_LEN), |
| 167 | "FMod instantiated with invalid type."); |
| 168 | |
| 169 | private: |
| 170 | using FPB = FPBits<T>; |
| 171 | using StorageType = typename FPB::StorageType; |
| 172 | |
| 173 | LIBC_INLINE static bool pre_check(T x, T y, T &out) { |
| 174 | using FPB = fputil::FPBits<T>; |
| 175 | const T quiet_nan = FPB::quiet_nan().get_val(); |
| 176 | FPB sx(x), sy(y); |
| 177 | if (LIBC_LIKELY(!sy.is_zero() && !sy.is_inf_or_nan() && |
| 178 | !sx.is_inf_or_nan())) |
| 179 | return false; |
| 180 | |
| 181 | if (sx.is_nan() || sy.is_nan()) { |
| 182 | if (sx.is_signaling_nan() || sy.is_signaling_nan()) |
| 183 | fputil::raise_except_if_required(FE_INVALID); |
| 184 | out = quiet_nan; |
| 185 | return true; |
| 186 | } |
| 187 | |
| 188 | if (sx.is_inf() || sy.is_zero()) { |
| 189 | fputil::raise_except_if_required(FE_INVALID); |
| 190 | fputil::set_errno_if_required(EDOM); |
| 191 | out = quiet_nan; |
| 192 | return true; |
| 193 | } |
| 194 | |
| 195 | out = x; |
| 196 | return true; |
| 197 | } |
| 198 | |
| 199 | LIBC_INLINE static constexpr FPB eval_internal(FPB sx, FPB sy) { |
| 200 | |
| 201 | if (LIBC_LIKELY(sx.uintval() <= sy.uintval())) { |
| 202 | if (sx.uintval() < sy.uintval()) |
| 203 | return sx; // |x|<|y| return x |
| 204 | return FPB::zero(); // |x|=|y| return 0.0 |
| 205 | } |
| 206 | |
| 207 | int e_x = sx.get_biased_exponent(); |
| 208 | int e_y = sy.get_biased_exponent(); |
| 209 | |
| 210 | // Most common case where |y| is "very normal" and |x/y| < 2^EXP_LEN |
| 211 | if (LIBC_LIKELY(e_y > int(FPB::FRACTION_LEN) && |
| 212 | e_x - e_y <= int(FPB::EXP_LEN))) { |
| 213 | StorageType m_x = sx.get_explicit_mantissa(); |
| 214 | StorageType m_y = sy.get_explicit_mantissa(); |
| 215 | StorageType d = (e_x == e_y) |
| 216 | ? (m_x - m_y) |
| 217 | : static_cast<StorageType>(m_x << (e_x - e_y)) % m_y; |
| 218 | if (d == 0) |
| 219 | return FPB::zero(); |
| 220 | // iy - 1 because of "zero power" for number with power 1 |
| 221 | return FPB::make_value(d, e_y - 1); |
| 222 | } |
| 223 | // Both subnormal special case. |
| 224 | if (LIBC_UNLIKELY(e_x == 0 && e_y == 0)) { |
| 225 | FPB d; |
| 226 | d.set_mantissa(sx.uintval() % sy.uintval()); |
| 227 | return d; |
| 228 | } |
| 229 | |
| 230 | // Note that hx is not subnormal by conditions above. |
| 231 | U m_x = static_cast<U>(sx.get_explicit_mantissa()); |
| 232 | e_x--; |
| 233 | |
| 234 | U m_y = static_cast<U>(sy.get_explicit_mantissa()); |
| 235 | constexpr int DEFAULT_LEAD_ZEROS = |
| 236 | sizeof(U) * CHAR_BIT - FPB::FRACTION_LEN - 1; |
| 237 | int lead_zeros_m_y = DEFAULT_LEAD_ZEROS; |
| 238 | if (LIBC_LIKELY(e_y > 0)) { |
| 239 | e_y--; |
| 240 | } else { |
| 241 | m_y = static_cast<U>(sy.get_mantissa()); |
| 242 | lead_zeros_m_y = cpp::countl_zero(m_y); |
| 243 | } |
| 244 | |
| 245 | // Assume hy != 0 |
| 246 | int tail_zeros_m_y = cpp::countr_zero(m_y); |
| 247 | int sides_zeroes_count = lead_zeros_m_y + tail_zeros_m_y; |
| 248 | // n > 0 by conditions above |
| 249 | int exp_diff = e_x - e_y; |
| 250 | { |
| 251 | // Shift hy right until the end or n = 0 |
| 252 | int right_shift = exp_diff < tail_zeros_m_y ? exp_diff : tail_zeros_m_y; |
| 253 | m_y >>= right_shift; |
| 254 | exp_diff -= right_shift; |
| 255 | e_y += right_shift; |
| 256 | } |
| 257 | |
| 258 | { |
| 259 | // Shift hx left until the end or n = 0 |
| 260 | int left_shift = |
| 261 | exp_diff < DEFAULT_LEAD_ZEROS ? exp_diff : DEFAULT_LEAD_ZEROS; |
| 262 | m_x <<= left_shift; |
| 263 | exp_diff -= left_shift; |
| 264 | } |
| 265 | |
| 266 | m_x %= m_y; |
| 267 | if (LIBC_UNLIKELY(m_x == 0)) |
| 268 | return FPB::zero(); |
| 269 | |
| 270 | if (exp_diff == 0) |
| 271 | return FPB::make_value(static_cast<StorageType>(m_x), e_y); |
| 272 | |
| 273 | // hx next can't be 0, because hx < hy, hy % 2 == 1 hx * 2^i % hy != 0 |
| 274 | m_x = DivisionHelper::execute(exp_diff, sides_zeroes_count, m_x, m_y); |
| 275 | return FPB::make_value(static_cast<StorageType>(m_x), e_y); |
| 276 | } |
| 277 | |
| 278 | public: |
| 279 | LIBC_INLINE static T eval(T x, T y) { |
| 280 | if (T out; LIBC_UNLIKELY(pre_check(x, y, out))) |
| 281 | return out; |
| 282 | FPB sx(x), sy(y); |
| 283 | Sign sign = sx.sign(); |
| 284 | sx.set_sign(Sign::POS); |
| 285 | sy.set_sign(Sign::POS); |
| 286 | FPB result = eval_internal(sx, sy); |
| 287 | result.set_sign(sign); |
| 288 | return result.get_val(); |
| 289 | } |
| 290 | }; |
| 291 | |
| 292 | } // namespace generic |
| 293 | } // namespace fputil |
| 294 | } // namespace LIBC_NAMESPACE_DECL |
| 295 | |
| 296 | #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H |
| 297 |
Warning: This file is not a C or C++ file. It does not have highlighting.
