| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> |
| 4 | * |
| 5 | * Based on former do_div() implementation from asm-parisc/div64.h: |
| 6 | * Copyright (C) 1999 Hewlett-Packard Co |
| 7 | * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> |
| 8 | * |
| 9 | * |
| 10 | * Generic C version of 64bit/32bit division and modulo, with |
| 11 | * 64bit result and 32bit remainder. |
| 12 | * |
| 13 | * The fast case for (n>>32 == 0) is handled inline by do_div(). |
| 14 | * |
| 15 | * Code generated for this function might be very inefficient |
| 16 | * for some CPUs. __div64_32() can be overridden by linking arch-specific |
| 17 | * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S |
| 18 | * or by defining a preprocessor macro in arch/include/asm/div64.h. |
| 19 | */ |
| 20 | |
| 21 | #include <linux/bitops.h> |
| 22 | #include <linux/export.h> |
| 23 | #include <linux/math.h> |
| 24 | #include <linux/math64.h> |
| 25 | #include <linux/minmax.h> |
| 26 | #include <linux/log2.h> |
| 27 | |
| 28 | /* Not needed on 64bit architectures */ |
| 29 | #if BITS_PER_LONG == 32 |
| 30 | |
| 31 | #ifndef __div64_32 |
| 32 | uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) |
| 33 | { |
| 34 | uint64_t rem = *n; |
| 35 | uint64_t b = base; |
| 36 | uint64_t res, d = 1; |
| 37 | uint32_t high = rem >> 32; |
| 38 | |
| 39 | /* Reduce the thing a bit first */ |
| 40 | res = 0; |
| 41 | if (high >= base) { |
| 42 | high /= base; |
| 43 | res = (uint64_t) high << 32; |
| 44 | rem -= (uint64_t) (high*base) << 32; |
| 45 | } |
| 46 | |
| 47 | while ((int64_t)b > 0 && b < rem) { |
| 48 | b = b+b; |
| 49 | d = d+d; |
| 50 | } |
| 51 | |
| 52 | do { |
| 53 | if (rem >= b) { |
| 54 | rem -= b; |
| 55 | res += d; |
| 56 | } |
| 57 | b >>= 1; |
| 58 | d >>= 1; |
| 59 | } while (d); |
| 60 | |
| 61 | *n = res; |
| 62 | return rem; |
| 63 | } |
| 64 | EXPORT_SYMBOL(__div64_32); |
| 65 | #endif |
| 66 | |
| 67 | #ifndef div_s64_rem |
| 68 | s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) |
| 69 | { |
| 70 | u64 quotient; |
| 71 | |
| 72 | if (dividend < 0) { |
| 73 | quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); |
| 74 | *remainder = -*remainder; |
| 75 | if (divisor > 0) |
| 76 | quotient = -quotient; |
| 77 | } else { |
| 78 | quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); |
| 79 | if (divisor < 0) |
| 80 | quotient = -quotient; |
| 81 | } |
| 82 | return quotient; |
| 83 | } |
| 84 | EXPORT_SYMBOL(div_s64_rem); |
| 85 | #endif |
| 86 | |
| 87 | /* |
| 88 | * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder |
| 89 | * @dividend: 64bit dividend |
| 90 | * @divisor: 64bit divisor |
| 91 | * @remainder: 64bit remainder |
| 92 | * |
| 93 | * This implementation is a comparable to algorithm used by div64_u64. |
| 94 | * But this operation, which includes math for calculating the remainder, |
| 95 | * is kept distinct to avoid slowing down the div64_u64 operation on 32bit |
| 96 | * systems. |
| 97 | */ |
| 98 | #ifndef div64_u64_rem |
| 99 | u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) |
| 100 | { |
| 101 | u32 high = divisor >> 32; |
| 102 | u64 quot; |
| 103 | |
| 104 | if (high == 0) { |
| 105 | u32 rem32; |
| 106 | quot = div_u64_rem(dividend, divisor, &rem32); |
| 107 | *remainder = rem32; |
| 108 | } else { |
| 109 | int n = fls(high); |
| 110 | quot = div_u64(dividend >> n, divisor >> n); |
| 111 | |
| 112 | if (quot != 0) |
| 113 | quot--; |
| 114 | |
| 115 | *remainder = dividend - quot * divisor; |
| 116 | if (*remainder >= divisor) { |
| 117 | quot++; |
| 118 | *remainder -= divisor; |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | return quot; |
| 123 | } |
| 124 | EXPORT_SYMBOL(div64_u64_rem); |
| 125 | #endif |
| 126 | |
| 127 | /* |
| 128 | * div64_u64 - unsigned 64bit divide with 64bit divisor |
| 129 | * @dividend: 64bit dividend |
| 130 | * @divisor: 64bit divisor |
| 131 | * |
| 132 | * This implementation is a modified version of the algorithm proposed |
| 133 | * by the book 'Hacker's Delight'. The original source and full proof |
| 134 | * can be found here and is available for use without restriction. |
| 135 | * |
| 136 | * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' |
| 137 | */ |
| 138 | #ifndef div64_u64 |
| 139 | u64 div64_u64(u64 dividend, u64 divisor) |
| 140 | { |
| 141 | u32 high = divisor >> 32; |
| 142 | u64 quot; |
| 143 | |
| 144 | if (high == 0) { |
| 145 | quot = div_u64(dividend, divisor); |
| 146 | } else { |
| 147 | int n = fls(high); |
| 148 | quot = div_u64(dividend >> n, divisor >> n); |
| 149 | |
| 150 | if (quot != 0) |
| 151 | quot--; |
| 152 | if ((dividend - quot * divisor) >= divisor) |
| 153 | quot++; |
| 154 | } |
| 155 | |
| 156 | return quot; |
| 157 | } |
| 158 | EXPORT_SYMBOL(div64_u64); |
| 159 | #endif |
| 160 | |
| 161 | #ifndef div64_s64 |
| 162 | s64 div64_s64(s64 dividend, s64 divisor) |
| 163 | { |
| 164 | s64 quot, t; |
| 165 | |
| 166 | quot = div64_u64(abs(dividend), abs(divisor)); |
| 167 | t = (dividend ^ divisor) >> 63; |
| 168 | |
| 169 | return (quot ^ t) - t; |
| 170 | } |
| 171 | EXPORT_SYMBOL(div64_s64); |
| 172 | #endif |
| 173 | |
| 174 | #endif /* BITS_PER_LONG == 32 */ |
| 175 | |
| 176 | /* |
| 177 | * Iterative div/mod for use when dividend is not expected to be much |
| 178 | * bigger than divisor. |
| 179 | */ |
| 180 | #ifndef iter_div_u64_rem |
| 181 | u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) |
| 182 | { |
| 183 | return __iter_div_u64_rem(dividend, divisor, remainder); |
| 184 | } |
| 185 | EXPORT_SYMBOL(iter_div_u64_rem); |
| 186 | #endif |
| 187 | |
| 188 | #if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64) |
| 189 | |
| 190 | #define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c) |
| 191 | |
| 192 | #if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64) |
| 193 | static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) |
| 194 | { |
| 195 | /* native 64x64=128 bits multiplication */ |
| 196 | u128 prod = (u128)a * b + c; |
| 197 | |
| 198 | *p_lo = prod; |
| 199 | return prod >> 64; |
| 200 | } |
| 201 | #else |
| 202 | static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) |
| 203 | { |
| 204 | /* perform a 64x64=128 bits multiplication in 32bit chunks */ |
| 205 | u64 x, y, z; |
| 206 | |
| 207 | /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */ |
| 208 | x = mul_add(a, b, c); |
| 209 | y = mul_add(a, b >> 32, c >> 32); |
| 210 | y = add_u64_u32(y, x >> 32); |
| 211 | z = mul_add(a >> 32, b >> 32, y >> 32); |
| 212 | y = mul_add(a >> 32, b, y); |
| 213 | *p_lo = (y << 32) + (u32)x; |
| 214 | return add_u64_u32(z, y >> 32); |
| 215 | } |
| 216 | #endif |
| 217 | |
| 218 | #ifndef BITS_PER_ITER |
| 219 | #define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16) |
| 220 | #endif |
| 221 | |
| 222 | #if BITS_PER_ITER == 32 |
| 223 | #define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c) |
| 224 | #define add_u64_long(a, b) ((a) + (b)) |
| 225 | #else |
| 226 | #undef BITS_PER_ITER |
| 227 | #define BITS_PER_ITER 16 |
| 228 | static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c) |
| 229 | { |
| 230 | u64 n_lo = mul_add(a, b, c); |
| 231 | u64 n_med = mul_add(a >> 32, b, c >> 32); |
| 232 | |
| 233 | n_med = add_u64_u32(n_med, n_lo >> 32); |
| 234 | *p_lo = n_med << 32 | (u32)n_lo; |
| 235 | return n_med >> 32; |
| 236 | } |
| 237 | |
| 238 | #define add_u64_long(a, b) add_u64_u32(a, b) |
| 239 | #endif |
| 240 | |
| 241 | u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) |
| 242 | { |
| 243 | unsigned long d_msig, q_digit; |
| 244 | unsigned int reps, d_z_hi; |
| 245 | u64 quotient, n_lo, n_hi; |
| 246 | u32 overflow; |
| 247 | |
| 248 | n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c); |
| 249 | |
| 250 | if (!n_hi) |
| 251 | return div64_u64(n_lo, d); |
| 252 | |
| 253 | if (unlikely(n_hi >= d)) { |
| 254 | /* trigger runtime exception if divisor is zero */ |
| 255 | if (d == 0) { |
| 256 | unsigned long zero = 0; |
| 257 | |
| 258 | OPTIMIZER_HIDE_VAR(zero); |
| 259 | return ~0UL/zero; |
| 260 | } |
| 261 | /* overflow: result is unrepresentable in a u64 */ |
| 262 | return ~0ULL; |
| 263 | } |
| 264 | |
| 265 | /* Left align the divisor, shifting the dividend to match */ |
| 266 | d_z_hi = __builtin_clzll(d); |
| 267 | if (d_z_hi) { |
| 268 | d <<= d_z_hi; |
| 269 | n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi); |
| 270 | n_lo <<= d_z_hi; |
| 271 | } |
| 272 | |
| 273 | reps = 64 / BITS_PER_ITER; |
| 274 | /* Optimise loop count for small dividends */ |
| 275 | if (!(u32)(n_hi >> 32)) { |
| 276 | reps -= 32 / BITS_PER_ITER; |
| 277 | n_hi = n_hi << 32 | n_lo >> 32; |
| 278 | n_lo <<= 32; |
| 279 | } |
| 280 | #if BITS_PER_ITER == 16 |
| 281 | if (!(u32)(n_hi >> 48)) { |
| 282 | reps--; |
| 283 | n_hi = add_u64_u32(n_hi << 16, n_lo >> 48); |
| 284 | n_lo <<= 16; |
| 285 | } |
| 286 | #endif |
| 287 | |
| 288 | /* Invert the dividend so we can use add instead of subtract. */ |
| 289 | n_lo = ~n_lo; |
| 290 | n_hi = ~n_hi; |
| 291 | |
| 292 | /* |
| 293 | * Get the most significant BITS_PER_ITER bits of the divisor. |
| 294 | * This is used to get a low 'guestimate' of the quotient digit. |
| 295 | */ |
| 296 | d_msig = (d >> (64 - BITS_PER_ITER)) + 1; |
| 297 | |
| 298 | /* |
| 299 | * Now do a 'long division' with BITS_PER_ITER bit 'digits'. |
| 300 | * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits. |
| 301 | * The worst case is dividing ~0 by 0x8000 which requires two subtracts. |
| 302 | */ |
| 303 | quotient = 0; |
| 304 | while (reps--) { |
| 305 | q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig; |
| 306 | /* Shift 'n' left to align with the product q_digit * d */ |
| 307 | overflow = n_hi >> (64 - BITS_PER_ITER); |
| 308 | n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER)); |
| 309 | n_lo <<= BITS_PER_ITER; |
| 310 | /* Add product to negated divisor */ |
| 311 | overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi); |
| 312 | /* Adjust for the q_digit 'guestimate' being low */ |
| 313 | while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) { |
| 314 | q_digit++; |
| 315 | n_hi += d; |
| 316 | overflow += n_hi < d; |
| 317 | } |
| 318 | quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit); |
| 319 | } |
| 320 | |
| 321 | /* |
| 322 | * The above only ensures the remainder doesn't overflow, |
| 323 | * it can still be possible to add (aka subtract) another copy |
| 324 | * of the divisor. |
| 325 | */ |
| 326 | if ((n_hi + d) > n_hi) |
| 327 | quotient++; |
| 328 | return quotient; |
| 329 | } |
| 330 | #if !defined(test_mul_u64_add_u64_div_u64) |
| 331 | EXPORT_SYMBOL(mul_u64_add_u64_div_u64); |
| 332 | #endif |
| 333 | #endif |
| 334 | |