1 | //===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements __udivmodti4 for the compiler_rt library. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "int_lib.h" |
14 | |
15 | #ifdef CRT_HAS_128BIT |
16 | |
17 | // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits. |
18 | // Remainder stored in r. |
19 | // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division |
20 | // fallback. For a correctness proof see the reference for this algorithm |
21 | // in Knuth, Volume 2, section 4.3.1, Algorithm D. |
22 | UNUSED |
23 | static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v, |
24 | du_int *r) { |
25 | const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; |
26 | const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits) |
27 | du_int un1, un0; // Norm. dividend LSD's |
28 | du_int vn1, vn0; // Norm. divisor digits |
29 | du_int q1, q0; // Quotient digits |
30 | du_int un64, un21, un10; // Dividend digit pairs |
31 | du_int rhat; // A remainder |
32 | si_int s; // Shift amount for normalization |
33 | |
34 | s = __builtin_clzll(v); |
35 | if (s > 0) { |
36 | // Normalize the divisor. |
37 | v = v << s; |
38 | un64 = (u1 << s) | (u0 >> (n_udword_bits - s)); |
39 | un10 = u0 << s; // Shift dividend left |
40 | } else { |
41 | // Avoid undefined behavior of (u0 >> 64). |
42 | un64 = u1; |
43 | un10 = u0; |
44 | } |
45 | |
46 | // Break divisor up into two 32-bit digits. |
47 | vn1 = v >> (n_udword_bits / 2); |
48 | vn0 = v & 0xFFFFFFFF; |
49 | |
50 | // Break right half of dividend into two digits. |
51 | un1 = un10 >> (n_udword_bits / 2); |
52 | un0 = un10 & 0xFFFFFFFF; |
53 | |
54 | // Compute the first quotient digit, q1. |
55 | q1 = un64 / vn1; |
56 | rhat = un64 - q1 * vn1; |
57 | |
58 | // q1 has at most error 2. No more than 2 iterations. |
59 | while (q1 >= b || q1 * vn0 > b * rhat + un1) { |
60 | q1 = q1 - 1; |
61 | rhat = rhat + vn1; |
62 | if (rhat >= b) |
63 | break; |
64 | } |
65 | |
66 | un21 = un64 * b + un1 - q1 * v; |
67 | |
68 | // Compute the second quotient digit. |
69 | q0 = un21 / vn1; |
70 | rhat = un21 - q0 * vn1; |
71 | |
72 | // q0 has at most error 2. No more than 2 iterations. |
73 | while (q0 >= b || q0 * vn0 > b * rhat + un0) { |
74 | q0 = q0 - 1; |
75 | rhat = rhat + vn1; |
76 | if (rhat >= b) |
77 | break; |
78 | } |
79 | |
80 | *r = (un21 * b + un0 - q0 * v) >> s; |
81 | return q1 * b + q0; |
82 | } |
83 | |
84 | static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, |
85 | du_int *r) { |
86 | #if defined(__x86_64__) |
87 | du_int result; |
88 | __asm__("divq %[v]" |
89 | : "=a" (result), "=d" (*r) |
90 | : [ v ] "r" (v), "a" (u0), "d" (u1)); |
91 | return result; |
92 | #else |
93 | return udiv128by64to64default(u1, u0, v, r); |
94 | #endif |
95 | } |
96 | |
97 | // Effects: if rem != 0, *rem = a % b |
98 | // Returns: a / b |
99 | |
100 | COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) { |
101 | const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; |
102 | utwords dividend; |
103 | dividend.all = a; |
104 | utwords divisor; |
105 | divisor.all = b; |
106 | utwords quotient; |
107 | utwords remainder; |
108 | if (divisor.all > dividend.all) { |
109 | if (rem) |
110 | *rem = dividend.all; |
111 | return 0; |
112 | } |
113 | // When the divisor fits in 64 bits, we can use an optimized path. |
114 | if (divisor.s.high == 0) { |
115 | remainder.s.high = 0; |
116 | if (dividend.s.high < divisor.s.low) { |
117 | // The result fits in 64 bits. |
118 | quotient.s.low = udiv128by64to64(u1: dividend.s.high, u0: dividend.s.low, |
119 | v: divisor.s.low, r: &remainder.s.low); |
120 | quotient.s.high = 0; |
121 | } else { |
122 | // First, divide with the high part to get the remainder in dividend.s.high. |
123 | // After that dividend.s.high < divisor.s.low. |
124 | quotient.s.high = dividend.s.high / divisor.s.low; |
125 | dividend.s.high = dividend.s.high % divisor.s.low; |
126 | quotient.s.low = udiv128by64to64(u1: dividend.s.high, u0: dividend.s.low, |
127 | v: divisor.s.low, r: &remainder.s.low); |
128 | } |
129 | if (rem) |
130 | *rem = remainder.all; |
131 | return quotient.all; |
132 | } |
133 | // 0 <= shift <= 63. |
134 | si_int shift = |
135 | __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high); |
136 | divisor.all <<= shift; |
137 | quotient.s.high = 0; |
138 | quotient.s.low = 0; |
139 | for (; shift >= 0; --shift) { |
140 | quotient.s.low <<= 1; |
141 | // Branch free version of. |
142 | // if (dividend.all >= divisor.all) |
143 | // { |
144 | // dividend.all -= divisor.all; |
145 | // carry = 1; |
146 | // } |
147 | const ti_int s = |
148 | (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1); |
149 | quotient.s.low |= s & 1; |
150 | dividend.all -= divisor.all & s; |
151 | divisor.all >>= 1; |
152 | } |
153 | if (rem) |
154 | *rem = dividend.all; |
155 | return quotient.all; |
156 | } |
157 | |
158 | #endif // CRT_HAS_128BIT |
159 | |