//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements __udivmodti4 for the compiler_rt library.
//
//===----------------------------------------------------------------------===//

#include "int_lib.h"

#ifdef CRT_HAS_128BIT

// Returns the quotient of the 128-bit value (u1:u0) divided by the 64-bit
// value v; the quotient must fit in 64 bits. The remainder is stored in *r.
// Taken and adjusted from libdivide's libdivide_128_div_64_to_64 division
// fallback. For a correctness proof see the reference for this algorithm
// in Knuth, Volume 2, section 4.3.1, Algorithm D.
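// The routine works in base 2^32: the normalized dividend is treated as four
// 32-bit digits and the normalized divisor as two, and each quotient digit is
// estimated from the leading digits and then corrected, as in schoolbook long
// division.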
UNUSED
static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
                                            du_int *r) {
  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
  du_int un1, un0;                                // Norm. dividend LSD's
  du_int vn1, vn0;                                // Norm. divisor digits
  du_int q1, q0;                                  // Quotient digits
  du_int un64, un21, un10;                        // Dividend digit pairs
  du_int rhat;                                    // A remainder
  si_int s;                                       // Shift amount for normalization

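  // Normalization (step D1 in Knuth): shift the divisor left until its high
  // bit is set and shift the dividend by the same amount; with vn1 >= b / 2
  // the quotient digit estimates below are off by at most 2.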
  s = __builtin_clzll(v);
  if (s > 0) {
    // Normalize the divisor.
    v = v << s;
    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
    un10 = u0 << s; // Shift dividend left
  } else {
    // Avoid undefined behavior of (u0 >> 64).
    un64 = u1;
    un10 = u0;
  }

  // Break divisor up into two 32-bit digits.
  vn1 = v >> (n_udword_bits / 2);
  vn0 = v & 0xFFFFFFFF;

  // Break right half of dividend into two digits.
  un1 = un10 >> (n_udword_bits / 2);
  un0 = un10 & 0xFFFFFFFF;

  // Compute the first quotient digit, q1.
  q1 = un64 / vn1;
  rhat = un64 - q1 * vn1;

  // q1 has at most error 2. No more than 2 iterations.
  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
    q1 = q1 - 1;
    rhat = rhat + vn1;
    if (rhat >= b)
      break;
  }

  un21 = un64 * b + un1 - q1 * v;

  // Compute the second quotient digit.
  q0 = un21 / vn1;
  rhat = un21 - q0 * vn1;

  // q0 has at most error 2. No more than 2 iterations.
  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
    q0 = q0 - 1;
    rhat = rhat + vn1;
    if (rhat >= b)
      break;
  }

  *r = (un21 * b + un0 - q0 * v) >> s;
  return q1 * b + q0;
}

static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
                                     du_int *r) {
#if defined(__x86_64__) && !defined(__arm64ec__)
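  // The divq instruction divides the 128-bit value in rdx:rax by its operand,
  // leaving the quotient in rax and the remainder in rdx; it faults if the
  // quotient does not fit in 64 bits, which this routine's precondition rules
  // out.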
  du_int result;
  __asm__("divq %[v]"
          : "=a"(result), "=d"(*r)
          : [v] "r"(v), "a"(u0), "d"(u1));
  return result;
#else
  return udiv128by64to64default(u1, u0, v, r);
#endif
}

// Effects: if rem != 0, *rem = a % b
// Returns: a / b
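// For example, __udivmodti4(a, b, &r) yields a / b and leaves a % b in r; the
// 128-bit unsigned division and modulo builtins (__udivti3, __umodti3) are
// thin wrappers around this routine.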

COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
  utwords dividend;
  dividend.all = a;
  utwords divisor;
  divisor.all = b;
  utwords quotient;
  utwords remainder;
  if (divisor.all > dividend.all) {
    if (rem)
      *rem = dividend.all;
    return 0;
  }
  // When the divisor fits in 64 bits, we can use an optimized path.
  if (divisor.s.high == 0) {
    remainder.s.high = 0;
    if (dividend.s.high < divisor.s.low) {
      // The result fits in 64 bits.
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
                                       divisor.s.low, &remainder.s.low);
      quotient.s.high = 0;
    } else {
      // First, divide with the high part to get the remainder in dividend.s.high.
      // After that dividend.s.high < divisor.s.low.
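      // The remainder from the first divide is strictly less than
      // divisor.s.low, so the quotient of the second divide fits in 64 bits,
      // satisfying udiv128by64to64's precondition.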
      quotient.s.high = dividend.s.high / divisor.s.low;
      dividend.s.high = dividend.s.high % divisor.s.low;
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
                                       divisor.s.low, &remainder.s.low);
    }
    if (rem)
      *rem = remainder.all;
    return quotient.all;
  }
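  // Both operands need all 128 bits: fall back to binary long division,
  // aligning the divisor's highest set bit with the dividend's and producing
  // one quotient bit per iteration.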
  // 0 <= shift <= 63.
  si_int shift =
      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
  divisor.all <<= shift;
  quotient.s.high = 0;
  quotient.s.low = 0;
  for (; shift >= 0; --shift) {
    quotient.s.low <<= 1;
    // Branch-free version of:
    // if (dividend.all >= divisor.all)
    // {
    //    dividend.all -= divisor.all;
    //    carry = 1;
    // }
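    // s is all ones when dividend.all >= divisor.all (divisor.all -
    // dividend.all - 1 is negative, so the arithmetic shift replicates the
    // sign bit) and all zeros otherwise; it is used below as a mask for the
    // quotient bit and the conditional subtraction.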
    const ti_int s =
        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
    quotient.s.low |= s & 1;
    dividend.all -= divisor.all & s;
    divisor.all >>= 1;
  }
  if (rem)
    *rem = dividend.all;
  return quotient.all;
}

#endif // CRT_HAS_128BIT
| 159 | |