//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __udivsi3 (32-bit unsigned integer divide)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

        .syntax unified
        .text

DEFINE_CODE_STATE

        .p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the quotient of the unsigned division.
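@   For example, __udivsi3(7, 2) returns 3. A zero divisor is routed to
@   __aeabi_idiv0 on EABI targets (with r0 preset to 0); otherwise 0 is
@   returned.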

DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_EXT_IDIV__
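        // The core supports the integer divide extension, so use udiv
        // directly, checking for a zero divisor first so that the EABI
        // divide-by-zero handler is still reached.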
        tst r1, r1
        beq LOCAL_LABEL(divby0)
        udiv r0, r0, r1
        bx lr

LOCAL_LABEL(divby0):
        // Use movs for compatibility with v8-m.base.
        movs r0, #0
# ifdef __ARM_EABI__
        b __aeabi_idiv0
# else
        JMP(lr)
# endif

#else // ! __ARM_ARCH_EXT_IDIV__
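        // No hardware divide. Peel off the easy cases first: a zero
        // divisor goes to the divide-by-zero path, a divisor of 1 returns
        // the dividend unchanged, and a dividend smaller than the divisor
        // yields a quotient of 0.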
        cmp r1, #1
        bcc LOCAL_LABEL(divby0)
#if defined(USE_THUMB_1)
        bne LOCAL_LABEL(num_neq_denom)
        JMP(lr)
LOCAL_LABEL(num_neq_denom):
#else
        IT(eq)
        JMPc(lr, eq)
#endif
        cmp r0, r1
#if defined(USE_THUMB_1)
        bhs LOCAL_LABEL(num_ge_denom)
        movs r0, #0
        JMP(lr)
LOCAL_LABEL(num_ge_denom):
#else
        ITT(cc)
        movcc r0, #0
        JMPc(lr, cc)
#endif

        // Implement division using the binary long division algorithm.
        //
        // r0 is the numerator, r1 the denominator.
        //
        // The code before JMP computes the correct shift I, so that
        // r0 and (r1 << I) have the highest bit set in the same position.
        // At the time of JMP, ip := .Ldiv0block - (size of one block) * I.
        // This depends on the fixed instruction size of each block:
        // 12 bytes in ARM mode, 14 bytes in Thumb-2 mode, and 10 bytes in
        // Thumb-1 mode (where r0, not ip, carries the branch target).
        //
        // block(shift) implements the test-and-update-quotient core.
        // It assumes (r1 << shift) can be computed without overflow and
        // that r0 < 2 * (r1 << shift). The quotient is stored in r3.
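        //
        // For reference, a rough C sketch of the same restoring
        // long-division idea (illustrative only, not part of the build;
        // __builtin_clz stands in for the shift search, and the early
        // exits above guarantee n >= d >= 1):
        //
        //   unsigned udiv(unsigned n, unsigned d) {
        //     unsigned q = 0;
        //     int i = __builtin_clz(d) - __builtin_clz(n); // the shift I
        //     for (; i >= 0; --i) {
        //       q <<= 1;
        //       if (n >= (d << i)) { // block(i): test ...
        //         n -= d << i;       // ... update the remainder ...
        //         q |= 1;            // ... and set the quotient bit
        //       }
        //     }
        //     return q;
        //   }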

# if defined(__ARM_FEATURE_CLZ)
        clz ip, r0
        clz r3, r1
        // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
        sub r3, r3, ip
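        // r3 := clz(r1) - clz(r0) = I, the shift that lines up the
        // highest set bits of r0 and (r1 << I).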
# if defined(USE_THUMB_2)
        adr ip, LOCAL_LABEL(div0block) + 1
        sub ip, ip, r3, lsl #1
# else
        adr ip, LOCAL_LABEL(div0block)
# endif
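        // Together with the Thumb-2 "lsl #1" above, this computes
        // ip := div0block - 12 * I (ARM) or div0block - 14 * I (Thumb-2),
        // i.e. the address of block(I) (plus the Thumb bit in Thumb-2).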
        sub ip, ip, r3, lsl #2
        sub ip, ip, r3, lsl #3
        mov r3, #0
        bx ip
# else // No CLZ Feature
# if defined(USE_THUMB_2)
# error THUMB mode requires CLZ or UDIV
# endif
# if defined(USE_THUMB_1)
# define BLOCK_SIZE 10
# else
# define BLOCK_SIZE 12
# endif

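        // Without CLZ, find the shift I by binary search: test whether
        // the numerator, shifted right by 16, 8, 4, 2 and then 1, is
        // still at least the denominator, and walk the branch target
        // (ip, or r0 in Thumb-1) back by the matching number of blocks
        // each time. Each block below occupies BLOCK_SIZE bytes: 10 in
        // Thumb-1 (five 16-bit instructions), 12 in ARM mode (three
        // 32-bit instructions).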
        mov r2, r0
# if defined(USE_THUMB_1)
        mov ip, r0
        adr r0, LOCAL_LABEL(div0block)
        adds r0, #1
# else
        adr ip, LOCAL_LABEL(div0block)
# endif
        lsrs r3, r2, #16
        cmp r3, r1
# if defined(USE_THUMB_1)
        blo LOCAL_LABEL(skip_16)
        movs r2, r3
        subs r0, r0, #(16 * BLOCK_SIZE)
LOCAL_LABEL(skip_16):
# else
        movhs r2, r3
        subhs ip, ip, #(16 * BLOCK_SIZE)
# endif

        lsrs r3, r2, #8
        cmp r3, r1
# if defined(USE_THUMB_1)
        blo LOCAL_LABEL(skip_8)
        movs r2, r3
        subs r0, r0, #(8 * BLOCK_SIZE)
LOCAL_LABEL(skip_8):
# else
        movhs r2, r3
        subhs ip, ip, #(8 * BLOCK_SIZE)
# endif

        lsrs r3, r2, #4
        cmp r3, r1
# if defined(USE_THUMB_1)
        blo LOCAL_LABEL(skip_4)
        movs r2, r3
        subs r0, r0, #(4 * BLOCK_SIZE)
LOCAL_LABEL(skip_4):
# else
        movhs r2, r3
        subhs ip, ip, #(4 * BLOCK_SIZE)
# endif

        lsrs r3, r2, #2
        cmp r3, r1
# if defined(USE_THUMB_1)
        blo LOCAL_LABEL(skip_2)
        movs r2, r3
        subs r0, r0, #(2 * BLOCK_SIZE)
LOCAL_LABEL(skip_2):
# else
        movhs r2, r3
        subhs ip, ip, #(2 * BLOCK_SIZE)
# endif

        // Last block, no need to update r2 or r3.
# if defined(USE_THUMB_1)
        lsrs r3, r2, #1
        cmp r3, r1
        blo LOCAL_LABEL(skip_1)
        subs r0, r0, #(1 * BLOCK_SIZE)
LOCAL_LABEL(skip_1):
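        // The numerator was stashed in ip and the branch target built up
        // in r0: put the target in r2, restore the numerator to r0, clear
        // the quotient accumulator and dispatch to block(I).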
        movs r2, r0
        mov r0, ip
        movs r3, #0
        JMP (r2)

# else
        cmp r1, r2, lsr #1
        subls ip, ip, #(1 * BLOCK_SIZE)

        movs r3, #0

        JMP(ip)
# endif
# endif // __ARM_FEATURE_CLZ

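// IMM expands to the literal '#' used for immediates below. It cannot be
// written directly inside the block() macro bodies, where the preprocessor
// would treat '#' in front of the macro parameter as the stringizing
// operator.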
#define IMM #
// Due to the limited branch range in Thumb-1, the divide-by-zero handler
// has to be placed close to the branches that reach it.
LOCAL_LABEL(divby0):
        movs r0, #0
# if defined(__ARM_EABI__)
        push {r7, lr}
        bl __aeabi_idiv0 // due to relocation range limits, b cannot be used here
        pop {r7, pc}
# else
        JMP(lr)
# endif


#if defined(USE_THUMB_1)
#define block(shift)                                  \
        lsls r2, r1, IMM shift;                       \
        cmp r0, r2;                                   \
        blo LOCAL_LABEL(block_skip_##shift);          \
        subs r0, r0, r2;                              \
        LOCAL_LABEL(block_skip_##shift) :;            \
        adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.

        // TODO: if the current location counter is not word-aligned, we
        // don't need the .p2align and the nop.
        // Label div0block must be word-aligned. The 31 blocks before it
        // occupy 31 * 10 = 310 bytes, so align here and add a 2-byte nop:
        // block(31) then starts at a word boundary + 2, and div0block
        // lands word-aligned.
        .p2align 2
        nop // padding so that div0block ends up word-aligned

#else
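// ARM and Thumb-2: each block compares r0 against (r1 << shift) and, if
// r0 is at least as large, sets the corresponding quotient bit in r3 and
// subtracts (r1 << shift) from the remainder (predicated via ITT in
// Thumb-2).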
#define block(shift)                                  \
        cmp r0, r1, lsl IMM shift;                    \
        ITT(hs);                                      \
        WIDE(addhs) r3, r3, IMM (1 << shift);         \
        WIDE(subhs) r0, r0, r1, lsl IMM shift
#endif

        block(31)
        block(30)
        block(29)
        block(28)
        block(27)
        block(26)
        block(25)
        block(24)
        block(23)
        block(22)
        block(21)
        block(20)
        block(19)
        block(18)
        block(17)
        block(16)
        block(15)
        block(14)
        block(13)
        block(12)
        block(11)
        block(10)
        block(9)
        block(8)
        block(7)
        block(6)
        block(5)
        block(4)
        block(3)
        block(2)
        block(1)
LOCAL_LABEL(div0block):
        block(0)

        mov r0, r3
        JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

END_COMPILERRT_FUNCTION(__udivsi3)

NO_EXEC_STACK_DIRECTIVE
