1//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the __udivsi3 (32-bit unsigned integer divide)
10// function for the ARM 32-bit architecture.
11//
12//===----------------------------------------------------------------------===//
13
14#include "../assembly.h"
15
16 .syntax unified
17 .text
18
19DEFINE_CODE_STATE
20
21 .p2align 2
22DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
23
24@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor)
25@ Calculate and return the quotient of the (unsigned) division.
26
27DEFINE_COMPILERRT_FUNCTION(__udivsi3)
28#if __ARM_ARCH_EXT_IDIV__
29 tst r1, r1
30 beq LOCAL_LABEL(divby0)
31 udiv r0, r0, r1
32 bx lr
33
34LOCAL_LABEL(divby0):
35 // Use movs for compatibility with v8-m.base.
36 movs r0, #0
37# ifdef __ARM_EABI__
38 b __aeabi_idiv0
39# else
40 JMP(lr)
41# endif
42
43#else // ! __ARM_ARCH_EXT_IDIV__
44 cmp r1, #1
45 bcc LOCAL_LABEL(divby0)
46#if defined(USE_THUMB_1)
47 bne LOCAL_LABEL(num_neq_denom)
48 JMP(lr)
49LOCAL_LABEL(num_neq_denom):
50#else
51 IT(eq)
52 JMPc(lr, eq)
53#endif
54 cmp r0, r1
55#if defined(USE_THUMB_1)
56 bhs LOCAL_LABEL(num_ge_denom)
57 movs r0, #0
58 JMP(lr)
59LOCAL_LABEL(num_ge_denom):
60#else
61 ITT(cc)
62 movcc r0, #0
63 JMPc(lr, cc)
64#endif
65
66 // Implement division using binary long division algorithm.
67 //
68 // r0 is the numerator, r1 the denominator.
69 //
70 // The code before JMP computes the correct shift I, so that
71 // r0 and (r1 << I) have the highest bit set in the same position.
72 // At the time of JMP, ip := .Ldiv0block - 12 * I.
73 // This depends on the fixed instruction size of block.
74 // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
75 //
76 // block(shift) implements the test-and-update-quotient core.
77 // It assumes (r0 << shift) can be computed without overflow and
78 // that (r0 << shift) < 2 * r1. The quotient is stored in r3.
79
80# if defined(__ARM_FEATURE_CLZ)
81 clz ip, r0
82 clz r3, r1
83 // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
84 sub r3, r3, ip
85# if defined(USE_THUMB_2)
86 adr ip, LOCAL_LABEL(div0block) + 1
87 sub ip, ip, r3, lsl #1
88# else
89 adr ip, LOCAL_LABEL(div0block)
90# endif
91 sub ip, ip, r3, lsl #2
92 sub ip, ip, r3, lsl #3
93 mov r3, #0
94 bx ip
95# else // No CLZ Feature
96# if defined(USE_THUMB_2)
97# error THUMB mode requires CLZ or UDIV
98# endif
99# if defined(USE_THUMB_1)
100# define BLOCK_SIZE 10
101# else
102# define BLOCK_SIZE 12
103# endif
104
105 mov r2, r0
106# if defined(USE_THUMB_1)
107 mov ip, r0
108 adr r0, LOCAL_LABEL(div0block)
109 adds r0, #1
110# else
111 adr ip, LOCAL_LABEL(div0block)
112# endif
113 lsrs r3, r2, #16
114 cmp r3, r1
115# if defined(USE_THUMB_1)
116 blo LOCAL_LABEL(skip_16)
117 movs r2, r3
118 subs r0, r0, #(16 * BLOCK_SIZE)
119LOCAL_LABEL(skip_16):
120# else
121 movhs r2, r3
122 subhs ip, ip, #(16 * BLOCK_SIZE)
123# endif
124
125 lsrs r3, r2, #8
126 cmp r3, r1
127# if defined(USE_THUMB_1)
128 blo LOCAL_LABEL(skip_8)
129 movs r2, r3
130 subs r0, r0, #(8 * BLOCK_SIZE)
131LOCAL_LABEL(skip_8):
132# else
133 movhs r2, r3
134 subhs ip, ip, #(8 * BLOCK_SIZE)
135# endif
136
137 lsrs r3, r2, #4
138 cmp r3, r1
139# if defined(USE_THUMB_1)
140 blo LOCAL_LABEL(skip_4)
141 movs r2, r3
142 subs r0, r0, #(4 * BLOCK_SIZE)
143LOCAL_LABEL(skip_4):
144# else
145 movhs r2, r3
146 subhs ip, #(4 * BLOCK_SIZE)
147# endif
148
149 lsrs r3, r2, #2
150 cmp r3, r1
151# if defined(USE_THUMB_1)
152 blo LOCAL_LABEL(skip_2)
153 movs r2, r3
154 subs r0, r0, #(2 * BLOCK_SIZE)
155LOCAL_LABEL(skip_2):
156# else
157 movhs r2, r3
158 subhs ip, ip, #(2 * BLOCK_SIZE)
159# endif
160
161 // Last block, no need to update r2 or r3.
162# if defined(USE_THUMB_1)
163 lsrs r3, r2, #1
164 cmp r3, r1
165 blo LOCAL_LABEL(skip_1)
166 subs r0, r0, #(1 * BLOCK_SIZE)
167LOCAL_LABEL(skip_1):
168 movs r2, r0
169 mov r0, ip
170 movs r3, #0
171 JMP (r2)
172
173# else
174 cmp r1, r2, lsr #1
175 subls ip, ip, #(1 * BLOCK_SIZE)
176
177 movs r3, #0
178
179 JMP(ip)
180# endif
181# endif // __ARM_FEATURE_CLZ
182
183
184#define IMM #
185 // due to the range limit of branch in Thumb1, we have to place the
186 // block closer
187LOCAL_LABEL(divby0):
188 movs r0, #0
189# if defined(__ARM_EABI__)
190 push {r7, lr}
191 bl __aeabi_idiv0 // due to relocation limit, can't use b.
192 pop {r7, pc}
193# else
194 JMP(lr)
195# endif
196
197
198#if defined(USE_THUMB_1)
199#define block(shift) \
200 lsls r2, r1, IMM shift; \
201 cmp r0, r2; \
202 blo LOCAL_LABEL(block_skip_##shift); \
203 subs r0, r0, r2; \
204 LOCAL_LABEL(block_skip_##shift) :; \
205 adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.
206
207 // TODO: if current location counter is not word aligned, we don't
208 // need the .p2align and nop
209 // Label div0block must be word-aligned. First align block 31
210 .p2align 2
211 nop // Padding to align div0block as 31 blocks = 310 bytes
212
213#else
214#define block(shift) \
215 cmp r0, r1, lsl IMM shift; \
216 ITT(hs); \
217 WIDE(addhs) r3, r3, IMM (1 << shift); \
218 WIDE(subhs) r0, r0, r1, lsl IMM shift
219#endif
220
221 block(31)
222 block(30)
223 block(29)
224 block(28)
225 block(27)
226 block(26)
227 block(25)
228 block(24)
229 block(23)
230 block(22)
231 block(21)
232 block(20)
233 block(19)
234 block(18)
235 block(17)
236 block(16)
237 block(15)
238 block(14)
239 block(13)
240 block(12)
241 block(11)
242 block(10)
243 block(9)
244 block(8)
245 block(7)
246 block(6)
247 block(5)
248 block(4)
249 block(3)
250 block(2)
251 block(1)
252LOCAL_LABEL(div0block):
253 block(0)
254
255 mov r0, r3
256 JMP(lr)
257#endif // __ARM_ARCH_EXT_IDIV__
258
259END_COMPILERRT_FUNCTION(__udivsi3)
260
261NO_EXEC_STACK_DIRECTIVE
262
263

source code of compiler-rt/lib/builtins/arm/udivsi3.S