| 1 | //===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the __udivsi3 (32-bit unsigned integer divide) |
| 10 | // function for the ARM 32-bit architecture. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "../assembly.h" |
| 15 | |
| 16 | .syntax unified |
| 17 | .text |
| 18 | |
| 19 | DEFINE_CODE_STATE |
| 20 | |
| 21 | .p2align 2 |
| 22 | DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) |
| 23 | |
| 24 | @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor) |
| 25 | @ Calculate and return the quotient of the (unsigned) division. |
| 26 | |
| 27 | DEFINE_COMPILERRT_FUNCTION(__udivsi3) |
| 28 | #if __ARM_ARCH_EXT_IDIV__ |
| 29 | tst r1, r1 |
| 30 | beq LOCAL_LABEL(divby0) |
| 31 | udiv r0, r0, r1 |
| 32 | bx lr |
| 33 | |
| 34 | LOCAL_LABEL(divby0): |
| 35 | // Division by zero: set r0 to 0. Use movs for compatibility with v8-m.base. |
| 36 | movs r0, #0 |
| 37 | # ifdef __ARM_EABI__ |
| 38 | b __aeabi_idiv0 |
| 39 | # else |
| 40 | JMP(lr) |
| 41 | # endif |
| 42 | |
| 43 | #else // ! __ARM_ARCH_EXT_IDIV__ |
| 44 | cmp r1, #1 |
| 45 | bcc LOCAL_LABEL(divby0) |
| 46 | #if defined(USE_THUMB_1) |
| 47 | bne LOCAL_LABEL(num_neq_denom) |
| 48 | JMP(lr) |
| 49 | LOCAL_LABEL(num_neq_denom): |
| 50 | #else |
| 51 | IT(eq) |
| 52 | JMPc(lr, eq) |
| 53 | #endif |
| 54 | cmp r0, r1 |
| 55 | #if defined(USE_THUMB_1) |
| 56 | bhs LOCAL_LABEL(num_ge_denom) |
| 57 | movs r0, #0 |
| 58 | JMP(lr) |
| 59 | LOCAL_LABEL(num_ge_denom): |
| 60 | #else |
| 61 | ITT(cc) |
| 62 | movcc r0, #0 |
| 63 | JMPc(lr, cc) |
| 64 | #endif |
| 65 | |
| 66 | // Implement division using binary long division algorithm. |
| 67 | // |
| 68 | // r0 is the numerator, r1 the denominator. |
| 69 | // |
| 70 | // The code before the jump computes a shift I such that (r1 << I) does |
| 71 | // not overflow and r0 < 2 * (r1 << I). With CLZ, r0 and (r1 << I) have |
| 72 | // the highest bit set in the same position. The jump then lands on |
| 73 | // block(I), i.e. at div0block - I * (size of one block). This depends on |
| 74 | // the fixed block size: 12 bytes (ARM), 14 (Thumb-2), 10 (Thumb-1). |
| 75 | // |
| 76 | // block(shift) implements the test-and-update-quotient core. |
| 77 | // It assumes (r1 << shift) can be computed without overflow and |
| 78 | // that r0 < 2 * (r1 << shift). The quotient is stored in r3. |
| 79 | |
| 80 | # if defined(__ARM_FEATURE_CLZ) |
| 81 | clz ip, r0 |
| 82 | clz r3, r1 |
| 83 | // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. |
| 84 | sub r3, r3, ip |
| 85 | # if defined(USE_THUMB_2) |
| 86 | adr ip, LOCAL_LABEL(div0block) + 1 |
| 87 | sub ip, ip, r3, lsl #1 |
| 88 | # else |
| 89 | adr ip, LOCAL_LABEL(div0block) |
| 90 | # endif |
| 91 | sub ip, ip, r3, lsl #2 |
| 92 | sub ip, ip, r3, lsl #3 |
| 93 | mov r3, #0 |
| 94 | bx ip |
| 95 | # else // No CLZ Feature |
| 96 | # if defined(USE_THUMB_2) |
| 97 | # error THUMB mode requires CLZ or UDIV |
| 98 | # endif |
| 99 | # if defined(USE_THUMB_1) |
| 100 | # define BLOCK_SIZE 10 |
| 101 | # else |
| 102 | # define BLOCK_SIZE 12 |
| 103 | # endif |
| 104 | |
| 105 | mov r2, r0 |
| 106 | # if defined(USE_THUMB_1) |
| 107 | mov ip, r0 |
| 108 | adr r0, LOCAL_LABEL(div0block) |
| 109 | adds r0, #1 |
| 110 | # else |
| 111 | adr ip, LOCAL_LABEL(div0block) |
| 112 | # endif |
| 113 | lsrs r3, r2, #16 |
| 114 | cmp r3, r1 |
| 115 | # if defined(USE_THUMB_1) |
| 116 | blo LOCAL_LABEL(skip_16) |
| 117 | movs r2, r3 |
| 118 | subs r0, r0, #(16 * BLOCK_SIZE) |
| 119 | LOCAL_LABEL(skip_16): |
| 120 | # else |
| 121 | movhs r2, r3 |
| 122 | subhs ip, ip, #(16 * BLOCK_SIZE) |
| 123 | # endif |
| 124 | |
| 125 | lsrs r3, r2, #8 |
| 126 | cmp r3, r1 |
| 127 | # if defined(USE_THUMB_1) |
| 128 | blo LOCAL_LABEL(skip_8) |
| 129 | movs r2, r3 |
| 130 | subs r0, r0, #(8 * BLOCK_SIZE) |
| 131 | LOCAL_LABEL(skip_8): |
| 132 | # else |
| 133 | movhs r2, r3 |
| 134 | subhs ip, ip, #(8 * BLOCK_SIZE) |
| 135 | # endif |
| 136 | |
| 137 | lsrs r3, r2, #4 |
| 138 | cmp r3, r1 |
| 139 | # if defined(USE_THUMB_1) |
| 140 | blo LOCAL_LABEL(skip_4) |
| 141 | movs r2, r3 |
| 142 | subs r0, r0, #(4 * BLOCK_SIZE) |
| 143 | LOCAL_LABEL(skip_4): |
| 144 | # else |
| 145 | movhs r2, r3 |
| 146 | subhs ip, #(4 * BLOCK_SIZE) |
| 147 | # endif |
| 148 | |
| 149 | lsrs r3, r2, #2 |
| 150 | cmp r3, r1 |
| 151 | # if defined(USE_THUMB_1) |
| 152 | blo LOCAL_LABEL(skip_2) |
| 153 | movs r2, r3 |
| 154 | subs r0, r0, #(2 * BLOCK_SIZE) |
| 155 | LOCAL_LABEL(skip_2): |
| 156 | # else |
| 157 | movhs r2, r3 |
| 158 | subhs ip, ip, #(2 * BLOCK_SIZE) |
| 159 | # endif |
| 160 | |
| 161 | // Last step: r2 is not shifted any more, only the jump target is adjusted. |
| 162 | # if defined(USE_THUMB_1) |
| 163 | lsrs r3, r2, #1 |
| 164 | cmp r3, r1 |
| 165 | blo LOCAL_LABEL(skip_1) |
| 166 | subs r0, r0, #(1 * BLOCK_SIZE) |
| 167 | LOCAL_LABEL(skip_1): |
| 168 | movs r2, r0 |
| 169 | mov r0, ip |
| 170 | movs r3, #0 |
| 171 | JMP (r2) |
| 172 | |
| 173 | # else |
| 174 | cmp r1, r2, lsr #1 |
| 175 | subls ip, ip, #(1 * BLOCK_SIZE) |
| 176 | |
| 177 | movs r3, #0 |
| 178 | |
| 179 | JMP(ip) |
| 180 | # endif |
| 181 | # endif // __ARM_FEATURE_CLZ |
| 182 | |
| 183 | |
| 184 | #define IMM # |
| 185 | // Due to the range limit of branches in Thumb-1, the divby0 handler is |
| 186 | // placed here, ahead of the blocks, so it stays close to its branch. |
| 187 | LOCAL_LABEL(divby0): |
| 188 | movs r0, #0 |
| 189 | # if defined(__ARM_EABI__) |
| 190 | push {r7, lr} |
| 191 | bl __aeabi_idiv0 // due to relocation limit, can't use b. |
| 192 | pop {r7, pc} |
| 193 | # else |
| 194 | JMP(lr) |
| 195 | # endif |
| 196 | |
| 197 | |
| 198 | #if defined(USE_THUMB_1) |
| 199 | #define block(shift) \ |
| 200 | lsls r2, r1, IMM shift; \ |
| 201 | cmp r0, r2; \ |
| 202 | blo LOCAL_LABEL(block_skip_##shift); \ |
| 203 | subs r0, r0, r2; \ |
| 204 | LOCAL_LABEL(block_skip_##shift) :; \ |
| 205 | adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. |
| 206 | |
| 207 | // TODO: if the current location counter is not word aligned, we don't |
| 208 | // need the .p2align and nop. |
| 209 | // Label div0block must be word-aligned. Word-align here, then pad with a nop. |
| 210 | .p2align 2 |
| 211 | nop // Padding to align div0block, as 31 blocks = 310 bytes (not a multiple of 4) |
| 212 | |
| 213 | #else |
| 214 | #define block(shift) \ |
| 215 | cmp r0, r1, lsl IMM shift; \ |
| 216 | ITT(hs); \ |
| 217 | WIDE(addhs) r3, r3, IMM (1 << shift); \ |
| 218 | WIDE(subhs) r0, r0, r1, lsl IMM shift |
| 219 | #endif |
| 220 | |
| 221 | block(31) |
| 222 | block(30) |
| 223 | block(29) |
| 224 | block(28) |
| 225 | block(27) |
| 226 | block(26) |
| 227 | block(25) |
| 228 | block(24) |
| 229 | block(23) |
| 230 | block(22) |
| 231 | block(21) |
| 232 | block(20) |
| 233 | block(19) |
| 234 | block(18) |
| 235 | block(17) |
| 236 | block(16) |
| 237 | block(15) |
| 238 | block(14) |
| 239 | block(13) |
| 240 | block(12) |
| 241 | block(11) |
| 242 | block(10) |
| 243 | block(9) |
| 244 | block(8) |
| 245 | block(7) |
| 246 | block(6) |
| 247 | block(5) |
| 248 | block(4) |
| 249 | block(3) |
| 250 | block(2) |
| 251 | block(1) |
| 252 | LOCAL_LABEL(div0block): |
| 253 | block(0) |
| 254 | |
| 255 | mov r0, r3 |
| 256 | JMP(lr) |
| 257 | #endif // __ARM_ARCH_EXT_IDIV__ |
| 258 | |
| 259 | END_COMPILERRT_FUNCTION(__udivsi3) |
| 260 | |
| 261 | NO_EXEC_STACK_DIRECTIVE |
| 262 | |
| 263 | |