| 1 | //===-- addsf3.S - Adds two single precision floating point numbers ------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the __addsf3 function (single precision floating point |
| 10 | // addition with the IEEE-754 default rounding: to nearest, ties to even) for |
| 11 | // the ARM Thumb1 ISA. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
#include "../assembly.h"

// Field widths of the 32-bit single precision representation handled below.
#define significandBits 23
#define typeWidth 32

.syntax unified
.text
.thumb
.p2align 2

DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)

// __addsf3: add two single precision values given as raw bit patterns.
//
// In:   r0 = aRep, r1 = bRep
// Out:  r0 = bit pattern of the sum
// Saved on entry and restored on every exit path: r4-r7 (and lr, returned
// through pc).
//
// Register roles on the main (finite, non-zero) path after the swap:
//   r0 = aRep (operand with the larger magnitude; supplies the result sign)
//   r1 = bRep
//   r2 = aExponent, which becomes the result exponent
//   r3 = bExponent, later scratch
//   r4 = aSignificand << 3, which becomes the result significand
//   r5 = bSignificand << 3
//   r6, r7 = scratch
// The low three bits of the shifted significands are the round, guard and
// sticky bits.
DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
  push {r4, r5, r6, r7, lr}
  // Get the absolute value of a and b.
  lsls r2, r0, #1
  lsls r3, r1, #1
  lsrs r2, r2, #1  // aAbs
  beq  LOCAL_LABEL(a_zero_nan_inf)  // a is +/-0; bAbs is finished over there.
  lsrs r3, r3, #1  // bAbs
  beq  LOCAL_LABEL(zero_nan_inf)    // b is +/-0.

  // Detect if a or b is infinity or NaN (biased exponent == 0xFF).
  lsrs r6, r2, #(significandBits)
  lsrs r7, r3, #(significandBits)
  cmp  r6, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)
  cmp  r7, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)

  // Swap Rep and Abs so that a and aAbs have the larger absolute value.
  cmp  r2, r3
  bhs  LOCAL_LABEL(no_swap)
  movs r4, r0
  movs r5, r2
  movs r0, r1
  movs r2, r3
  movs r1, r4
  movs r3, r5
LOCAL_LABEL(no_swap):

  // Get the significands and shift them to give us round, guard and sticky.
  lsls r4, r0, #(typeWidth - significandBits)
  lsrs r4, r4, #(typeWidth - significandBits - 3) // aSignificand << 3
  lsls r5, r1, #(typeWidth - significandBits)
  lsrs r5, r5, #(typeWidth - significandBits - 3) // bSignificand << 3

  // Get the implicitBit (already shifted left by 3 like the significands).
  movs r6, #1
  lsls r6, r6, #(significandBits + 3)

  // Get aExponent and set implicit bit if necessary (exponent != 0).
  lsrs r2, r2, #(significandBits)
  beq  LOCAL_LABEL(a_done_implicit_bit)
  orrs r4, r6
LOCAL_LABEL(a_done_implicit_bit):

  // Get bExponent and set implicit bit if necessary (exponent != 0).
  lsrs r3, r3, #(significandBits)
  beq  LOCAL_LABEL(b_done_implicit_bit)
  orrs r5, r6
LOCAL_LABEL(b_done_implicit_bit):

  // Get the difference in exponents: align = aExponent - bExponent.
  subs r6, r2, r3
  beq  LOCAL_LABEL(done_align)

  // If b is denormal, then a must be normal as align > 0, and we only need to
  // right shift bSignificand by (align - 1) bits.
  cmp  r3, #0
  bne  1f
  subs r6, r6, #1
1:

  // No longer needs bExponent. r3 is dead here.
  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
  movs r3, #(typeWidth)
  subs r3, r3, r6
  movs r7, r5
  lsls r7, r3
  beq  1f
  movs r7, #1
1:

  // bSignificand = bSignificand >> align | sticky;
  lsrs r5, r6
  orrs r5, r7
  bne  LOCAL_LABEL(done_align)
  movs r5, #1 // sticky; b is known to be non-zero.

LOCAL_LABEL(done_align):
  // isSubtraction = (aRep ^ bRep) >> 31;
  movs r7, r0
  eors r7, r1
  lsrs r7, #31
  bne  LOCAL_LABEL(do_subtraction)

  // Same sign, do addition.

  // aSignificand += bSignificand;
  adds r4, r4, r5

  // Check the carry bit (one position above the implicit bit).
  movs r6, #1
  lsls r6, r6, #(significandBits + 3 + 1)
  movs r7, r4
  ands r7, r6
  bne  LOCAL_LABEL(add_carried)

  // No carry. A non-zero aExponent is already the result exponent.
  cmp  r2, #0
  bne  LOCAL_LABEL(form_result)
  // aExponent == 0 means a, and therefore the smaller b, is denormal. Their
  // sum may have reached the implicit-bit position, in which case the result
  // is the normal number with exponent 1. form_result masks the implicit bit
  // away, so the exponent must carry that information:
  //   aExponent = aSignificand >> (significandBits + 3);   // 0 or 1
  lsrs r2, r4, #(significandBits + 3)
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(add_carried):
  // The addition carried up, so right-shift the result (keeping the bit that
  // falls off as sticky) and adjust the exponent.
  movs r7, r4
  movs r6, #1
  ands r7, r6  // sticky = aSignificand & 1;
  lsrs r4, #1
  orrs r4, r7  // result Significand
  adds r2, #1  // result Exponent
  // If we have overflowed the type, return +/- infinity.
  cmp  r2, #0xFF
  beq  LOCAL_LABEL(ret_inf)

LOCAL_LABEL(form_result):
  // Shift the sign, exponent and significand into place.
  lsrs r0, #(typeWidth - 1)
  lsls r0, #(typeWidth - 1) // Get Sign.
  lsls r2, #(significandBits)
  orrs r0, r2
  movs r1, r4               // Keep a copy for the rounding bits.
  lsls r4, #(typeWidth - significandBits - 3) // Drops the implicit bit.
  lsrs r4, #(typeWidth - significandBits)     // Drops round/guard/sticky.
  orrs r0, r4

  // Final rounding.  The result may overflow to infinity, but that is the
  // correct result in that case.
  // roundGuardSticky = aSignificand & 0x7;
  movs r2, #0x7
  ands r1, r2

  // if (roundGuardSticky > 0x4) result++;
  cmp  r1, #0x4
  blt  LOCAL_LABEL(done_round)  // Below half: truncate.
  beq  1f                       // Exactly half: ties to even.
  adds r0, #1
  pop  {r4, r5, r6, r7, pc}
1:

  // if (roundGuardSticky == 0x4) result += result & 1;
  movs r1, r0
  lsrs r1, #1                   // Carry flag = lowest bit of the result.
  bcc  LOCAL_LABEL(done_round)
  adds r0, r0, #1
LOCAL_LABEL(done_round):
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(do_subtraction):
  subs r4, r4, r5 // aSignificand -= bSignificand;
  beq  LOCAL_LABEL(ret_zero)
  movs r6, r4
  cmp  r2, #0
  beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
  // If partial cancellation occurred, we need to left-shift the result
  // and adjust the exponent:
  lsrs r6, r6, #(significandBits + 3)
  bne  LOCAL_LABEL(form_result) // Implicit bit still set: already normal.

  // Only r0 (sign source) and r2 (aExponent) have to survive the call; r1 and
  // r3 are rewritten before they are next read. Three registers are pushed so
  // that, together with the five words pushed on entry, sp stays 8-byte
  // aligned at the call.
  push {r0, r1, r2}
  movs r0, r4
  bl   SYMBOL_NAME(__clzsi2)
  movs r5, r0
  pop  {r0, r1, r2}
  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
  // aSignificand <<= shift; aExponent -= shift;
  lsls r4, r5
  subs r2, r2, r5
  bgt  LOCAL_LABEL(form_result)

  // The result is denormal if aExponent <= 0: shift it back to the right.
  movs r6, #1
  subs r6, r6, r2 // shift = 1 - aExponent;
  movs r2, #0     // aExponent = 0;
  movs r3, #(typeWidth) // bExponent is dead.
  subs r3, r3, r6
  movs r7, r4
  lsls r7, r3  // stickyBit = (bool)(aSignificand << (typeWidth - shift))
  beq  1f
  movs r7, #1
1:
  lsrs r4, r6  // aSignificand >> shift
  orrs r4, r7
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(ret_zero):
  // The significands cancelled exactly: return +0.
  movs r0, #0
  pop  {r4, r5, r6, r7, pc}


LOCAL_LABEL(a_zero_nan_inf):
  // Reached before bAbs was finished; complete it here.
  lsrs r3, r3, #1

LOCAL_LABEL(zero_nan_inf):
  // Here r2 has aAbs, r3 has bAbs.
  movs r4, #0xFF
  lsls r4, r4, #(significandBits) // Make +inf.

  // Anything above the +inf bit pattern is a NaN.
  cmp  r2, r4
  bhi  LOCAL_LABEL(a_is_nan)
  cmp  r3, r4
  bhi  LOCAL_LABEL(b_is_nan)

  cmp  r2, r4
  bne  LOCAL_LABEL(a_is_rational)
  // aAbs is INF.
  eors r1, r0 // aRep ^ bRep.
  movs r6, #1
  lsls r6, r6, #(typeWidth - 1) // get sign mask.
  cmp  r1, r6 // if they only differ on sign bit, it's -INF + INF
  beq  LOCAL_LABEL(a_is_nan)
  pop  {r4, r5, r6, r7, pc}     // Otherwise the result is a.

LOCAL_LABEL(a_is_rational):
  cmp  r3, r4
  bne  LOCAL_LABEL(b_is_rational)
  movs r0, r1                   // b is INF: the result is b.
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_rational):
  // either a or b or both are zero.
  adds r4, r2, r3
  beq  LOCAL_LABEL(both_zero)
  cmp  r2, #0 // is absA 0 ?
  beq  LOCAL_LABEL(ret_b)
  pop  {r4, r5, r6, r7, pc}     // b is zero: the result is a.

LOCAL_LABEL(both_zero):
  ands r0, r1 // +0 + -0 = +0; the sign survives only if both are -0.
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_b):
  movs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_nan):
  movs r0, r1
LOCAL_LABEL(a_is_nan):
  // Return the value in r0 with the quiet bit set.
  movs r1, #1
  lsls r1, r1, #(significandBits - 1) // r1 is quiet bit.
  orrs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_inf):
  // Keep a's sign, force the exponent to 0xFF and clear the significand.
  movs r4, #0xFF
  lsls r4, r4, #(significandBits)
  orrs r0, r4
  lsrs r0, r0, #(significandBits)
  lsls r0, r0, #(significandBits)
  pop  {r4, r5, r6, r7, pc}


END_COMPILERRT_FUNCTION(__addsf3)

NO_EXEC_STACK_DIRECTIVE
| 277 | |