1 | //===-- addsf3.S - Adds two single precision floating point numbers-------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the __addsf3 function (single precision floating |
10 | // point number addition with the IEEE-754 default rounding: to nearest, ties |
11 | // to even) for the ARM Thumb1 ISA. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "../assembly.h" |
// Field widths of the single precision format handled by this file.
// Total number of bits in the representation:
#define typeWidth 32
// Number of explicitly stored significand (fraction) bits:
#define significandBits 23

  // Unified assembler syntax, Thumb instruction set, code placed in .text
  // and aligned to 2^2 = 4 bytes.
  .syntax unified
  .thumb
  .text
  .p2align 2
23 | |
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)

//===----------------------------------------------------------------------===//
// float __addsf3(float a, float b)
//
// In:   r0 = bit pattern of a (aRep), r1 = bit pattern of b (bRep).
// Out:  r0 = bit pattern of a + b, rounded to nearest, ties to even.
//
// r4-r7 and lr are pushed on entry; every exit pops them again, loading the
// saved return address directly into pc.
//
// Special cases (handled from zero_nan_inf onwards):
//   a is NaN                -> a with the quiet bit set
//   b is NaN (a is not)     -> b with the quiet bit set
//   a is inf, b is -a       -> a with the quiet bit set (a NaN)
//   a is inf, anything else -> a
//   b is inf, a is finite   -> b
//   both are zero           -> aRep & bRep (-0 only if both are -0)
//   exactly one is zero     -> the other operand
// A subtraction that cancels exactly returns +0.
//
// Register roles on the main path, after the swap that makes |a| >= |b|:
//   r0 = aRep (only its sign is used at the end)   r1 = bRep
//   r2 = aAbs, then aExponent                      r3 = bAbs, then bExponent
//   r4 = aSignificand << 3                         r5 = bSignificand << 3
//   r6, r7 = scratch
// The three low bits added to each significand carry the guard, round and
// sticky information used by the final rounding step.
//===----------------------------------------------------------------------===//
DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
  push {r4, r5, r6, r7, lr}

  // Get the absolute value of a and b. The flags left by each lsrs also tell
  // whether that operand is +/-0.
  lsls r2, r0, #1
  lsls r3, r1, #1
  lsrs r2, r2, #1  // aAbs
  beq  LOCAL_LABEL(a_zero_nan_inf)  // r3 still holds bRep << 1 on this path.
  lsrs r3, r3, #1  // bAbs
  beq  LOCAL_LABEL(zero_nan_inf)

  // Detect if a or b is infinity or NaN (biased exponent field == 0xFF).
  lsrs r6, r2, #(significandBits)
  lsrs r7, r3, #(significandBits)
  cmp  r6, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)
  cmp  r7, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)

  // Swap Rep and Abs so that a and aAbs have the larger absolute value.
  cmp  r2, r3
  bhs  LOCAL_LABEL(no_swap)
  movs r4, r0
  movs r5, r2
  movs r0, r1
  movs r2, r3
  movs r1, r4
  movs r3, r5
LOCAL_LABEL(no_swap):

  // Get the significands and shift them to give us round, guard and sticky.
  lsls r4, r0, #(typeWidth - significandBits)
  lsrs r4, r4, #(typeWidth - significandBits - 3)  // aSignificand << 3
  lsls r5, r1, #(typeWidth - significandBits)
  lsrs r5, r5, #(typeWidth - significandBits - 3)  // bSignificand << 3

  // Get the implicitBit (already shifted left by 3 like the significands).
  movs r6, #1
  lsls r6, r6, #(significandBits + 3)

  // Get aExponent and set the implicit bit unless a is denormal.
  lsrs r2, r2, #(significandBits)
  beq  LOCAL_LABEL(a_done_implicit_bit)
  orrs r4, r6
LOCAL_LABEL(a_done_implicit_bit):

  // Get bExponent and set the implicit bit unless b is denormal.
  lsrs r3, r3, #(significandBits)
  beq  LOCAL_LABEL(b_done_implicit_bit)
  orrs r5, r6
LOCAL_LABEL(b_done_implicit_bit):

  // Get the difference in exponents.
  subs r6, r2, r3
  beq  LOCAL_LABEL(done_align)

  // If b is denormal, then a must be normal as align > 0, and we only need to
  // right shift bSignificand by (align - 1) bits.
  cmp  r3, #0
  bne  1f
  subs r6, r6, #1
1:

  // No longer needs bExponent. r3 is dead here.
  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
  // Register-specified shifts use the low byte of the count and yield 0 for
  // counts of 32 or more, so an align >= typeWidth needs no separate branch.
  movs r3, #(typeWidth)
  subs r3, r3, r6
  movs r7, r5
  lsls r7, r3
  beq  1f
  movs r7, #1
1:

  // bSignificand = bSignificand >> align | sticky;
  lsrs r5, r6
  orrs r5, r7
  bne  LOCAL_LABEL(done_align)
  movs r5, #1  // sticky; b is known to be non-zero.

LOCAL_LABEL(done_align):
  // isSubtraction = (aRep ^ bRep) >> 31;
  movs r7, r0
  eors r7, r1
  lsrs r7, #31
  bne  LOCAL_LABEL(do_subtraction)

  // Same sign, do addition.

  // aSignificand += bSignificand;
  adds r4, r4, r5

  // Check carry bit (one position above the implicit bit).
  movs r6, #1
  lsls r6, r6, #(significandBits + 3 + 1)
  movs r7, r4
  ands r7, r6
  bne  LOCAL_LABEL(add_carried)

  // No carry. A normal a keeps its exponent. If a (and therefore b) is
  // denormal, the sum may still have reached the implicit-bit position, in
  // which case the result is the normal number with exponent 1. form_result
  // discards that bit, so it has to be moved into the exponent here:
  // aExponent = aSignificand >> (significandBits + 3), which is 0 or 1.
  cmp  r2, #0
  bne  LOCAL_LABEL(form_result)
  lsrs r2, r4, #(significandBits + 3)
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(add_carried):
  // The addition carried up: right-shift the result by one, keeping the bit
  // shifted out as sticky, and adjust the exponent.
  movs r7, r4
  movs r6, #1
  ands r7, r6  // sticky = aSignificand & 1;
  lsrs r4, #1
  orrs r4, r7  // result Significand
  adds r2, #1  // result Exponent
  // If we have overflowed the type, return +/- infinity.
  cmp  r2, #0xFF
  beq  LOCAL_LABEL(ret_inf)

LOCAL_LABEL(form_result):
  // Shift the sign, exponent and significand into place.
  lsrs r0, #(typeWidth - 1)
  lsls r0, #(typeWidth - 1)  // Get Sign.
  lsls r2, #(significandBits)
  orrs r0, r2
  movs r1, r4  // Keep a copy for the round/guard/sticky bits.
  lsls r4, #(typeWidth - significandBits - 3)  // Drops the implicit bit.
  lsrs r4, #(typeWidth - significandBits)      // Drops round/guard/sticky.
  orrs r0, r4

  // Final rounding. The result may overflow to infinity, but that is the
  // correct result in that case.
  // roundGuardSticky = aSignificand & 0x7;
  movs r2, #0x7
  ands r1, r2

  // if (roundGuardSticky > 0x4) result++;
  cmp  r1, #0x4
  blt  LOCAL_LABEL(done_round)
  beq  1f
  adds r0, #1
  pop  {r4, r5, r6, r7, pc}
1:

  // if (roundGuardSticky == 0x4) result += result & 1;
  movs r1, r0
  lsrs r1, #1  // Carry flag = lowest bit of the result.
  bcc  LOCAL_LABEL(done_round)
  adds r0, r0, #1
LOCAL_LABEL(done_round):
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(do_subtraction):
  subs r4, r4, r5  // aSignificand -= bSignificand;
  beq  LOCAL_LABEL(ret_zero)
  movs r6, r4
  cmp  r2, #0
  beq  LOCAL_LABEL(form_result)  // if the exponent of a is 0, no need to normalize.
  // If partial cancellation occurred, we need to left-shift the result
  // and adjust the exponent:
  lsrs r6, r6, #(significandBits + 3)
  bne  LOCAL_LABEL(form_result)  // Implicit bit still set: already normalized.

  // Count the leading zeros of aSignificand with __clzsi2.
  // r0 (aRep) and r2 (aExponent) are live across the call and must be saved.
  // r1 is dead from here on (it is rewritten in form_result before any read)
  // and is pushed only as padding: three words on top of the five-word
  // prologue frame keep sp 8-byte aligned at the call, as the AAPCS requires
  // at public interfaces. r3 is dead as well, so it is not saved. r4 is
  // expected to survive the call because it is callee-saved.
  push {r0, r1, r2}
  movs r0, r4
  bl   SYMBOL_NAME(__clzsi2)
  movs r5, r0
  pop  {r0, r1, r2}
  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
  // aSignificand <<= shift; aExponent -= shift;
  lsls r4, r5
  subs r2, r2, r5
  bgt  LOCAL_LABEL(form_result)

  // Denormalize the result if aExponent <= 0.
  movs r6, #1
  subs r6, r6, r2  // shift = 1 - aExponent;
  movs r2, #0      // aExponent = 0;
  movs r3, #(typeWidth)  // bExponent is dead.
  subs r3, r3, r6
  movs r7, r4
  lsls r7, r3  // stickyBit = (bool)(aSignificand << (typeWidth - shift))
  beq  1f
  movs r7, #1
1:
  lsrs r4, r6  // aSignificand >> shift
  orrs r4, r7
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(ret_zero):
  // Exact cancellation: return +0.
  movs r0, #0
  pop  {r4, r5, r6, r7, pc}


LOCAL_LABEL(a_zero_nan_inf):
  // Reached when aAbs == 0, before bAbs was finished: r3 holds bRep << 1.
  lsrs r3, r3, #1

LOCAL_LABEL(zero_nan_inf):
  // Here r2 has aAbs, r3 has bAbs
  movs r4, #0xFF
  lsls r4, r4, #(significandBits)  // Make +inf.

  // Anything above the +inf bit pattern is a NaN.
  cmp  r2, r4
  bhi  LOCAL_LABEL(a_is_nan)
  cmp  r3, r4
  bhi  LOCAL_LABEL(b_is_nan)

  cmp  r2, r4
  bne  LOCAL_LABEL(a_is_rational)
  // aAbs is INF.
  eors r1, r0  // aRep ^ bRep.
  movs r6, #1
  lsls r6, r6, #(typeWidth - 1)  // get sign mask.
  cmp  r1, r6  // if they only differ on the sign bit, this is -INF + INF
  beq  LOCAL_LABEL(a_is_nan)  // Setting the quiet bit on an INF makes a NaN.
  pop  {r4, r5, r6, r7, pc}   // Otherwise return a (the infinity).

LOCAL_LABEL(a_is_rational):
  cmp  r3, r4
  bne  LOCAL_LABEL(b_is_rational)
  // bAbs is INF and a is finite: return b.
  movs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_rational):
  // either a or b or both are zero.
  adds r4, r2, r3
  beq  LOCAL_LABEL(both_zero)
  cmp  r2, #0  // is absA 0 ?
  beq  LOCAL_LABEL(ret_b)
  pop  {r4, r5, r6, r7, pc}  // b is zero: return a unchanged.

LOCAL_LABEL(both_zero):
  ands r0, r1  // +0 + -0 = +0
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_b):
  movs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_nan):
  movs r0, r1
LOCAL_LABEL(a_is_nan):
  // Return the value in r0 with the quiet bit set.
  movs r1, #1
  lsls r1, r1, #(significandBits - 1)  // r1 is quiet bit.
  orrs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_inf):
  // Set every exponent bit, then clear the significand by shifting it out
  // and back: what remains is the sign of a with an infinite magnitude.
  movs r4, #0xFF
  lsls r4, r4, #(significandBits)
  orrs r0, r4
  lsrs r0, r0, #(significandBits)
  lsls r0, r0, #(significandBits)
  pop  {r4, r5, r6, r7, pc}


END_COMPILERRT_FUNCTION(__addsf3)

NO_EXEC_STACK_DIRECTIVE
277 | |