1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
// Double Precision Add and Subtract
10 | |
// Register aliases used throughout this file.
//
// A (r1:0) and B (r3:2) hold the two double-precision operands; the add
// routine leaves its result in A before returning through r31.
#define A		r1:0
#define AH		r1
#define AL		r0
#define B		r3:2
#define BH		r3
#define BL		r2

// Exponent fields extracted from the high words of A and B.  EXPB_A names
// the pair so both can be swapped with a single combine.
#define EXPA		r4
#define EXPB		r5
#define EXPB_A		r5:4

// Small constants (shift clamp / sticky bit, exponent window bounds).
#define ZTMP		r7:6
#define ZTMPH		r7
#define ZTMPL		r6

// 64-bit working copies of the operands' mantissas.
#define ATMP		r13:12
#define ATMPH		r13
#define ATMPL		r12

#define BTMP		r9:8
#define BTMPH		r9
#define BTMPL		r8

// Second scratch pair (negated value, shifted-out bits, difference).
#define ATMP2		r11:10
#define ATMP2H		r11
#define ATMP2L		r10

// EXPDIFF (r15) and EXTRACTOFF (r14) are the two halves of EXTRACTAMT
// (r15:14), the register pair handed to extractu in .Ladd_continue.
// All three are #undef'd there once r15:14 is reused as ZERO.
#define EXPDIFF		r15
#define EXTRACTOFF	r14
#define EXTRACTAMT	r15:14

// General scratch register (USR copies, shift counts, rounding mode).
#define TMP		r28
43 | |
// Field widths and offsets used by the extractu/insert operations below.
// MANTISSA_TO_INT_BIAS and SR_BIT_INEXACT are not referenced by the code
// visible in this file.
#define MANTBITS		52
#define HI_MANTBITS		20
#define EXPBITS			11
#define BIAS			1024
#define MANTISSA_TO_INT_BIAS	52
#define SR_BIT_INEXACT		5

// Bit offset of the two-bit rounding-mode field read from USR.  May be
// overridden from the build command line.
#if !defined(SR_ROUND_OFF)
#define SR_ROUND_OFF		22
#endif

// Predicate aliases for the entry packets; both are #undef'd before
// .Ladd_continue, where p3/p2 are given new names.
#define NORMAL			p3
#define BIGB			p2

// Each *_ALIAS macro exports an additional global name for __hexagon_<SYM>.
#define Q6_ALIAS(SYM) .global __qdsp_##SYM ; .set __qdsp_##SYM, __hexagon_##SYM
#define FAST_ALIAS(SYM) .global __hexagon_fast_##SYM ; .set __hexagon_fast_##SYM, __hexagon_##SYM
#define FAST2_ALIAS(SYM) .global __hexagon_fast2_##SYM ; .set __hexagon_fast2_##SYM, __hexagon_##SYM
// Records the size of SYM as the distance from its label to this point.
#define END(SYM) .size SYM,.-SYM
62 | |
// Both entry points live in .text and are exported as function symbols,
// each under its primary name plus the __qdsp_, __hexagon_fast_ and
// __hexagon_fast2_ aliases.
	.text

	.global __hexagon_adddf3
	.type __hexagon_adddf3, @function
	.global __hexagon_subdf3
	.type __hexagon_subdf3, @function

	Q6_ALIAS(adddf3)
	Q6_ALIAS(subdf3)
	FAST_ALIAS(adddf3)
	FAST_ALIAS(subdf3)
	FAST2_ALIAS(adddf3)
	FAST2_ALIAS(subdf3)
// __hexagon_adddf3 -- double-precision add: A (r1:0) + B (r3:2), result in A.
//
// Fast path for two normal operands.  Both mantissas are placed in a common
// 64-bit fixed-point form, the operand with the smaller exponent is shifted
// right by the exponent difference (bits shifted out collapse into a sticky
// bit), the two are added and subtracted as integers, and convert_d2df turns
// the chosen result back into a double whose exponent field is then rebased.
//
// Exits: .Ladd_abnormal  if either operand is not a normal number
//        .Ladd_ovf_unf   if EXPA is outside the window checked below
//        .Ladd_zero      if the integer result is exactly zero
	.p2align 5
__hexagon_adddf3:
	{
		ATMP = combine(##0x20000000,#0)		// seed: only bit 61 set
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
	}
	{
		BIGB = cmp.gtu(EXPB,EXPA)		// B has the larger exponent field?
		BTMP = ATMP				// same seed for B
		NORMAL = dfclass(A,#2)			// NORMAL is written twice in this packet;
		NORMAL = dfclass(B,#2)			// it ends up set only if both tests pass
	}
	{
		// Make A the operand with the larger exponent.  Every instruction
		// in the packet reads the pre-packet register values, so the two
		// conditional moves together form a swap.
		if (BIGB) A = B
		if (BIGB) B = A
		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// exponents follow their operands
		if (!NORMAL) jump .Ladd_abnormal	// not both normal: special handling
	}
	{
		// Drop each 52-bit mantissa in below the seed bit.
		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62
		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62
		EXPDIFF = sub(EXPA,EXPB)
		ZTMP = combine(#62,#1)			// ZTMPH = shift clamp, ZTMPL = sticky bit
	}
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
// Common tail.  .Ladd_abnormal also jumps here after preparing ATMP, BTMP,
// EXPA, EXPDIFF and ZTMP for a subnormal B.
.Ladd_continue:
	{
		A_POS = cmp.gt(AH,#-1)			// sign bit of A clear?
		ATMP2 = neg(ATMP)
		EXPDIFF = min(EXPDIFF,ZTMPH)		// clamp: beyond ~60 everything
							// collapses to the sticky bit
		EXTRACTOFF = #0				// extract starting at bit 0
	}
#define ZERO r15:14
	{
		if (!A_POS) ATMP = ATMP2		// negative A: use the negated mantissa
		ATMP2 = extractu(BTMP,EXTRACTAMT)	// the bits the shift is about to drop
		BTMP = ASR(BTMP,EXPDIFF)		// align B with A
		ZERO = #0				// r15:14 is reused as a zero pair from here on
	}
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
	{
		NO_STICKIES = cmp.eq(ATMP2,ZERO)
		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)	// fold dropped bits into bit 0
		EXPB = add(EXPA,#-BIAS-60)		// exponent adjustment applied after conversion
		B_POS = cmp.gt(BH,#-1)			// sign bit of B clear?
	}
	{
		// Compute both candidates; the sign of B picks one in the next packet.
		ATMP = add(ATMP,BTMP)			// sum
		ATMP2 = sub(ATMP,BTMP)			// difference (adding the negated B)
		ZTMP = combine(#54,##2045)		// ZTMPH = low bound, ZTMPL = high bound
	}
	{
		// Stay on the fast path only when 54 < EXPA <= 2045.  The low
		// bound is generous because cancellation can shrink the result.
		p0 = cmp.gtu(EXPA,ZTMPH)
		p0 = !cmp.gtu(EXPA,ZTMPL)
		if (!B_POS) ATMP = ATMP2		// negative B: take the difference
		if (!p0.new) jump:nt .Ladd_ovf_unf
	}
	{
		A = convert_d2df(ATMP)			// integer -> double; sets flags as needed
		p0 = cmp.eq(ATMPH,#0)			// both halves zero
		p0 = cmp.eq(ATMPL,#0)			//   => complete cancellation
		if (p0.new) jump:nt .Ladd_zero		// (conversion might already handle zero)
	}
	{
		AH += asl(EXPB,#HI_MANTBITS)		// rebase the exponent field
		jumpr r31
	}
// __hexagon_subdf3 -- double-precision subtract, A - B.
// Flips the sign bit of B and continues in the add routine through its
// __qdsp_adddf3 alias.
	.falign
__hexagon_subdf3:
	{
		BH = togglebit(BH,#31)			// B = -B
		jump __qdsp_adddf3
	}
157 | |
158 | |
// .Ladd_zero -- the operands cancelled completely.
// The result is +0, except that rounding mode 2 (round towards negative
// infinity) yields -0.
	.falign
.Ladd_zero:
	{
		TMP = USR
		A = #0					// start from +0
		BH = #1
	}
	{
		// Use SR_ROUND_OFF rather than a literal offset so this path
		// honours an overridden value, like the other USR readers here.
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// two-bit rounding mode
		BH = asl(BH,#31)			// BH = sign-bit mask
	}
	{
		p0 = cmp.eq(TMP,#2)
		if (p0.new) AH = xor(AH,BH)		// round down: flip to -0
		jumpr r31
	}
// .Ladd_ovf_unf -- EXPA was outside the fast-path window, so the result may
// overflow or be denormal.  (The underflow flag cannot be raised here.)
//
// On entry: ATMP = two's complement result, EXPA = exponent of A,
//           EXPB = EXPA-BIAS-60.
//
// Convert, read back the exponent the conversion produced, add the
// adjustment, then classify:
//   adjusted exponent > 2046  -> overflow (.Ladd_ovf)
//   adjusted exponent <= 0    -> denormal, built by hand below
//   otherwise                 -> the converted value is the answer
// The zero check has not been done yet on this path, so it is repeated here.
	.falign
.Ladd_ovf_unf:
	{
		A = convert_d2df(ATMP)
		p0 = cmp.eq(ATMPH,#0)
		p0 = cmp.eq(ATMPL,#0)
		if (p0.new) jump:nt .Ladd_zero		// complete cancellation
	}
	{
		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)	// exponent from the conversion
		AH += asl(EXPB,#HI_MANTBITS)		// tentatively rebase it
	}
	{
		B = combine(##0x00100000,#0)		// bit 52: explicit leading one
		EXPB = add(EXPB,TMP)			// adjusted exponent of the result
	}
	{
		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
		if (p0.new) jump:nt .Ladd_ovf
	}
	{
		p0 = cmp.gt(EXPB,#0)
		TMP = sub(#1,EXPB)			// right shift needed for a denormal
		if (p0.new) jumpr:t r31			// exponent in range: A is final
	}
	// Denormal: shift the mantissa (with its leading one made explicit)
	// into place and reattach the sign.
	{
		B = insert(A,#MANTBITS,#0)		// mantissa of A below the leading one
		A = ATMP				// top bit of ATMP supplies the sign
	}
	{
		B = lsr(B,TMP)
	}
	{
		A = insert(B,#63,#0)			// low 63 bits from B, sign bit kept
		jumpr r31
	}
// .Ladd_ovf -- the result overflowed.
// Depending on rounding mode and sign, the answer is either the largest
// finite value or infinity.  Overflow and inexact are raised in both cases.
	.falign
.Ladd_ovf:
	{
		TMP = USR
		A = ATMP				// two's complement value: keeps the sign
		ATMP = combine(##0x7fefffff,#-1)	// +max finite
	}
	{
		BTMP = combine(##0x7ff00000,#0)		// +infinity
		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding-mode bits
		TMP = or(TMP,#0x28)			// set inexact + overflow
	}
	{
		// All three read pre-packet values: USR receives the flagged copy,
		// TMP keeps the raw rounding mode, EXPB gets it XORed with the sign.
		USR = TMP
		TMP = EXPB				// unmodified rounding mode
		EXPB ^= lsr(AH,#31)			// does the sign match the rounding direction?
	}
	{
		p0 = !cmp.eq(TMP,#1)			// not round-to-zero, and
		p0 = !cmp.eq(EXPB,#2)			// not rounding away from this sign
		if (p0.new) ATMP = BTMP			//   => the result is infinity
	}
	{
		A = insert(ATMP,#63,#0)			// magnitude from ATMP, sign bit untouched
	}
	{
		// NOTE(review): p0 is not consumed by any visible code after this
		// compare; its purpose is not evident from this file.
		p0 = dfcmp.eq(A,A)
		jumpr r31
	}
256 | |
// .Ladd_abnormal -- at least one operand is not a normal number.
//
// The operands are first ordered by magnitude so that A is the larger.  The
// cases are then peeled off in turn: NaN, infinity, zero B, two subnormals.
// What remains is a normal A plus a subnormal B, which is set up by hand
// and sent back to .Ladd_continue.
.Ladd_abnormal:
	{
		ATMP = extractu(A,#63,#0)		// strip off sign
		BTMP = extractu(B,#63,#0)		// strip off sign
	}
	{
		p3 = cmp.gtu(ATMP,BTMP)
		if (!p3.new) A = B			// sort values: both moves read the
		if (!p3.new) B = A			// pre-packet registers, so this swaps
	}
	{
		// Any NaN --> NaN, possibly raise invalid if sNaN
		p0 = dfclass(A,#0x0f)			// A not NaN?
		if (!p3) ATMP = BTMP			// keep the stripped copies in step
		if (!p3) BTMP = ATMP			// with the A/B swap above
		if (!p0.new) jump:nt .Linvalid_nan_add
	}
	{
		// Infinity + non-infinity number is infinity
		// Infinity + infinity --> inf or nan
		p1 = dfclass(A,#0x08)			// A is infinity
		if (p1.new) jump:nt .Linf_add
	}
	{
		p2 = dfclass(B,#0x01)			// B is zero
		ATMP = #0				// .LB_zero compares A against this
		if (p2.new) jump:nt .LB_zero		// so return A or special 0+0
	}
	// We are left with adding one or more subnormals
	{
		p0 = dfclass(A,#4)			// larger operand subnormal => both are
		ATMP = combine(##0x20000000,#0)		// seed for the normal A (bit 61 set)
		if (p0.new) jump:nt .Ladd_two_subnormal
	}
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = #1				// exponent value used for the subnormal B
		// BTMP already ABS(B); shift it to the position the fast path
		// inserts mantissas at (no leading one is added)
		BTMP = asl(BTMP,#EXPBITS-2)
	}
#undef ZERO
// Restore the r14/r15 aliases that .Ladd_continue removed.  EXPDIFF is used
// in the next packet; EXTRACTOFF is restored with it for symmetry.
#define EXTRACTOFF r14
#define EXPDIFF r15
	{
		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
		EXPDIFF = sub(EXPA,EXPB)
		ZTMP = combine(#62,#1)			// shift clamp / sticky bit, as on the fast path
		jump .Ladd_continue
	}
306 | |
// .Ladd_two_subnormal -- both operands are subnormal.
// Each operand is turned from sign-magnitude into a two's complement
// integer, the integers are added, and the sum is turned back into
// sign-magnitude.  A zero sum is passed to .Lzero_plus_zero.
.Ladd_two_subnormal:
	{
		ATMP = extractu(A,#63,#0)		// magnitude of A
		BTMP = extractu(B,#63,#0)		// magnitude of B
	}
	{
		p0 = cmp.gt(AH,#-1)			// sign bit of A clear?
		p1 = cmp.gt(BH,#-1)			// sign bit of B clear?
		ATMP = neg(ATMP)			// assume negative for now ...
		BTMP = neg(BTMP)
	}
	{
		if (p0) ATMP = A			// ... and take the raw bits if positive
		if (p1) BTMP = B
	}
	{
		ATMP = add(ATMP,BTMP)
	}
	{
		p0 = cmp.gt(ATMPH,#-1)			// sum non-negative?
		BTMP = neg(ATMP)			// magnitude to use if it is negative
		B = #0
	}
	{
		if (p0) A = ATMP
		if (!p0) A = BTMP
		BH = ##0x80000000			// B now holds the bit pattern of -0
	}
	{
		if (!p0) AH = or(AH,BH)			// negative sum: set the sign (reads the old p0)
		p0 = dfcmp.eq(A,B)			// is the result a zero?
		if (p0.new) jump:nt .Lzero_plus_zero
	}
	{
		jumpr r31
	}
343 | |
// .Linvalid_nan_add -- A is a NaN.
// Each NaN operand is passed through convert_df2sf so that a signalling NaN
// raises invalid.  The value returned in A is the all-ones bit pattern.
.Linvalid_nan_add:
	{
		p0 = dfclass(B,#0x0f)			// B is not a NaN?
		if (p0.new) B = A			// then reuse A for the second conversion
		TMP = convert_df2sf(A)			// will generate invalid if sNaN
	}
	{
		BL = convert_df2sf(B)			// will generate invalid if sNaN
		A = #-1
		jumpr r31
	}
// .LB_zero -- B is zero.  ATMP was set to zero on the way here, so comparing
// it with A tells whether A is zero as well.
	.falign
.LB_zero:
	{
		p0 = dfcmp.eq(ATMP,A)			// A zero too?
		if (!p0.new) jumpr:t r31		// no: A is the answer
	}
// Zero plus zero needs care.
// If the two bit patterns are identical the signs agree, and A is already
// correct in every rounding mode.  If the signs differ the result is +0,
// except when rounding down, where it is -0.
.Lzero_plus_zero:
	{
		p0 = cmp.eq(A,B)			// identical bit patterns?
		if (p0.new) jumpr:t r31
	}
	{
		TMP = USR
	}
	{
		A = #0					// default result: +0
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding-mode bits
	}
	{
		p0 = cmp.eq(TMP,#2)			// rounding down?
		if (p0.new) AH = ##0x80000000		// then return -0
		jumpr r31
	}
// .Linf_add -- A is infinite.
// The sum is simply A unless B is an infinity whose high word differs from
// that of A, in which case the operation is invalid and a NaN is returned.
.Linf_add:
	{
		p0 = !cmp.eq(AH,BH)			// high words differ (opposite signs), and
		p0 = dfclass(B,#8)			// B is infinite as well?
		if (!p0.new) jumpr:t r31		// otherwise A is an ordinary infinity
	}
	{
		BL = ##0x7f800001			// single-precision sNaN
	}
	{
		A = convert_sf2df(BL)			// trigger invalid, set NaN
		jumpr r31
	}
// Size of __hexagon_adddf3, measured from its label to this point.
END(__hexagon_adddf3)
397 | |