//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

// Double Precision Fused Multiply-Add


#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4



#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10

#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6


#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16

#define EXPA r18
#define EXPB r19
#define EXPBA r19:18

#define TMP r28

#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32

#define ADJUST 4

#define FUDGE 7
#define FUDGE2 3

#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

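// For reference: an IEEE-754 double is 1 sign bit, EXPBITS (11) exponent bits
// biased by BIAS (1023), and MANTBITS (52) mantissa bits with an implicit
// leading one for normal values.
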
// First, classify for normal values, and abort if abnormal
//
// Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
//
// Since we know that the 2 MSBs of the H registers are zero, we should never
// see a carry out of the partial products that involve the H registers
//
// Try to buy X slots, at the expense of latency if needed
//
// We will have PP_HH with the upper bits of the product, PP_LL with the lower
// PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
// PP_HH can have a minimum of 0x0100_0000_0000_0000
//
// 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
//
// We need to align CTMP.
// If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow the
// normal add
// If CTMP << PP, align CTMP and add 128 bits. Then compute sticky
// If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
//
// Convert the partial product and CTMP to 2's complement prior to addition
//
// After we add, we need to normalize into the upper 64 bits, then compute sticky.

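// A rough C model of the unpacking described above, for orientation
// (illustrative only: the helper name is made up, and signs, rounding, and
// abnormal inputs are ignored):
//
//   #include <stdint.h>
//   // Unpack a positive normal double into 0x1000_0000_0000_0000 + (mant << 8):
//   // the implicit one lands at bit 60 with 8 guard bits below the mantissa.
//   static uint64_t unpack(uint64_t bits) {
//       uint64_t mant = bits & ((1ULL << 52) - 1);  // low 52 bits
//       return (1ULL << 60) | (mant << 8);
//   }
//
// Two such values lie in [2^60, 2^61), so the 128-bit product lies in
// [2^120, 2^122) and PP_HH stays within the bounds quoted above.
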
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
Q6_ALIAS(fmadf5)
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
.Lfma_begin:
{
    P_TMP = dfclass(A,#2)                       // is A normal?
    P_TMP = dfclass(B,#2)                       // ...AND is B normal?
    ATMP = #0
    BTMP = #0
}
{
    ATMP = insert(A,#MANTBITS,#EXPBITS-3)       // mantissa << 8
    BTMP = insert(B,#MANTBITS,#EXPBITS-3)
    PP_ODD_H = ##0x10000000                     // high word of 0x1000_0000_0000_0000
    allocframe(#STACKSPACE)
}
{
    PP_LL = mpyu(ATMPL,BTMPL)
    if (!P_TMP) jump .Lfma_abnormal_ab
    ATMPH = or(ATMPH,PP_ODD_H)                  // set the implicit one in A
    BTMPH = or(BTMPH,PP_ODD_H)                  // set the implicit one in B
}
{
    P_TMP = dfclass(C,#2)                       // is C normal?
    if (!P_TMP.new) jump:nt .Lfma_abnormal_c
    CTMP = combine(PP_ODD_H,#0)
    PP_ODD = combine(#0,PP_LL_H)
}
.Lfma_abnormal_c_restart:
{
    PP_ODD += mpyu(BTMPL,ATMPH)
    CTMP = insert(C,#MANTBITS,#EXPBITS-3)       // C's mantissa << 8
    memd(r29+#0) = PP_HH                        // save callee-saved registers
    memd(r29+#8) = EXPBA
}
{
    PP_ODD += mpyu(ATMPL,BTMPH)
    EXPBA = neg(CTMP)
    P_TMP = cmp.gt(CH,#-1)                      // is C non-negative?
    TMP = xor(AH,BH)                            // sign of the product
}
{
    EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
    EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
    PP_HH = combine(#0,PP_ODD_H)
    if (!P_TMP) CTMP = EXPBA                    // C negative: take the negated C
}
{
    PP_HH += mpyu(ATMPH,BTMPH)
    PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12

    EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
    EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
// PP_HH:PP_LL now has the product
// CTMP is negated
// EXPA,B,C are extracted
// We need to negate PP
// Since we will be adding with carry later, if we need to negate,
// just invert all bits now, which we can do conditionally and in parallel
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
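// A minimal C sketch of the conditional-negate idiom used below (hypothetical
// helper; the point is that -x == ~x + 1, with the "+1" riding the carry chain
// of the sub(...):carry pair):
//
//   #include <stdint.h>
//   // Negate a 128-bit value held as two 64-bit halves with subtract-borrow.
//   static void neg128(uint64_t *hi, uint64_t *lo) {
//       uint64_t new_lo = 0 - *lo;       // low half
//       uint64_t borrow = (*lo != 0);    // borrow out of the low half
//       *hi = 0 - *hi - borrow;          // high half consumes the borrow
//       *lo = new_lo;
//   }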
{
    EXPA = add(EXPA,#-BIAS+(ADJUST))
    PROD_NEG = !cmp.gt(TMP,#-1)                 // product sign bit set?
    PP_LL_TMP = #0
    PP_HH_TMP = #0
}
{
    PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
    P_TMP = !cmp.gt(TMP,#-1)
    SWAP = cmp.gt(EXPC,EXPA)                    // If C >> PP
    if (SWAP.new) EXPCA = combine(EXPA,EXPC)
}
{
    PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
    if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
    CTMP2 = #0
    EXPC = sub(EXPA,EXPC)
}
{
    if (P_TMP) PP_HH = PP_HH_TMP
    P_TMP = cmp.gt(EXPC,#63)
    if (SWAP) PP_LL = CTMP2                     // packet reads old values,
    if (SWAP) CTMP2 = PP_LL                     // so this swaps
}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
{
    if (SWAP) PP_HH = CTMP                      // Swap C and PP
    if (SWAP) CTMP = PP_HH
    if (P_TMP) EXPC = add(EXPC,#-64)
    TMP = #63
}
{
    // If diff > 63, pre-shift-right by 64...
    if (P_TMP) CTMP2 = CTMP
    TMP = asr(CTMPH,#31)                        // sign word, for the next packet
    RIGHTSHIFT = min(EXPC,TMP)                  // packet: reads the old TMP (#63)
    LEFTSHIFT = #0
}
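// A C sketch of this two-stage alignment shift (illustrative; the min above
// caps the per-stage shift amount at 63, hence the optional pre-shift by 64):
//
//   #include <stdint.h>
//   // Arithmetic right shift of a 128-bit value hi:lo by n in [0,127].
//   static void asr128(int64_t *hi, uint64_t *lo, int n) {
//       if (n > 63) {                    // pre-shift-right by 64:
//           *lo = (uint64_t)*hi;         // low half takes the old high half,
//           *hi >>= 63;                  // high half becomes all sign bits
//           n -= 64;
//       }
//       if (n) {
//           *lo = (*lo >> n) | ((uint64_t)*hi << (64 - n));
//           *hi >>= n;                   // assumes arithmetic >> on int64_t
//       }
//   }
//
// The real code also keeps the shifted-out bits (the extract below) so they
// can be folded back in as a sticky bit.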
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
{
    if (P_TMP) CTMP = combine(TMP,TMP)          // sign extension of pre-shift-right-64
    STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)    // the bits about to be shifted out
    CTMP2 = lsr(CTMP2,RIGHTSHIFT)
    LEFTSHIFT = sub(#64,RIGHTSHIFT)
}
{
    ZERO = #0
    TMP = #-2
    CTMP2 |= lsl(CTMP,LEFTSHIFT)
    CTMP = asr(CTMP,RIGHTSHIFT)
}
{
    P_CARRY = cmp.gtu(STICKIES,ZERO)            // If we have sticky bits from C shift
    if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP)   // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
    ONE = #1
    STICKIES = #0
}
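// The "adding 1 == OR" trick: when sticky bits were shifted out, the sum's
// LSB must end up forced on. Clearing bit 0 first makes the carry-in of 1
// act like an OR, because no carry can ripple past bit 0. In C terms:
//
//   #include <assert.h>
//   #include <stdint.h>
//   static void check(uint64_t x) {
//       // (x & ~1) + 1 sets bit 0 without carrying into bit 1 == x | 1
//       assert(((x & ~(uint64_t)1) + 1) == (x | 1));
//   }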
{
    PP_LL = add(CTMP2,PP_LL,P_CARRY):carry      // use the carry to add the sticky
}
{
    PP_HH = add(CTMP,PP_HH,P_CARRY):carry
    TMP = #62
}
// PP_HH:PP_LL now holds the sum
// We may need to normalize left, up to ??? bits.
//
// I think that if we have massive cancellation, the range we normalize by
// is still limited
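// A sketch of the clb-based normalization (illustrative; assumes GCC/Clang
// builtins, and that clb here acts like counting redundant sign bits plus one):
//
//   #include <stdint.h>
//   // Shift hi:lo left so exactly one redundant sign bit remains; return the
//   // shift amount so the caller can adjust the exponent.
//   static int left_norm(int64_t *hi, uint64_t *lo) {
//       int n = __builtin_clrsbll(*hi) - 1;   // mirrors add(clb(PP_HH),#-2)
//       if (n > 0) {
//           *hi = (int64_t)(((uint64_t)*hi << n) | (*lo >> (64 - n)));
//           *lo <<= n;
//       }
//       return n;
//   }
//
// When the high half is nothing but sign bits (n == 62), the code below first
// shifts in 62 bits from the low half and then recomputes the remaining shift.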
{
    LEFTSHIFT = add(clb(PP_HH),#-2)
    if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f   // all sign bits?
}
// We had all sign bits, shift left by 62.
{
    CTMP = extractu(PP_LL,#62,#2)
    PP_LL = asl(PP_LL,#62)
    EXPA = add(EXPA,#-62)                       // And adjust exponent of result
}
{
    PP_HH = insert(CTMP,#62,#0)                 // complete the 62-bit left shift
}
{
    LEFTSHIFT = add(clb(PP_HH),#-2)             // recompute the remaining shift
}
.falign
1:
{
    CTMP = asl(PP_HH,LEFTSHIFT)
    STICKIES |= asl(PP_LL,LEFTSHIFT)
    RIGHTSHIFT = sub(#64,LEFTSHIFT)
    EXPA = sub(EXPA,LEFTSHIFT)
}
{
    CTMP |= lsr(PP_LL,RIGHTSHIFT)
    EXACT = cmp.gtu(ONE,STICKIES)               // no sticky bits anywhere?
    TMP = #BIAS+BIAS-2
}
{
    if (!EXACT) CTMPL = or(CTMPL,S_ONE)         // fold sticky into the LSB
    // If EXPA is overflow/underflow, jump to ovf_unf
    P_TMP = !cmp.gt(EXPA,TMP)                   // EXPA <= 2*BIAS-2...
    P_TMP = cmp.gt(EXPA,#1)                     // ...AND EXPA > 1
    if (!P_TMP.new) jump:nt .Lfma_ovf_unf
}
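// The window tested above, restated in C (hedged restatement; EXPA at this
// point is the internal biased exponent of the not-yet-packed result):
//
//   static int fits_normal_path(int expa) {
//       return expa > 1 && expa <= 2 * 1023 - 2;   // else take .Lfma_ovf_unf
//   }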
{
    // XXX: FIXME: should PP_HH for check of zero be CTMP?
    P_TMP = cmp.gtu(ONE,CTMP)                   // is result true zero?
    A = convert_d2df(CTMP)
    EXPA = add(EXPA,#-BIAS-60)                  // bias and fixed-point correction
    PP_HH = memd(r29+#0)                        // restore saved registers
}
{
    AH += asl(EXPA,#HI_MANTBITS)                // put the exponent in place
    EXPCA = memd(r29+#8)
    if (!P_TMP) dealloc_return                  // not zero, return
}
.Ladd_yields_zero:
// We had full cancellation. Return +/- zero (-0 when rounding toward -inf)
{
    TMP = USR
    A = #0
}
{
    TMP = extractu(TMP,#2,#SR_ROUND_OFF)        // current rounding mode
    PP_HH = memd(r29+#0)
    EXPCA = memd(r29+#8)
}
{
    p0 = cmp.eq(TMP,#2)                         // rounding toward -inf?
    if (p0.new) AH = ##0x80000000               // yes: return -0
    dealloc_return
}
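// This is the IEEE-754 rule for exact zero sums: cancellation to zero gives
// +0 in every rounding direction except roundTowardNegative, which gives -0.
// A host-side C illustration (may need -frounding-math and FENV_ACCESS to be
// strictly conforming):
//
//   #include <fenv.h>
//   #include <math.h>
//   #include <stdio.h>
//   int main(void) {
//       fesetround(FE_DOWNWARD);       // roundTowardNegative
//       double z = 1.0 + (-1.0);       // exact cancellation
//       printf("%d\n", signbit(z));    // nonzero: z is -0.0
//       return 0;
//   }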

#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L

.Lfma_ovf_unf:
{
    p0 = cmp.gtu(ONE,CTMP)                      // result true zero?
    if (p0.new) jump:nt .Ladd_yields_zero
}
{
    A = convert_d2df(CTMP)
    EXPA = add(EXPA,#-BIAS-60)
    TMP = EXPA
}
#define NEW_EXPB r7
#define NEW_EXPA r6
{
    AH += asl(EXPA,#HI_MANTBITS)
    NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
}
{
    NEW_EXPA = add(EXPA,NEW_EXPB)
    PP_HH = memd(r29+#0)
    EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
    ATMP = abs(CTMP)
}
{
    p0 = cmp.gt(EXPA,##BIAS+BIAS)
    if (p0.new) jump:nt .Lfma_ovf
}
{
    p0 = cmp.gt(EXPA,#0)
    if (p0.new) jump:nt .Lpossible_unf
}
{
    // TMP has the original EXPA.
    // ATMP is the corresponding value.
    // Normalize ATMP and shift right to the correct location.
    EXPB = add(clb(ATMP),#-2)                   // Amount to left shift to normalize
    EXPA = sub(#1+5,TMP)                        // Amount to right shift to denormalize
    p3 = cmp.gt(CTMPH,#-1)                      // sign of CTMP (positive if true)
}
// Underflow
// We know that the infinite range exponent should be EXPA
// CTMP is 2's complement, ATMP is abs(CTMP)
{
    EXPA = add(EXPA,EXPB)                       // how much to shift back right
    ATMP = asl(ATMP,EXPB)                       // shift left
    AH = USR
    TMP = #63
}
{
    EXPB = min(EXPA,TMP)
    EXPA = #0
    AL = #0x0030                                // USR underflow/inexact sticky bits
}
{
    B = extractu(ATMP,EXPBA)                    // the bits being shifted out
    ATMP = asr(ATMP,EXPB)
}
{
    p0 = cmp.gtu(ONE,B)
    if (!p0.new) ATMPL = or(ATMPL,S_ONE)        // fold sticky into the LSB
    ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
}
{
    CTMP = neg(ATMP)
    p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)          // exact in the low FUDGE2 bits?
    if (!p1.new) AH = or(AH,AL)                 // no: raise underflow/inexact
    B = #0
}
{
    if (p3) CTMP = ATMP                         // restore the sign
    USR = AH
    TMP = #-BIAS-(MANTBITS+FUDGE2)
}
{
    A = convert_d2df(CTMP)
}
{
    AH += asl(TMP,#HI_MANTBITS)                 // scale down into the denormal range
    dealloc_return
}
.Lpossible_unf:
{
    TMP = ##0x7fefffff
    ATMP = abs(CTMP)
}
{
    p0 = cmp.eq(AL,#0)                          // low word zero...
    p0 = bitsclr(AH,TMP)                        // ...AND result on the smallest-normal boundary?
    if (!p0.new) dealloc_return:t               // no: ordinary result, return
    TMP = #0x7fff
}
{
    p0 = bitsset(ATMPH,TMP)
    BH = USR
    BL = #0x0030                                // USR underflow/inexact sticky bits
}
{
    if (p0) BH = or(BH,BL)                      // raise underflow/inexact
}
{
    USR = BH
}
{
    p0 = dfcmp.eq(A,A)
    dealloc_return
}
.Lfma_ovf:
{
    TMP = USR
    CTMP = combine(##0x7fefffff,#-1)            // max finite magnitude
    A = CTMP                                    // packet: A gets the old, signed CTMP
}
{
    ATMP = combine(##0x7ff00000,#0)             // infinity magnitude
    BH = extractu(TMP,#2,#SR_ROUND_OFF)         // rounding mode
    TMP = or(TMP,#0x28)                         // raise overflow/inexact
}
{
    USR = TMP
    BH ^= lsr(AH,#31)                           // fold the result sign into the mode
    BL = BH                                     // packet: BL gets the unfolded mode
}
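// For reference, the IEEE-754 overflow results by rounding direction:
//   roundTiesToEven:     +/-inf
//   roundTowardZero:     +/-(max finite)
//   roundTowardNegative: -inf for negative results, +max finite for positive
//   roundTowardPositive: +inf for positive results, -max finite for negative
// The predicate tests below choose between the max-finite pattern and the
// infinity pattern accordingly.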
{
    p0 = !cmp.eq(BL,#1)                         // not round-toward-zero...
    p0 = !cmp.eq(BH,#2)                         // ...AND not rounding away from the overflow
}
{
    p0 = dfcmp.eq(ATMP,ATMP)
    if (p0.new) CTMP = ATMP
}
{
    A = insert(CTMP,#63,#0)                     // reattach the sign
    dealloc_return
}
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10

#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4

.Lfma_abnormal_ab:
{
    ATMP = extractu(A,#63,#0)                   // strip signs
    BTMP = extractu(B,#63,#0)
    deallocframe
}
{
    p3 = cmp.gtu(ATMP,BTMP)
    if (!p3.new) A = B                          // sort values (packet reads old
    if (!p3.new) B = A                          // values, so this swaps)
}
{
    p0 = dfclass(A,#0x0f)                       // is A a number (not NaN)?
    if (!p0.new) jump:nt .Lnan
    if (!p3) ATMP = BTMP                        // sort the magnitudes too
    if (!p3) BTMP = ATMP
}
{
    p1 = dfclass(A,#0x08)                       // A is infinity
    p1 = dfclass(B,#0x0e)                       // ...AND B is nonzero
}
{
    p0 = dfclass(A,#0x08)                       // A is inf
    p0 = dfclass(B,#0x01)                       // ...AND B is zero
}
{
    if (p1) jump .Lab_inf
    p2 = dfclass(B,#0x01)
}
{
    if (p0) jump .Linvalid
    if (p2) jump .Lab_true_zero
    TMP = ##0x7c000000                          // mask: top 5 exponent bits
}
// We are left with a normal or subnormal times a subnormal, A > B.
// If A and B are both very small, we will go to a single sticky bit; replacing
// the lower 63 bits of A and B with 0x0010_0000_0000_0000 yields equivalent
// results.
// If A and B might multiply to something bigger, decrease A's exponent and
// increase B's exponent by the same amount, and start over.
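// A rough C model of that rescaling (hypothetical helper, scalbn-style;
// ignores over/underflow of the scaling itself):
//
//   #include <math.h>
//   // Scaling B up by 2^k and A down by 2^k leaves the product unchanged:
//   //   (a * 2^-k) * (b * 2^k) == a * b
//   // which is why the code below can renormalize B and only adjust A's
//   // exponent before restarting.
//   static double rescale_product(double a, double b, int k) {
//       return scalbn(a, -k) * scalbn(b, k);
//   }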
{
    p0 = bitsclr(AH,TMP)                        // A's exponent < 0x040?
    if (p0.new) jump:nt .Lfma_ab_tiny
}
{
    TMP = add(clb(BTMP),#-EXPBITS)              // shift needed to normalize B
}
{
    BTMP = asl(BTMP,TMP)
}
{
    B = insert(BTMP,#63,#0)                     // B is now normal
    AH -= asl(TMP,#HI_MANTBITS)                 // compensate in A's exponent
}
    jump .Lfma_begin

.Lfma_ab_tiny:
    ATMP = combine(##0x00100000,#0)             // smallest normal magnitude
{
    A = insert(ATMP,#63,#0)                     // keep signs, replace magnitudes
    B = insert(ATMP,#63,#0)
}
    jump .Lfma_begin

.Lab_inf:
{
    B = lsr(B,#63)                              // B's sign bit
    p0 = dfclass(C,#0x10)                       // is C NaN?
}
{
    A ^= asl(B,#63)                             // apply the product's sign to A
    if (p0) jump .Lnan
}
{
    p1 = dfclass(C,#0x08)                       // is C inf?
    if (p1.new) jump:nt .Lfma_inf_plus_inf
}
// A*B is +/- inf, C is finite. Return A
{
    jumpr r31
}
.falign
.Lfma_inf_plus_inf:
{                                               // adding infinities of different signs is invalid
    p0 = dfcmp.eq(A,C)
    if (!p0.new) jump:nt .Linvalid
}
{
    jumpr r31
}

.Lnan:
{
    p0 = dfclass(B,#0x10)                       // is B NaN?
    p1 = dfclass(C,#0x10)                       // is C NaN?
    if (!p0.new) B = A                          // if not, replace with the NaN A
    if (!p1.new) C = A
}
{                                               // find sNaNs: converting a signaling
    BH = convert_df2sf(B)                       // NaN raises the invalid flag
    BL = convert_df2sf(C)
}
{
    BH = convert_df2sf(A)
    A = #-1                                     // all-ones qNaN
    jumpr r31
}

.Linvalid:
{
    TMP = ##0x7f800001                          // single-precision sNaN
}
{
    A = convert_sf2df(TMP)                      // conversion raises invalid
    jumpr r31
}

.Lab_true_zero:
// B is zero, A is a finite number
{
    p0 = dfclass(C,#0x10)                       // is C NaN?
    if (p0.new) jump:nt .Lnan
    if (p0.new) A = C                           // move the NaN to A
}
{
    p0 = dfcmp.eq(B,C)                          // is C also zero?
    AH = lsr(AH,#31)                            // get sign
}
{
    BH ^= asl(AH,#31)                           // form correctly signed zero in B
    if (!p0) A = C                              // If C is not zero, return C
    if (!p0) jumpr r31
}
// B has correctly signed zero, C is also zero
.Lzero_plus_zero:
{
    p0 = cmp.eq(B,C)                            // same sign: +0 + +0 or -0 + -0
    if (p0.new) jumpr:t r31
    A = B
}
{
    TMP = USR
}
{
    TMP = extractu(TMP,#2,#SR_ROUND_OFF)        // rounding mode
    A = #0
}
{
    p0 = cmp.eq(TMP,#2)                         // rounding toward -inf?
    if (p0.new) AH = ##0x80000000               // yes: -0
    jumpr r31
}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
.falign
.Lfma_abnormal_c:
// We know that A*B is normal * normal
// C is not normal: zero, subnormal, inf, or NaN.
{
    p0 = dfclass(C,#0x10)                       // is C NaN?
    if (p0.new) jump:nt .Lnan
    if (p0.new) A = C                           // move the NaN to A
    deallocframe
}
{
    p0 = dfclass(C,#0x08)                       // is C inf?
    if (p0.new) A = C                           // return C
    if (p0.new) jumpr:nt r31
}
// zero or subnormal
// If C is zero, and we know A*B is normal*normal, we can just tail-call the
// ordinary multiply
{
    p0 = dfclass(C,#0x01)                       // is C zero?
    if (p0.new) jump:nt __hexagon_muldf3
    TMP = #1
}
// Left with: subnormal C
// Give C a nonzero exponent field and jump back to restart
{
    allocframe(#STACKSPACE)                     // deallocated above; re-allocate
    CTMP = #0
    CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
    jump .Lfma_abnormal_c_restart
}
END(__hexagon_fmadf4)