//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
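// Q6_ALIAS publishes the legacy __qdsp_* name as an alias of the
// __hexagon_* entry point; END records the symbol size for tools.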

// Double Precision Fused Multiply-Add


#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4



#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10

#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6


#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16

#define EXPA r18
#define EXPB r19
#define EXPBA r19:18

#define TMP r28

#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32

#define ADJUST 4

#define FUDGE 7
#define FUDGE2 3

#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
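// The IEEE rounding mode lives in the two USR bits at SR_ROUND_OFF. The
// code below assumes the usual Hexagon encoding: 0 = round to nearest
// even, 1 = toward zero, 2 = downward (-inf), 3 = upward (+inf).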

// First, classify for normal values, and abort if abnormal
//
// Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
//
// Since we know that the 2 MSBs of the H registers are zero, the partial
// products that involve the H registers can never carry out
//
// Try to buy X slots, at the expense of latency if needed
//
// We will have PP_HH with the upper bits of the product, PP_LL with the lower
// PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
// PP_HH can have a minimum of 0x0100_0000_0000_0000
//
// 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
//
// We need to align CTMP.
// If CTMP >> PP, convert PP to 64 bits with sticky, align CTMP, and follow the normal add
// If CTMP << PP, align CTMP and add 128 bits. Then compute sticky
// If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
//
// Convert the partial product and CTMP to 2's complement prior to addition
//
// After we add, we need to normalize into the upper 64 bits, then compute sticky.
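//
// A rough C-level sketch of the flow (illustrative only; the code below
// works on 128-bit partial products and uses packet-level tricks that the
// sketch glosses over):
//
//   unpack a, b, c into sign / exponent / (0x1000_0000_0000_0000 | mant<<8)
//   pp = (u128) mant_a * mant_b              // schoolbook 32x32 partials
//   if (sign_a ^ sign_b) pp = -pp            // work in 2's complement
//   if (sign_c) mant_c = -mant_c
//   align the smaller of pp and mant_c to the larger, keeping sticky bits
//   sum = pp + mant_c                        // 128-bit add, chained carry
//   normalize sum into the top 64 bits, folding leftovers into sticky
//   round via convert_d2df, then patch the exponent field of the result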

    .text
    .global __hexagon_fmadf4
    .type __hexagon_fmadf4,@function
    .global __hexagon_fmadf5
    .type __hexagon_fmadf5,@function
    Q6_ALIAS(fmadf5)
    .p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
.Lfma_begin:
{
    P_TMP = dfclass(A,#2)
    P_TMP = dfclass(B,#2)
    ATMP = #0
    BTMP = #0
}
{
    ATMP = insert(A,#MANTBITS,#EXPBITS-3)
    BTMP = insert(B,#MANTBITS,#EXPBITS-3)
    PP_ODD_H = ##0x10000000
    allocframe(#STACKSPACE)
}
{
    PP_LL = mpyu(ATMPL,BTMPL)
    if (!P_TMP) jump .Lfma_abnormal_ab
    ATMPH = or(ATMPH,PP_ODD_H)
    BTMPH = or(BTMPH,PP_ODD_H)
}
{
    P_TMP = dfclass(C,#2)
    if (!P_TMP.new) jump:nt .Lfma_abnormal_c
    CTMP = combine(PP_ODD_H,#0)
    PP_ODD = combine(#0,PP_LL_H)
}
.Lfma_abnormal_c_restart:
{
    PP_ODD += mpyu(BTMPL,ATMPH)
    CTMP = insert(C,#MANTBITS,#EXPBITS-3)
    memd(r29+#0) = PP_HH
    memd(r29+#8) = EXPBA
}
{
    PP_ODD += mpyu(ATMPL,BTMPH)
    EXPBA = neg(CTMP)
    P_TMP = cmp.gt(CH,#-1)
    TMP = xor(AH,BH)
}
{
    EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
    EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
    PP_HH = combine(#0,PP_ODD_H)
    if (!P_TMP) CTMP = EXPBA
}
{
    PP_HH += mpyu(ATMPH,BTMPH)
    PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12

    EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
    EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
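// The 64x64 -> 128 bit mantissa product was assembled schoolbook-style
// from 32-bit halves: A*B = (AH'*BH' << 64) + ((AH'*BL' + AL'*BH') << 32)
// + AL'*BL', where primes denote the unpacked mantissa halves. PP_LL
// collected the low 64 bits and PP_HH the high 64, with the middle (odd)
// partial products folded through PP_ODD.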
// PP_HH:PP_LL now has product
// CTMP is negated
// EXPA,B,C are extracted
// We need to negate PP
// Since we will be adding with carry later, if we need to negate,
// just invert all bits now, which we can do conditionally and in parallel
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
{
    EXPA = add(EXPA,#-BIAS+(ADJUST))
    PROD_NEG = !cmp.gt(TMP,#-1)
    PP_LL_TMP = #0
    PP_HH_TMP = #0
}
{
    PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
    P_TMP = !cmp.gt(TMP,#-1)
    SWAP = cmp.gt(EXPC,EXPA)    // If C >> PP
    if (SWAP.new) EXPCA = combine(EXPA,EXPC)
}
{
    PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
    if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
    CTMP2 = #0
    EXPC = sub(EXPA,EXPC)
}
{
    if (P_TMP) PP_HH = PP_HH_TMP
    P_TMP = cmp.gt(EXPC,#63)
    if (SWAP) PP_LL = CTMP2
    if (SWAP) CTMP2 = PP_LL
}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
{
    if (SWAP) PP_HH = CTMP    // Swap C and PP
    if (SWAP) CTMP = PP_HH
    if (P_TMP) EXPC = add(EXPC,#-64)
    TMP = #63
}
{
    // If diff > 63, pre-shift-right by 64...
    if (P_TMP) CTMP2 = CTMP
    TMP = asr(CTMPH,#31)
    RIGHTSHIFT = min(EXPC,TMP)
    LEFTSHIFT = #0
}
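// Packet-semantics note: register reads within a packet see the values
// from before the packet, so RIGHTSHIFT = min(EXPC,TMP) above used the
// TMP = #63 clamp set in the previous packet, while the new TMP (the sign
// extension of CTMPH) feeds the combine() in the next packet.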
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
{
    if (P_TMP) CTMP = combine(TMP,TMP)    // sign extension of pre-shift-right-64
    STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
    CTMP2 = lsr(CTMP2,RIGHTSHIFT)
    LEFTSHIFT = sub(#64,RIGHTSHIFT)
}
{
    ZERO = #0
    TMP = #-2
    CTMP2 |= lsl(CTMP,LEFTSHIFT)
    CTMP = asr(CTMP,RIGHTSHIFT)
}
{
    P_CARRY = cmp.gtu(STICKIES,ZERO)    // If we have sticky bits from C shift
    if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP)    // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
    ONE = #1
    STICKIES = #0
}
{
    PP_LL = add(CTMP2,PP_LL,P_CARRY):carry    // use the carry to add the sticky
}
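// The :carry adds chain P_CARRY from the low 64 bits into the high 64,
// forming a full 128-bit sum. P_CARRY was preset from the C sticky bits,
// and the LSB cleared above makes that +1 behave exactly like an OR.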
{
    PP_HH = add(CTMP,PP_HH,P_CARRY):carry
    TMP = #62
}
// PP_HH:PP_LL now holds the sum
// We may need to normalize left, up to ??? bits.
//
// I think that if we have massive cancellation, the range we normalize by
// is still limited
{
    LEFTSHIFT = add(clb(PP_HH),#-2)
    if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f    // all sign bits?
}
// We had all sign bits, shift left by 62.
{
    CTMP = extractu(PP_LL,#62,#2)
    PP_LL = asl(PP_LL,#62)
    EXPA = add(EXPA,#-62)    // And adjust exponent of result
}
{
    PP_HH = insert(CTMP,#62,#0)    // Fold the bits shifted out of PP_LL into PP_HH
}
{
    LEFTSHIFT = add(clb(PP_HH),#-2)
}
    .falign
1:
{
    CTMP = asl(PP_HH,LEFTSHIFT)
    STICKIES |= asl(PP_LL,LEFTSHIFT)
    RIGHTSHIFT = sub(#64,LEFTSHIFT)
    EXPA = sub(EXPA,LEFTSHIFT)
}
{
    CTMP |= lsr(PP_LL,RIGHTSHIFT)
    EXACT = cmp.gtu(ONE,STICKIES)
    TMP = #BIAS+BIAS-2
}
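// CTMP now holds the top 64 bits of the normalized sum; anything left
// behind in STICKIES makes the result inexact and is ORed into the LSB
// below so that the final conversion rounds correctly.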
{
    if (!EXACT) CTMPL = or(CTMPL,S_ONE)
    // If EXPA is overflow/underflow, jump to ovf_unf
    P_TMP = !cmp.gt(EXPA,TMP)
    P_TMP = cmp.gt(EXPA,#1)
    if (!P_TMP.new) jump:nt .Lfma_ovf_unf
}
{
    // XXX: FIXME: should PP_HH for check of zero be CTMP?
    P_TMP = cmp.gtu(ONE,CTMP)    // is result true zero?
    A = convert_d2df(CTMP)
    EXPA = add(EXPA,#-BIAS-60)
    PP_HH = memd(r29+#0)
}
{
    AH += asl(EXPA,#HI_MANTBITS)
    EXPCA = memd(r29+#8)
    if (!P_TMP) dealloc_return    // not zero, return
}
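// Result packing: convert_d2df rounds the 64-bit 2's-complement value to
// a double at integer scale, and adding EXPA into the exponent field
// (EXPA << HI_MANTBITS) then rescales by 2^EXPA in a single add; the
// earlier range check ensures the adjusted field stays in normal range.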
.Ladd_yields_zero:
// We had full cancellation. Return +/- zero (-0 when round-down)
{
    TMP = USR
    A = #0
}
{
    TMP = extractu(TMP,#2,#SR_ROUND_OFF)
    PP_HH = memd(r29+#0)
    EXPCA = memd(r29+#8)
}
{
    p0 = cmp.eq(TMP,#2)
    if (p0.new) AH = ##0x80000000
    dealloc_return
}

#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L

.Lfma_ovf_unf:
{
    p0 = cmp.gtu(ONE,CTMP)
    if (p0.new) jump:nt .Ladd_yields_zero
}
{
    A = convert_d2df(CTMP)
    EXPA = add(EXPA,#-BIAS-60)
    TMP = EXPA
}
#define NEW_EXPB r7
#define NEW_EXPA r6
{
    AH += asl(EXPA,#HI_MANTBITS)
    NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
}
{
    NEW_EXPA = add(EXPA,NEW_EXPB)
    PP_HH = memd(r29+#0)
    EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
    ATMP = abs(CTMP)
}
{
    p0 = cmp.gt(EXPA,##BIAS+BIAS)
    if (p0.new) jump:nt .Lfma_ovf
}
{
    p0 = cmp.gt(EXPA,#0)
    if (p0.new) jump:nt .Lpossible_unf
}
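// EXPA now holds the would-be biased exponent of the result. Above
// 2*BIAS the result overflows. If it is still positive, the packed value
// is representable, but a result landing exactly on the smallest normal
// may have rounded up out of the subnormal range, so .Lpossible_unf
// checks whether underflow/inexact must still be raised.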
{
    // TMP has original EXPA.
    // ATMP is the corresponding magnitude (abs of the 2's-complement result)
    // Normalize ATMP and shift right to correct location
    EXPB = add(clb(ATMP),#-2)    // Amount to left shift to normalize
    EXPA = sub(#1+5,TMP)         // Amount to right shift to denormalize
    p3 = cmp.gt(CTMPH,#-1)
}
// Underflow
// We know that the infinite-range exponent should be EXPA
// CTMP is 2's complement, ATMP is abs(CTMP)
{
    EXPA = add(EXPA,EXPB)    // how much to shift back right
    ATMP = asl(ATMP,EXPB)    // shift left
    AH = USR
    TMP = #63
}
{
    EXPB = min(EXPA,TMP)
    EXPA = #0
    AL = #0x0030
}
{
    B = extractu(ATMP,EXPBA)
    ATMP = asr(ATMP,EXPB)
}
{
    p0 = cmp.gtu(ONE,B)
    if (!p0.new) ATMPL = or(ATMPL,S_ONE)
    ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
}
{
    CTMP = neg(ATMP)
    p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
    if (!p1.new) AH = or(AH,AL)
    B = #0
}
{
    if (p3) CTMP = ATMP
    USR = AH
    TMP = #-BIAS-(MANTBITS+FUDGE2)
}
{
    A = convert_d2df(CTMP)
}
{
    AH += asl(TMP,#HI_MANTBITS)
    dealloc_return
}
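// Denormal rounding, a sketch of the trick above: the magnitude was
// normalized, shifted right into denormal position (saving the shifted-
// out bits as sticky), and a marker bit was set at HI_MANTBITS+FUDGE2 so
// the integer converts at a known scale; subtracting BIAS+MANTBITS+FUDGE2
// from the exponent field afterwards drops the value into the denormal
// encoding, already rounded in the current mode. 0x0030 is assumed to be
// the underflow|inexact sticky-flag bits in USR.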
.Lpossible_unf:
{
    TMP = ##0x7fefffff
    ATMP = abs(CTMP)
}
{
    p0 = cmp.eq(AL,#0)
    p0 = bitsclr(AH,TMP)
    if (!p0.new) dealloc_return:t
    TMP = #0x7fff
}
{
    p0 = bitsset(ATMPH,TMP)
    BH = USR
    BL = #0x0030
}
{
    if (p0) BH = or(BH,BL)
}
{
    USR = BH
}
{
    p0 = dfcmp.eq(A,A)
    dealloc_return
}
.Lfma_ovf:
{
    TMP = USR
    CTMP = combine(##0x7fefffff,#-1)
    A = CTMP
}
{
    ATMP = combine(##0x7ff00000,#0)
    BH = extractu(TMP,#2,#SR_ROUND_OFF)
    TMP = or(TMP,#0x28)
}
{
    USR = TMP
    BH ^= lsr(AH,#31)
    BL = BH
}
{
    p0 = !cmp.eq(BL,#1)
    p0 = !cmp.eq(BH,#2)
}
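// Packet-semantics notes for the overflow path: A = CTMP above captured
// the signed 2's-complement result before CTMP was overwritten with max
// finite, and BL = BH captured the raw rounding mode before the sign was
// XORed in. The test above therefore keeps max finite exactly when the
// mode is toward-zero or when mode^sign selects the directed mode that
// points away from the overflow; every other case becomes a correctly
// signed infinity below. (Rounding encoding as assumed earlier: 0 =
// nearest, 1 = toward zero, 2 = down, 3 = up.)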
{
    p0 = dfcmp.eq(ATMP,ATMP)
    if (p0.new) CTMP = ATMP
}
{
    A = insert(CTMP,#63,#0)
    dealloc_return
}
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10

#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4

.Lfma_abnormal_ab:
{
    ATMP = extractu(A,#63,#0)
    BTMP = extractu(B,#63,#0)
    deallocframe
}
{
    p3 = cmp.gtu(ATMP,BTMP)
    if (!p3.new) A = B    // sort values
    if (!p3.new) B = A
}
{
    p0 = dfclass(A,#0x0f)    // A NaN?
    if (!p0.new) jump:nt .Lnan
    if (!p3) ATMP = BTMP
    if (!p3) BTMP = ATMP
}
{
    p1 = dfclass(A,#0x08)    // A is infinity
    p1 = dfclass(B,#0x0e)    // B is nonzero
}
{
    p0 = dfclass(A,#0x08)    // a is inf
    p0 = dfclass(B,#0x01)    // b is zero
}
{
    if (p1) jump .Lab_inf
    p2 = dfclass(B,#0x01)
}
{
    if (p0) jump .Linvalid
    if (p2) jump .Lab_true_zero
    TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal, with A > B.
// If A and B are both very small, the product collapses to a single sticky
// bit; replace the lower 63 bits of A and B with 0x0010_0000_0000_0000,
// which yields equivalent results.
// If A*B might be bigger, decrease A's exponent and raise B's (by
// normalizing its mantissa), then start over.
{
    p0 = bitsclr(AH,TMP)
    if (p0.new) jump:nt .Lfma_ab_tiny
}
{
    TMP = add(clb(BTMP),#-EXPBITS)
}
{
    BTMP = asl(BTMP,TMP)
}
{
    B = insert(BTMP,#63,#0)
    AH -= asl(TMP,#HI_MANTBITS)
}
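// Net effect: BTMP was shifted left so its leading bit lands in the
// hidden-bit position, turning the subnormal B into a normal encoding of
// B * 2^TMP, while A's exponent field was lowered by TMP; the product
// A*B is unchanged and both operands are now normal, so retry from the
// top.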
    jump .Lfma_begin

.Lfma_ab_tiny:
    ATMP = combine(##0x00100000,#0)
{
    A = insert(ATMP,#63,#0)
    B = insert(ATMP,#63,#0)
}
    jump .Lfma_begin

.Lab_inf:
{
    B = lsr(B,#63)
    p0 = dfclass(C,#0x10)
}
{
    A ^= asl(B,#63)
    if (p0) jump .Lnan
}
{
    p1 = dfclass(C,#0x08)
    if (p1.new) jump:nt .Lfma_inf_plus_inf
}
// A*B is +/- inf, C is finite. Return A
{
    jumpr r31
}
    .falign
.Lfma_inf_plus_inf:
{    // adding infinities of different signs is invalid
    p0 = dfcmp.eq(A,C)
    if (!p0.new) jump:nt .Linvalid
}
{
    jumpr r31
}

.Lnan:
{
    p0 = dfclass(B,#0x10)
    p1 = dfclass(C,#0x10)
    if (!p0.new) B = A
    if (!p1.new) C = A
}
{    // find sNaNs
    BH = convert_df2sf(B)
    BL = convert_df2sf(C)
}
{
    BH = convert_df2sf(A)
    A = #-1
    jumpr r31
}
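// The df2sf conversions above exist only for their side effect: they
// raise the invalid flag if any operand is a signaling NaN. A = #-1 is
// the all-ones quiet-NaN pattern returned for any NaN input.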

.Linvalid:
{
    TMP = ##0x7f800001    // sp snan
}
{
    A = convert_sf2df(TMP)
    jumpr r31
}
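// Converting a single-precision sNaN to double raises the invalid flag
// and yields the default quiet NaN as the result.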

.Lab_true_zero:
// B is zero, A is a finite number
{
    p0 = dfclass(C,#0x10)
    if (p0.new) jump:nt .Lnan
    if (p0.new) A = C
}
{
    p0 = dfcmp.eq(B,C)    // is C also zero?
    AH = lsr(AH,#31)      // get sign
}
{
    BH ^= asl(AH,#31)    // form correctly signed zero in B
    if (!p0) A = C       // If C is not zero, return C
    if (!p0) jumpr r31
}
// B has correctly signed zero, C is also zero
.Lzero_plus_zero:
{
    p0 = cmp.eq(B,C)    // same bit patterns: +0 + +0 or -0 + -0
    if (p0.new) jumpr:t r31
    A = B
}
{
    TMP = USR
}
{
    TMP = extractu(TMP,#2,#SR_ROUND_OFF)
    A = #0
}
{
    p0 = cmp.eq(TMP,#2)
    if (p0.new) AH = ##0x80000000
    jumpr r31
}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
    .falign
.Lfma_abnormal_c:
// We know that A*B is normal * normal
// C is not normal: zero, subnormal, inf, or NaN.
{
    p0 = dfclass(C,#0x10)    // is C NaN?
    if (p0.new) jump:nt .Lnan
    if (p0.new) A = C        // move NaN to A
    deallocframe
}
{
    p0 = dfclass(C,#0x08)    // is C inf?
    if (p0.new) A = C        // return C
    if (p0.new) jumpr:nt r31
}
// zero or subnormal
// If C is zero and we know A*B is normal*normal, we can just tail-call the
// ordinary multiply
{
    p0 = dfclass(C,#0x01)    // is C zero?
    if (p0.new) jump:nt __hexagon_muldf3
    TMP = #1
}
// Left with: subnormal
// Adjust C and jump back to restart
{
    allocframe(#STACKSPACE)    // deallocated above, so re-allocate the frame
    CTMP = #0
    CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
    jump .Lfma_abnormal_c_restart
}
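// A subnormal C has effective exponent 1-BIAS and no hidden bit, so its
// exponent field is forced to 1 while CTMP is left without the hidden-bit
// prefix that the normal path ORs in; the restart then re-unpacks the
// mantissa as usual.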
END(__hexagon_fmadf4)