| 1 | //===----------------------Hexagon builtin routine ------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | // Double Precision Divide |
| 10 | |
// Operand pairs while they still hold the raw IEEE-754 doubles.
// The code treats A as the dividend and B as the divisor (a zero B with a
// non-infinite, nonzero A raises divide-by-zero); every return path writes A.
| 11 | #define A r1:0 |
| 12 | #define AH r1 |
| 13 | #define AL r0 |
| 14 | |
| 15 | #define B r3:2 |
| 16 | #define BH r3 |
| 17 | #define BL r2 |
| 18 | |
// Q accumulates the integer quotient that is finally passed to convert_d2df.
| 19 | #define Q r5:4 |
| 20 | #define QH r5 |
| 21 | #define QL r4 |
| 22 | |
// PROD is the 64-bit scratch pair for mpyu results and later temporaries.
| 23 | #define PROD r7:6 |
| 24 | #define PRODHI r7 |
| 25 | #define PRODLO r6 |
| 26 | |
// Single-precision working values for the sfrecipa-based reciprocal estimate.
| 27 | #define SFONE r8 |
| 28 | #define SFDEN r9 |
| 29 | #define SFERROR r10 |
| 30 | #define SFRECIP r11 |
| 31 | |
// EXPA/EXPB hold the operands' exponent fields; EXPBA names them as a pair.
| 32 | #define EXPBA r13:12 |
| 33 | #define EXPB r13 |
| 34 | #define EXPA r12 |
| 35 | |
// Second partial product inside DIV_ITER1B (PRODHI * DENOMHI).
| 36 | #define REMSUB2 r15:14 |
| 37 | |
| 38 | |
| 39 | |
// xor of the operands' high words; its top bit is the sign of the result.
// r28 is re-aliased as ONE later in the file.
| 40 | #define SIGN r28 |
| 41 | |
// Predicate register roles.
| 42 | #define Q_POSITIVE p3 |
| 43 | #define NORMAL p2 |
| 44 | #define NO_OVF_UNF p1 |
| 45 | #define P_TMP p0 |
| 46 | |
// RECIPEST_SHIFT: bit position at which the SF reciprocal mantissa is inserted
// into RECIPEST.  QADJ: constant subtracted from the exponent difference before
// it is added to the exponent field of the converted quotient.
| 47 | #define RECIPEST_SHIFT 3 |
| 48 | #define QADJ 61 |
| 49 | |
// Class-mask immediates passed to dfclass.  DFCLASS_NUMBER (0x0F) is the union
// of the low four class bits; the NaN handler tests the literal 0x10 instead.
| 50 | #define DFCLASS_NORMAL 0x02 |
| 51 | #define DFCLASS_NUMBER 0x0F |
| 52 | #define DFCLASS_INFINITE 0x08 |
| 53 | #define DFCLASS_ZERO 0x01 |
| 54 | #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) |
| 55 | #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) |
| 56 | |
// Field widths of the double (DF) and single (SF) formats, and the DF
// exponent bias (0x3ff = 1023).
| 57 | #define DF_MANTBITS 52 |
| 58 | #define DF_EXPBITS 11 |
| 59 | #define SF_MANTBITS 23 |
| 60 | #define SF_EXPBITS 8 |
| 61 | #define DF_BIAS 0x3ff |
| 62 | |
// Bit offset in USR of the 2-bit field that the overflow handler extracts to
// decide between returning infinity and the largest finite value.
| 63 | #define SR_ROUND_OFF 22 |
| 64 | |
// Alias helpers: each emits a .global for an alternate symbol name and .set's
// it equal to __hexagon_<TAG>.  END emits the ELF .size for a symbol.
| 65 | #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG |
| 66 | #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG |
| 67 | #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG |
| 68 | #define END(TAG) .size TAG,.-TAG |
| 69 | |
// __hexagon_divdf3: double-precision divide.
//   In:   r1:0 = A, r3:2 = B (NOTE(review): presumably the ABI argument
//         registers for two doubles; the calling convention is not stated here).
//   Out:  r1:0 = result; every return path ends in "jumpr r31".
//   Uses: r0-r15, r28, p0-p3, and reads/writes USR on the exceptional paths.
// The same entry point is also exported as __qdsp_divdf3,
// __hexagon_fast_divdf3 and __hexagon_fast2_divdf3.
| 70 | .text |
| 71 | .global __hexagon_divdf3 |
| 72 | .type __hexagon_divdf3,@function |
| 73 | Q6_ALIAS(divdf3) |
| 74 | FAST_ALIAS(divdf3) |
| 75 | FAST2_ALIAS(divdf3) |
| 76 | .p2align 5 |
| 77 | __hexagon_divdf3: |
// Classify both operands into the single predicate NORMAL, so the fast path
// below is kept only when A and B are both normal.  Also capture the two high
// words (sign/exponent) into EXPB:EXPA and the xor of the signs into SIGN.
| 78 | { |
| 79 | NORMAL = dfclass(A,#DFCLASS_NORMAL) |
| 80 | NORMAL = dfclass(B,#DFCLASS_NORMAL) |
| 81 | EXPBA = combine(BH,AH) |
| 82 | SIGN = xor(AH,BH) |
| 83 | } |
// From here on r1:0 and r3:2 are worked on as integer remainder / denominator.
| 84 | #undef A |
| 85 | #undef AH |
| 86 | #undef AL |
| 87 | #undef B |
| 88 | #undef BH |
| 89 | #undef BL |
| 90 | #define REM r1:0 |
| 91 | #define REMHI r1 |
| 92 | #define REMLO r0 |
| 93 | #define DENOM r3:2 |
| 94 | #define DENOMHI r3 |
| 95 | #define DENOMLO r2 |
// Anything that is not normal/normal goes to .Ldiv_abnormal.
// PROD receives the top SF_MANTBITS bits of the divisor's mantissa
// (bits 29..51).  SFONE is the single-precision 1.0 pattern with its lsb set;
// .Ldiv_abnormal also consumes SFONE without reloading it.
// NOTE(review): that relies on the rest of a packet executing even when its
// jump is taken -- confirm against the Hexagon Programmer's Reference.
| 96 | { |
| 97 | if (!NORMAL) jump .Ldiv_abnormal |
| 98 | PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) |
| 99 | SFONE = ##0x3f800001 |
| 100 | } |
// SFDEN = SF value built from 1.0's exponent and the divisor's top mantissa
// bits.  EXPA/EXPB are reduced to the 11-bit biased exponent fields.
// Q_POSITIVE is true when the sign bits of the two operands were equal.
| 101 | { |
| 102 | SFDEN = or(SFONE,PRODLO) |
| 103 | EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) |
| 104 | EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) |
| 105 | Q_POSITIVE = cmp.gt(SIGN,#-1) |
| 106 | } |
// r28 stops being SIGN (already folded into Q_POSITIVE) and becomes the
// constant 1.
| 107 | #undef SIGN |
| 108 | #define ONE r28 |
// Common continuation.  The abnormal-operand path jumps back here after it
// has normalized denormal inputs and rebuilt EXPA, EXPB and SFDEN.
| 109 | .Ldenorm_continue: |
// Seed the reciprocal of SFDEN.  SFERROR starts as SFONE with its lsb cleared
// (0x3f800000).  EXPA becomes the exponent difference EXPA - EXPB.
// The P_TMP output of sfrecipa is overwritten before any later read.
| 110 | { |
| 111 | SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN) |
| 112 | SFERROR = and(SFONE,#-2) |
| 113 | ONE = #1 |
| 114 | EXPA = sub(EXPA,EXPB) |
| 115 | } |
| 116 | #undef EXPB |
| 117 | #define RECIPEST r13 |
// First refinement, part 1: SFERROR = 1.0 - SFRECIP*SFDEN.
// The insert overwrites the 12-bit sign+exponent field of the dividend's high
// word with 0x001, i.e. clears sign/exponent and sets the hidden bit.
// RECIPEST starts as bit 23 shifted up by RECIPEST_SHIFT.
| 118 | { |
| 119 | SFERROR -= sfmpy(SFRECIP,SFDEN):lib |
| 120 | REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) |
| 121 | RECIPEST = ##0x00800000 << RECIPEST_SHIFT |
| 122 | } |
// First refinement, part 2: SFRECIP += SFRECIP*SFERROR.  The divisor's high
// word gets the same field overwrite as the dividend; SFERROR is reset to
// 1.0 for the next round.
| 123 | { |
| 124 | SFRECIP += sfmpy(SFRECIP,SFERROR):lib |
| 125 | DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) |
| 126 | SFERROR = and(SFONE,#-2) |
| 127 | } |
// Second refinement, part 1.  QH/QL are borrowed as temporaries holding the
// exponent-difference limits -(DF_BIAS-1) and DF_BIAS-1.
| 128 | { |
| 129 | SFERROR -= sfmpy(SFRECIP,SFDEN):lib |
| 130 | QH = #-DF_BIAS+1 |
| 131 | QL = #DF_BIAS-1 |
| 132 | } |
// Second refinement, part 2.  NO_OVF_UNF holds only if
// -(DF_BIAS-1) < EXPA <= DF_BIAS-1, i.e. the result exponent is safely in range.
| 133 | { |
| 134 | SFRECIP += sfmpy(SFRECIP,SFERROR):lib |
| 135 | NO_OVF_UNF = cmp.gt(EXPA,QH) |
| 136 | NO_OVF_UNF = !cmp.gt(EXPA,QL) |
| 137 | } |
// Drop the low SF_MANTBITS bits of the refined reciprocal into RECIPEST at bit
// RECIPEST_SHIFT, clear the quotient accumulator, and pre-subtract QADJ from
// the exponent difference.
| 138 | { |
| 139 | RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) |
| 140 | Q = #0 |
| 141 | EXPA = add(EXPA,#-QADJ) |
| 142 | } |
| 143 | #undef SFERROR |
| 144 | #undef SFRECIP |
| 145 | #define TMP r10 |
| 146 | #define TMP1 r11 |
// Lower the estimate by 3 units at the inserted mantissa's lsb.
// NOTE(review): presumably so the estimate never exceeds the true reciprocal
// and the remainder stays non-negative -- not stated in this file.
| 147 | { |
| 148 | RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) |
| 149 | } |
| 150 | |
// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-refinement step,
// emitted as three packets.
//   1. PROD = RECIPEST * REMHI; PRODHI serves as the partial quotient.
//      REM is shifted left by REMSHIFT in the same packet.
//   2. PRODLO is zeroed; REM -= PRODHI*DENOMLO; REMSUB2 = PRODHI*DENOMHI.
//   3. Q += PROD shifted by QSHIFT with QSHIFTINSN; REM -= REMSUB2 << 32.
//      EXTRA is an optional additional instruction for this last packet.
// NOTE(review): step 1 appears to rely on the multiply reading REMHI before
// the same packet's shift of REM takes effect -- confirm packet semantics.
| 151 | #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \ |
| 152 | { \ |
| 153 | PROD = mpyu(RECIPEST,REMHI); \ |
| 154 | REM = asl(REM,# ## ( REMSHIFT )); \ |
| 155 | }; \ |
| 156 | { \ |
| 157 | PRODLO = # ## 0; \ |
| 158 | REM -= mpyu(PRODHI,DENOMLO); \ |
| 159 | REMSUB2 = mpyu(PRODHI,DENOMHI); \ |
| 160 | }; \ |
| 161 | { \ |
| 162 | Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \ |
| 163 | REM -= asl(REMSUB2, # ## 32); \ |
| 164 | EXTRA \ |
| 165 | } |
| 166 | |
| 167 | |
// Four steps.  PRODHI sits at bit 32 of PROD, so the shifts place the partial
// quotients at bits 46, 31, 16 and 1 of Q -- each 15 bits (REMSHIFT) below the
// previous one.  The last step also clears PROD for the fix-up code that
// follows.
| 168 | DIV_ITER1B(ASL,14,15,) |
| 169 | DIV_ITER1B(ASR,1,15,) |
| 170 | DIV_ITER1B(ASR,16,15,) |
| 171 | DIV_ITER1B(ASR,31,15,PROD=# ( 0 );) |
| 172 | |
// Final quotient correction, sticky bit, sign, conversion and the normal
// return.  r15:14 becomes a general temporary pair and r13 is EXPB again.
| 173 | #undef REMSUB2 |
| 174 | #define TMPPAIR r15:14 |
| 175 | #define TMPPAIRHI r15 |
| 176 | #define TMPPAIRLO r14 |
| 177 | #undef RECIPEST |
| 178 | #define EXPB r13 |
// If REM >= DENOM, one more quotient unit fits.  PROD was cleared by the last
// iteration, so PRODLO = 2 adds one unit at bit 1 of Q, where the last
// partial quotient landed.
| 179 | { |
| 180 | // compare or sub with carry |
| 181 | TMPPAIR = sub(REM,DENOM) |
| 182 | P_TMP = cmp.gtu(DENOM,REM) |
| 183 | // set up amt to add to q |
| 184 | if (!P_TMP.new) PRODLO = #2 |
| 185 | } |
// Apply the correction to Q and, when it was taken, to REM.  TMPPAIR is then
// zeroed to serve as the comparison value in the next packet.
| 186 | { |
| 187 | Q = add(Q,PROD) |
| 188 | if (!P_TMP) REM = TMPPAIR |
| 189 | TMPPAIR = #0 |
| 190 | } |
// A nonzero remainder sets bit 0 of Q (ONE == 1) as a sticky bit.
| 191 | { |
| 192 | P_TMP = cmp.eq(REM,TMPPAIR) |
| 193 | if (!P_TMP.new) QL = or(QL,ONE) |
| 194 | } |
// Negate Q when the operand signs differed.
| 195 | { |
| 196 | PROD = neg(Q) |
| 197 | } |
| 198 | { |
| 199 | if (!Q_POSITIVE) Q = PROD |
| 200 | } |
// r1:0 / r3:2 go back to being floating-point values A / B.
| 201 | #undef REM |
| 202 | #undef REMHI |
| 203 | #undef REMLO |
| 204 | #undef DENOM |
| 205 | #undef DENOMLO |
| 206 | #undef DENOMHI |
| 207 | #define A r1:0 |
| 208 | #define AH r1 |
| 209 | #define AL r0 |
| 210 | #define B r3:2 |
| 211 | #define BH r3 |
| 212 | #define BL r2 |
// Convert the signed 64-bit quotient to a double.  When the exponent range
// check failed, continue at .Ldiv_ovf_unf, which reads the A produced here.
| 213 | { |
| 214 | A = convert_d2df(Q) |
| 215 | if (!NO_OVF_UNF) jump .Ldiv_ovf_unf |
| 216 | } |
// Normal case: add the adjusted exponent difference into the exponent field
// of the high word and return.
| 217 | { |
| 218 | AH += asl(EXPA,#DF_MANTBITS-32) |
| 219 | jumpr r31 |
| 220 | } |
| 221 | |
// Reached when the exponent difference was outside the safe range.
// Decides between overflow, "possibly rounded up to min normal", and true
// underflow.
| 222 | .Ldiv_ovf_unf: |
// Tentatively apply the exponent adjustment to A (this is the value
// .Lpossible_unf returns) and pull out the exponent field that convert_d2df
// produced.
| 223 | { |
| 224 | AH += asl(EXPA,#DF_MANTBITS-32) |
| 225 | EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) |
| 226 | } |
// PROD = |Q|.  EXPA = adjustment + converted exponent, i.e. the biased
// exponent the result would have with unlimited exponent range.
| 227 | { |
| 228 | PROD = abs(Q) |
| 229 | EXPA = add(EXPA,EXPB) |
| 230 | } |
| 231 | { |
| 232 | P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow |
| 233 | if (P_TMP.new) jump:nt .Ldiv_ovf |
| 234 | } |
| 235 | { |
| 236 | P_TMP = cmp.gt(EXPA,#0) |
| 237 | if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... |
| 238 | } |
| 239 | // Underflow |
| 240 | // We know what the infinite range exponent should be (EXPA) |
| 241 | // Q is 2's complement, PROD is abs(Q) |
| 242 | // Normalize Q, shift right, add a high bit, convert, change exponent |
| 243 | |
| 244 | #define FUDGE1 7 // how much to shift right |
| 245 | #define FUDGE2 4 // how many guard/round to keep at lsbs |
| 246 | |
// EXPB = left-shift count that normalizes PROD (leading-bit count minus one).
// EXPA = FUDGE1 - EXPA, the right-shift count (EXPA <= 0 on this path).
// TMP = current USR; TMP1 = 63 is the clamp for the shift count.
| 247 | { |
| 248 | EXPB = add(clb(PROD),#-1) // doesn't need to be added in since |
| 249 | EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent |
| 250 | TMP = USR |
| 251 | TMP1 = #63 |
| 252 | } |
// NOTE(review): the asl appears to use the normalizing count left in EXPB by
// the previous packet while EXPB is simultaneously replaced by the clamped
// right-shift count -- confirm packet read-before-write semantics.
// TMP1 becomes the USR value with bits 0x30 set; EXPA is zeroed.
| 253 | { |
| 254 | EXPB = min(EXPA,TMP1) |
| 255 | TMP1 = or(TMP,#0x030) |
| 256 | PROD = asl(PROD,EXPB) |
| 257 | EXPA = #0 |
| 258 | } |
// EXPB:EXPA supplies the field description for the register form of extractu
// (presumably width = EXPB, offset = EXPA = 0): the low EXPB bits of PROD.
// B = 1 is used for the nonzero test and as the sticky bit below.
| 259 | { |
| 260 | TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out |
| 261 | PROD = lsr(PROD,EXPB) // shift out bits |
| 262 | B = #1 |
| 263 | } |
// If any shifted-out bit was set (TMPPAIR >= 1), OR a sticky 1 into PRODLO.
// setbit forces bit DF_MANTBITS+FUDGE2 of PROD (bit 24 of PRODHI) -- the
// "high bit" mentioned in the underflow notes above.
| 264 | { |
| 265 | P_TMP = cmp.gtu(B,TMPPAIR) |
| 266 | if (!P_TMP.new) PRODLO = or(BL,PRODLO) |
| 267 | PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) |
| 268 | } |
// If any of the low FUDGE2 guard bits is set the result is inexact: select the
// USR image with 0x30 set (.Lpossible_unf describes these bits as
// Unf+Inexact).
| 269 | { |
| 270 | Q = neg(PROD) |
| 271 | P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1) |
| 272 | if (!P_TMP.new) TMP = TMP1 |
| 273 | } |
// Write back USR, choose the signed magnitude, and load the exponent
// correction -(DF_BIAS + DF_MANTBITS + FUDGE2); presumably this cancels the
// exponent that convert_d2df generates for the forced high bit.
| 274 | { |
| 275 | USR = TMP |
| 276 | if (Q_POSITIVE) Q = PROD |
| 277 | TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2) |
| 278 | } |
| 279 | { |
| 280 | A = convert_d2df(Q) |
| 281 | } |
| 282 | { |
| 283 | AH += asl(TMP,#DF_MANTBITS-32) |
| 284 | jumpr r31 |
| 285 | } |
| 286 | |
| 287 | |
// Entered from .Ldiv_ovf_unf when the unbounded-range exponent is positive but
// the range check failed.  A already holds the final value; the only question
// is whether the underflow flag must still be raised.
| 288 | .Lpossible_unf: |
| 289 | // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal |
| 290 | // The answer is correct, but we need to raise Underflow |
// B = A with the sign bit dropped (low 63 bits); TMPPAIR = smallest normal
// double; TMP = mask for the low 15 bits of PRODHI.
| 291 | { |
| 292 | B = extractu(A,#63,#0) |
| 293 | TMPPAIR = combine(##0x00100000,#0) // min normal |
| 294 | TMP = #0x7FFF |
| 295 | } |
// Both tests write P_TMP, so it holds only when both conditions are true.
| 296 | { |
| 297 | P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value... |
| 298 | P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)? |
| 299 | } |
| 300 | |
// When __HEXAGON_ARCH__ is 60 the USR read and the conditional return are
// written as two separate statements instead of sharing one packet.
| 301 | #if (__HEXAGON_ARCH__ == 60) |
| 302 | TMP = USR // If not, just return |
| 303 | if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact |
| 304 | // Note that inexact is already set... |
| 305 | #else |
| 306 | { |
| 307 | if (!P_TMP) jumpr r31 // If not, just return |
| 308 | TMP = USR // Else, we want to set Unf+Inexact |
| 309 | } // Note that inexact is already set... |
| 310 | #endif |
// Set USR bits 0x30 and write the register back.
| 311 | { |
| 312 | TMP = or(TMP,#0x30) |
| 313 | } |
| 314 | { |
| 315 | USR = TMP |
| 316 | } |
// NOTE(review): the p0 result is unused; the compare presumably exists so the
// newly set flags are acted upon, like the "get exceptions" compare in
// .Ldiv_ovf -- confirm.
| 317 | { |
| 318 | p0 = dfcmp.eq(A,A) |
| 319 | jumpr r31 |
| 320 | } |
| 321 | |
| 322 | .Ldiv_ovf: |
| 323 | |
| 324 | // Raise Overflow, and choose the correct overflow value (saturated normal or infinity) |
| 325 | |
// B = largest finite double (0x7fefffff_ffffffff).
// AH = 0 for a positive result, all ones for a negative one, so bit 31 of AH
// is the result sign.
| 326 | { |
| 327 | TMP = USR |
| 328 | B = combine(##0x7fefffff,#-1) |
| 329 | AH = mux(Q_POSITIVE,#0,#-1) |
| 330 | } |
// PROD = +infinity bit pattern.  QH = 2-bit USR field at SR_ROUND_OFF (the
// rounding mode, per the round-to-zero comment below).  TMP = USR with bits
// 0x28 set.
| 331 | { |
| 332 | PROD = combine(##0x7ff00000,#0) |
| 333 | QH = extractu(TMP,#2,#SR_ROUND_OFF) |
| 334 | TMP = or(TMP,#0x28) |
| 335 | } |
// NOTE(review): QL appears to receive the unmodified mode (QH as it was
// before this packet) while QH becomes mode ^ sign -- confirm packet
// read-before-write semantics.
| 336 | { |
| 337 | USR = TMP |
| 338 | QH ^= lsr(AH,#31) |
| 339 | QL = QH |
| 340 | } |
// The first two compares are combined in p0 ("... and ..."); when both hold
// the saturated value in B is replaced by infinity.
| 341 | { |
| 342 | p0 = !cmp.eq(QL,#1) // if not round-to-zero |
| 343 | p0 = !cmp.eq(QH,#2) // and not rounding the other way |
| 344 | if (p0.new) B = PROD // go to inf |
| 345 | p0 = dfcmp.eq(B,B) // get exceptions |
| 346 | } |
// Copy the low 63 bits of B into A, leaving A's sign bit (from AH) in place.
| 347 | { |
| 348 | A = insert(B,#63,#0) |
| 349 | jumpr r31 |
| 350 | } |
| 351 | |
// Handler for operands that are not both normal.  On this path r28 still
// holds SIGN from the entry packet, and p1/p2 are reused as P_INF/P_ZERO.
| 352 | #undef ONE |
| 353 | #define SIGN r28 |
| 354 | #undef NORMAL |
| 355 | #undef NO_OVF_UNF |
| 356 | #define P_INF p1 |
| 357 | #define P_ZERO p2 |
| 358 | .Ldiv_abnormal: |
// Each predicate below is written by two tests in one packet and so holds
// only when both tests pass.
// P_TMP: A and B are both in one of the DFCLASS_NUMBER classes (the NaN
// handler is taken otherwise).  Q_POSITIVE is recomputed from SIGN.
| 359 | { |
| 360 | P_TMP = dfclass(A,#DFCLASS_NUMBER) |
| 361 | P_TMP = dfclass(B,#DFCLASS_NUMBER) |
| 362 | Q_POSITIVE = cmp.gt(SIGN,#-1) |
| 363 | } |
// P_INF: both operands infinite.
| 364 | { |
| 365 | P_INF = dfclass(A,#DFCLASS_INFINITE) |
| 366 | P_INF = dfclass(B,#DFCLASS_INFINITE) |
| 367 | } |
// P_ZERO: both operands zero.
| 368 | { |
| 369 | P_ZERO = dfclass(A,#DFCLASS_ZERO) |
| 370 | P_ZERO = dfclass(B,#DFCLASS_ZERO) |
| 371 | } |
// Some operand is not a number -> .Ldiv_nan; inf/inf -> .Ldiv_invalid.
| 372 | { |
| 373 | if (!P_TMP) jump .Ldiv_nan |
| 374 | if (P_INF) jump .Ldiv_invalid |
| 375 | } |
// 0/0 -> .Ldiv_invalid.
| 376 | { |
| 377 | if (P_ZERO) jump .Ldiv_invalid |
| 378 | } |
// P_ZERO now means "A is nonzero and B is not infinite"; when it fails
// (A is zero or B is infinite) the result is a signed zero.
| 379 | { |
| 380 | P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero |
| 381 | P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite |
| 382 | } |
// P_INF now means "A is not infinite and B is nonzero"; when it fails
// (A is infinite or B is zero) the result is a signed infinity.
| 383 | { |
| 384 | P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite |
| 385 | P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero |
| 386 | } |
| 387 | { |
| 388 | if (!P_ZERO) jump .Ldiv_zero_result |
| 389 | if (!P_INF) jump .Ldiv_inf_result |
| 390 | } |
| 391 | // Now we've narrowed it down to (de)normal / (de)normal |
| 392 | // Set up A/EXPA B/EXPB and go back |
| 393 | #undef P_ZERO |
| 394 | #undef P_INF |
| 395 | #define P_TMP2 p1 |
// P_TMP / P_TMP2: A / B individually normal.  TMP = hidden-bit mask for the
// high word (bit 20).
| 396 | { |
| 397 | P_TMP = dfclass(A,#DFCLASS_NORMAL) |
| 398 | P_TMP2 = dfclass(B,#DFCLASS_NORMAL) |
| 399 | TMP = ##0x00100000 |
| 400 | } |
// Save the original high words in EXPB:EXPA, then zero the 12-bit
// sign+exponent field of each high word (the low 12 bits of TMP are zero).
| 401 | { |
| 402 | EXPBA = combine(BH,AH) |
| 403 | AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit |
| 404 | BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit |
| 405 | } |
| 406 | { |
| 407 | if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit |
| 408 | if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit |
| 409 | } |
// QH / QL = left shifts that bring each leading one up to bit DF_MANTBITS
// (leading-bit count minus DF_EXPBITS); zero for an operand whose hidden bit
// was just restored.  TMP = 1 is the base exponent for denormals.
| 410 | { |
| 411 | QH = add(clb(A),#-DF_EXPBITS) |
| 412 | QL = add(clb(B),#-DF_EXPBITS) |
| 413 | TMP = #1 |
| 414 | } |
// Biased exponent fields, taken from the saved high words.
| 415 | { |
| 416 | EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) |
| 417 | EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) |
| 418 | } |
// Normalize both significands; a denormal operand gets the effective exponent
// 1 - shift in place of its all-zero exponent field.
| 419 | { |
| 420 | A = asl(A,QH) |
| 421 | B = asl(B,QL) |
| 422 | if (!P_TMP) EXPA = sub(TMP,QH) |
| 423 | if (!P_TMP2) EXPB = sub(TMP,QL) |
| 424 | } // recreate values needed by resume code |
// Rebuild PROD and SFDEN from the normalized divisor exactly as the entry code
// does (SFONE is still live from the entry path), then rejoin the main flow.
| 425 | { |
| 426 | PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) |
| 427 | } |
| 428 | { |
| 429 | SFDEN = or(SFONE,PRODLO) |
| 430 | jump .Ldenorm_continue |
| 431 | } |
| 432 | |
// Result is a zero carrying the xor of the operand signs.  Reached from
// .Ldiv_abnormal when A is zero or B is infinite.
| 433 | .Ldiv_zero_result: |
// Put the result sign in bit 31 of AH and clear B.
| 434 | { |
| 435 | AH = xor(AH,BH) |
| 436 | B = #0 |
| 437 | } |
// Overwrite the low 63 bits of A with zero, keeping only the sign bit.
| 438 | { |
| 439 | A = insert(B,#63,#0) |
| 440 | jumpr r31 |
| 441 | } |
// Result is an infinity carrying the xor of the operand signs.  Reached from
// .Ldiv_abnormal when A is infinite or B is zero.
| 442 | .Ldiv_inf_result: |
// p2 holds only for a non-infinite A divided by a zero B, the case that must
// set the divide-by-zero flag.
| 443 | { |
| 444 | p2 = dfclass(B,#DFCLASS_ZERO) |
| 445 | p2 = dfclass(A,#DFCLASS_NONINFINITE) |
| 446 | } |
// The result sign goes into AH here; the code at 1: uses it on both paths.
// NOTE(review): this relies on the rest of the packet executing even when the
// jump to 1: is taken -- confirm packet semantics.
| 447 | { |
| 448 | TMP = USR |
| 449 | if (!p2) jump 1f |
| 450 | AH = xor(AH,BH) |
| 451 | } |
| 452 | { |
| 453 | TMP = or(TMP,#0x04) // DBZ |
| 454 | } |
| 455 | { |
| 456 | USR = TMP |
| 457 | } |
| 458 | 1: |
// B = +infinity bit pattern; the p0 result of the compare is not used.
| 459 | { |
| 460 | B = combine(##0x7ff00000,#0) |
| 461 | p0 = dfcmp.uo(B,B) // take possible exception |
| 462 | } |
// Copy the low 63 bits of B into A, keeping the sign bit computed above.
| 463 | { |
| 464 | A = insert(B,#63,#0) |
| 465 | jumpr r31 |
| 466 | } |
// Reached when an operand is outside every DFCLASS_NUMBER class.
// Class mask 0x10 is the one bit outside DFCLASS_NUMBER (presumably "NaN").
| 467 | .Ldiv_nan: |
// An operand that is not in class 0x10 is replaced by the other one, so both
// registers end up holding a class-0x10 value for the conversions below.
| 468 | { |
| 469 | p0 = dfclass(A,#0x10) |
| 470 | p1 = dfclass(B,#0x10) |
| 471 | if (!p0.new) A = B |
| 472 | if (!p1.new) B = A |
| 473 | } |
// The converted values are discarded (Q is scratch here); the conversions run
// only for their exception side effect.
| 474 | { |
| 475 | QH = convert_df2sf(A) // get possible invalid exceptions |
| 476 | QL = convert_df2sf(B) |
| 477 | } |
// Return the all-ones 64-bit pattern as the result.
| 478 | { |
| 479 | A = #-1 |
| 480 | jumpr r31 |
| 481 | } |
| 482 | |
// Invalid operation: reached from .Ldiv_abnormal for inf/inf and 0/0.
| 483 | .Ldiv_invalid: |
// 0x7f800001 is a single-precision pattern with an all-ones exponent and a
// nonzero mantissa whose top bit is clear.
| 484 | { |
| 485 | TMP = ##0x7f800001 |
| 486 | } |
| 487 | { |
| 488 | A = convert_sf2df(TMP) // get invalid, get DF qNaN |
| 489 | jumpr r31 |
| 490 | } |
// Record the ELF symbol size of the whole routine.
| 491 | END(__hexagon_divdf3) |
| 492 | |