| 1 | //===----------------------Hexagon builtin routine ------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
// Double Precision Add / Subtract
| 10 | |
// Register and constant definitions for the double-precision add/subtract
// routines below.

// Operand register pairs and their 32-bit halves. The routines read both
// operands from A and B and leave the result in A before returning.
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

// Exponent fields of A and B. They sit in one register pair so that both can
// be swapped with a single combine().
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4

// Scratch pair that holds small constants (shift limits, sticky bit, range
// bounds).
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6

// Fixed-point working copy of A; also receives the sum.
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

// Fixed-point working copy of B.
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8

// Secondary scratch pair (negated value, shifted-out bits, difference).
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10

// EXTRACTAMT is the register pair passed to extractu(); EXTRACTOFF (r14) is
// its low word and EXPDIFF (r15) its high word. The code #undef's all three
// and reuses r15:14 under the name ZERO, so all three must be defined here.
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14

// General-purpose 32-bit scratch register.
#define TMP r28

// Field widths of the double-precision format as used by this file.
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
// Exponent adjustment constant; used in the form #-BIAS-60 and ##BIAS+BIAS-2.
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5

// Bit offset of the 2-bit rounding-mode field within USR. May be overridden
// from the command line.
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

// Predicate aliases for the entry packets (later #undef'd and replaced).
#define NORMAL p3
#define BIGB p2

// Helpers that export an alias symbol for __hexagon_<TAG>, and one that emits
// the .size directive for a symbol.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
| 62 | |
// Both entry points live in .text and are exported together with their
// __qdsp_*, __hexagon_fast_* and __hexagon_fast2_* alias names.
    .text

    // Addition entry point and its aliases.
    .global __hexagon_adddf3
    .type __hexagon_adddf3, @function
    Q6_ALIAS(adddf3)
    FAST_ALIAS(adddf3)
    FAST2_ALIAS(adddf3)

    // Subtraction entry point and its aliases.
    .global __hexagon_subdf3
    .type __hexagon_subdf3, @function
    Q6_ALIAS(subdf3)
    FAST_ALIAS(subdf3)
    FAST2_ALIAS(subdf3)
| 75 | |
// __hexagon_adddf3: A = A + B on double-precision values; result left in A.
//
// Fast path (both operands pass the dfclass #2 test):
//   1. order the operands so that A carries the larger exponent field,
//   2. widen both mantissas into 64-bit fixed-point values (the original
//      comments call this format Q1.62),
//   3. shift B right by the exponent difference, folding any shifted-out
//      bits into a single sticky bit,
//   4. add or subtract as 64-bit integers according to the operand signs,
//   5. convert back with convert_d2df and rebase the exponent.
// Anything else branches to .Ladd_abnormal, .Ladd_ovf_unf or .Ladd_zero.
    .p2align 5
__hexagon_adddf3:
    {
        ATMP = combine(##0x20000000,#0)             // seed: bit 61 set, mantissa inserted below it
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)   // exponent field of A
        EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)   // exponent field of B
    }
    {
        BTMP = ATMP                                 // same seed for B
        BIGB = cmp.gtu(EXPB,EXPA)                   // does B have the larger exponent?
        NORMAL = dfclass(A,#2)                      // two writes to one predicate in a
        NORMAL = dfclass(B,#2)                      // packet: both tests must pass
    }
    {
        // All three conditional moves read pre-packet values, so together
        // they exchange A with B and EXPA with EXPB when BIGB is set.
        if (BIGB) EXPB_A = combine(EXPA,EXPB)
        if (BIGB) A = B
        if (BIGB) B = A
        if (!NORMAL) jump .Ladd_abnormal            // special operands handled elsewhere
    }
    {
        ZTMP = combine(#62,#1)                      // ZTMPH = shift cap, ZTMPL = sticky bit
        EXPDIFF = sub(EXPA,EXPB)                    // alignment distance for B
        ATMP = insert(A,#MANTBITS,#EXPBITS-2)       // mantissa of A into fixed point
        BTMP = insert(B,#MANTBITS,#EXPBITS-2)       // mantissa of B into fixed point
    }
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
// Shared tail; .Ladd_abnormal re-enters here for normal + subnormal operands.
.Ladd_continue:
    {
        EXTRACTOFF = #0                             // low word of EXTRACTAMT
        A_POS = cmp.gt(AH,#-1)                      // sign bit of A clear?
        ATMP2 = neg(ATMP)                           // negated copy, used if A is negative
        EXPDIFF = min(EXPDIFF,ZTMPH)                // large differences collapse to the
                                                    // sticky bit, so cap the shift at 62
    }
    {
        BTMP = ASR(BTMP,EXPDIFF)                    // align B with A
        ATMP2 = extractu(BTMP,EXTRACTAMT)           // the bits of B that the shift drops
        if (!A_POS) ATMP = ATMP2                    // give A its sign (two's complement)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
        ZERO = #0                                   // r15:14 is free again; reuse as zero
    }
    {
        B_POS = cmp.gt(BH,#-1)                      // sign bit of B clear?
        EXPB = add(EXPA,#-BIAS-60)                  // exponent rebase applied after conversion
        NO_STICKIES = cmp.eq(ATMP2,ZERO)            // were any nonzero bits dropped?
        if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)   // yes: record them in the sticky bit
    }
    {
        ZTMP = combine(#54,##2045)                  // bounds for the exponent range check
        ATMP2 = sub(ATMP,BTMP)                      // candidate result when B is negative
        ATMP = add(ATMP,BTMP)                       // candidate result when B is positive
    }
    {
        if (!B_POS) ATMP = ATMP2                    // B negative: keep the difference
        // Stay on the fast path only when 54 < EXPA <= 2045; the lower bound
        // is high to leave room for large cancellation.
        p0 = cmp.gtu(EXPA,ZTMPH)
        p0 = !cmp.gtu(EXPA,ZTMPL)
        if (!p0.new) jump:nt .Ladd_ovf_unf
    }
    {
        p0 = cmp.eq(ATMPH,#0)                       // both halves zero means the
        p0 = cmp.eq(ATMPL,#0)                       // operands cancelled exactly
        if (p0.new) jump:nt .Ladd_zero
        A = convert_d2df(ATMP)                      // signed 64-bit integer -> double
    }
    {
        AH += asl(EXPB,#HI_MANTBITS)                // add the rebase into the exponent field
        jumpr r31
    }
// __hexagon_subdf3: A = A - B, computed as A + (-B) by flipping the sign bit
// of B and continuing in the addition routine through its __qdsp_ alias.
    .falign
__hexagon_subdf3:
    {
        BH = togglebit(BH,#31)                      // negate B
        jump __qdsp_adddf3                          // tail-jump into the adder
    }
| 157 | |
| 158 | |
// .Ladd_zero: the fixed-point sum was exactly zero (full cancellation).
// The result is +0, except that rounding mode 2 (round towards negative
// infinity) yields -0. The rounding-mode field is read from USR at
// SR_ROUND_OFF, the same named offset used by the other USR readers in this
// file, so an overridden SR_ROUND_OFF is honoured here too.
    .falign
.Ladd_zero:
    {
        TMP = USR                                   // fetch status/control bits
        A = #0                                      // default result: +0
        BH = #1
    }
    {
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)        // isolate the 2-bit rounding mode
        BH = asl(BH,#31)                            // BH = sign-bit mask
    }
    {
        p0 = cmp.eq(TMP,#2)                         // rounding towards negative infinity?
        if (p0.new) AH = xor(AH,BH)                 // then flip the result to -0
        jumpr r31
    }
// .Ladd_ovf_unf: slow path taken when EXPA is outside the range in which the
// result exponent is guaranteed to stay in bounds. The result may overflow or
// be subnormal; per the original notes the underflow flag cannot be raised
// on this path.
//
// On entry:
//   ATMP = two's-complement fixed-point sum
//   EXPA = exponent field of A
//   EXPB = EXPA-BIAS-60 (the exponent rebase)
//
// Convert the sum, add the rebase to the exponent it produced, then:
//   exponent >  2046  -> .Ladd_ovf
//   exponent >  0     -> return the rebased value
//   exponent <= 0     -> build a subnormal by shifting the mantissa right
// The zero check has not run yet on this path, so it is done here first.
    .falign
.Ladd_ovf_unf:
    {
        p0 = cmp.eq(ATMPH,#0)
        p0 = cmp.eq(ATMPL,#0)
        if (p0.new) jump:nt .Ladd_zero              // exact cancellation
        A = convert_d2df(ATMP)
    }
    {
        AH += asl(EXPB,#HI_MANTBITS)                // tentatively rebase the exponent
        TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)    // exponent as produced by the conversion
    }
    {
        B = combine(##0x00100000,#0)                // bit 52 set: explicit leading one
        EXPB = add(EXPB,TMP)                        // final exponent, may be out of range
    }
    {
        p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
        if (p0.new) jump:nt .Ladd_ovf               // too large for a finite double
    }
    {
        TMP = sub(#1,EXPB)                          // right-shift needed for a subnormal
        p0 = cmp.gt(EXPB,#0)
        if (p0.new) jumpr:t r31                     // in range: A is already the answer
    }
    {
        A = ATMP                                    // bit 63 of the sum supplies the sign
        B = insert(A,#MANTBITS,#0)                  // leading one plus converted mantissa
    }
    {
        B = lsr(B,TMP)                              // denormalize
    }
    {
        A = insert(B,#63,#0)                        // magnitude into A, sign bit untouched
        jumpr r31
    }
// .Ladd_ovf: the result exponent is too large. Overflow and inexact are set
// in USR, and the result is either infinity or the largest finite value,
// depending on the rounding mode and the sign of the sum.
    .falign
.Ladd_ovf:
    {
        TMP = USR
        ATMP = combine(##0x7fefffff,#-1)            // largest finite magnitude
        A = ATMP                                    // reads the old ATMP: the two's-complement
                                                    // sum, whose bit 63 is the result sign
    }
    {
        BTMP = combine(##0x7ff00000,#0)             // infinity
        EXPB = extractu(TMP,#2,#SR_ROUND_OFF)       // 2-bit rounding mode
        TMP = or(TMP,#0x28)                         // set the inexact and overflow flags
    }
    {
        USR = TMP                                   // commit the flags
        TMP = EXPB                                  // reads old EXPB: plain rounding mode
        EXPB ^= lsr(AH,#31)                         // rounding mode XOR sign bit
    }
    {
        // Infinity unless the mode is 1 (round to zero) or mode XOR sign is 2
        // (a directed mode pointing away from this sign).
        p0 = !cmp.eq(TMP,#1)
        p0 = !cmp.eq(EXPB,#2)
        if (p0.new) ATMP = BTMP
    }
    {
        A = insert(ATMP,#63,#0)                     // install the magnitude, keep the sign
    }
    {
        // NOTE(review): p0 is not consumed in this file after this compare;
        // the reason for comparing A with itself is not evident from here.
        p0 = dfcmp.eq(A,A)
        jumpr r31
    }
| 256 | |
// .Ladd_abnormal: at least one operand failed the dfclass #2 (normal) test.
// The operands are first sorted by magnitude so that |A| >= |B|, then the
// cases are peeled off in order:
//   A is NaN               -> .Linvalid_nan_add
//   A is infinite          -> .Linf_add
//   B is zero              -> .LB_zero
//   A matches dfclass #4   -> .Ladd_two_subnormal
//   otherwise (A passed none of the tests above, B is the small operand)
//                          -> rebuild the fixed-point inputs and rejoin
//                             .Ladd_continue
.Ladd_abnormal:
    {
        ATMP = extractu(A,#63,#0)                   // |A| as an integer
        BTMP = extractu(B,#63,#0)                   // |B| as an integer
    }
    {
        p3 = cmp.gtu(ATMP,BTMP)
        if (!p3.new) A = B                          // both moves read pre-packet values,
        if (!p3.new) B = A                          // so this swaps A and B
    }
    {
        // Any NaN operand produces NaN (and invalid, if it is signalling).
        p0 = dfclass(A,#0x0f)                       // true when A is not NaN
        if (!p0.new) jump:nt .Linvalid_nan_add
        if (!p3) ATMP = BTMP                        // keep the magnitudes in step
        if (!p3) BTMP = ATMP                        // with the swap above
    }
    {
        // infinity + finite is infinity; infinity + infinity is inf or NaN.
        p1 = dfclass(A,#0x08)                       // A is infinity
        if (p1.new) jump:nt .Linf_add
    }
    {
        p2 = dfclass(B,#0x01)                       // B is zero
        if (p2.new) jump:nt .LB_zero                // return A, or handle 0 + 0
        ATMP = #0                                   // .LB_zero compares A against this zero
    }
    // Only sums involving at least one subnormal remain.
    {
        p0 = dfclass(A,#4)                          // larger operand is subnormal too?
        if (p0.new) jump:nt .Ladd_two_subnormal
        ATMP = combine(##0x20000000,#0)             // fixed-point seed for A
    }
    {
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
        EXPB = #1                                   // exponent used for the subnormal B
        // BTMP already holds |B|; shift it to the position insert() uses for A.
        BTMP = asl(BTMP,#EXPBITS-2)
    }
// .Ladd_continue was assembled with r15:14 named EXPDIFF/EXTRACTOFF, and the
// code after it renamed the pair to ZERO. Restore the earlier names so the
// packet below writes r15 as EXPDIFF before jumping back.
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
    {
        ATMP = insert(A,#MANTBITS,#EXPBITS-2)       // mantissa of A into fixed point
        EXPDIFF = sub(EXPA,EXPB)
        ZTMP = combine(#62,#1)                      // shift cap and sticky bit
        jump .Ladd_continue
    }
| 306 | |
// .Ladd_two_subnormal: reached when the larger-magnitude operand matched
// dfclass #4. Each operand is turned into a signed 64-bit integer, the two
// are added, and the sum is converted back to sign + magnitude. A zero sum
// is forwarded to .Lzero_plus_zero.
.Ladd_two_subnormal:
    {
        ATMP = extractu(A,#63,#0)                   // |A|
        BTMP = extractu(B,#63,#0)                   // |B|
    }
    {
        p0 = cmp.gt(AH,#-1)                         // A non-negative?
        p1 = cmp.gt(BH,#-1)                         // B non-negative?
        ATMP = neg(ATMP)                            // -|A|, used when A is negative
        BTMP = neg(BTMP)                            // -|B|, used when B is negative
    }
    {
        if (p0) ATMP = A                            // non-negative: raw bits are the value
        if (p1) BTMP = B
    }
    {
        ATMP = add(ATMP,BTMP)                       // signed integer sum
    }
    {
        p0 = cmp.gt(ATMPH,#-1)                      // sum non-negative?
        BTMP = neg(ATMP)                            // magnitude for a negative sum
        B = #0
    }
    {
        if (p0) A = ATMP
        if (!p0) A = BTMP                           // A = |sum|
        BH = ##0x80000000                           // sign mask; B now reads as -0
    }
    {
        if (!p0) AH = or(AH,BH)                     // old p0: apply the sign
        p0 = dfcmp.eq(A,B)                          // reads A before the OR above
        if (p0.new) jump:nt .Lzero_plus_zero        // zero sum needs the sign rules
    }
    {
        jumpr r31
    }
| 343 | |
// .Linvalid_nan_add: A is NaN (B may be as well). Both operands are passed
// through convert_df2sf, which per the original comments raises invalid for
// a signalling NaN; the converted values are discarded. The result returned
// in A is the all-ones bit pattern.
.Linvalid_nan_add:
    {
        p0 = dfclass(B,#0x0f)                       // B is not NaN
        if (p0.new) B = A                           // then let B mirror A
        TMP = convert_df2sf(A)                      // raises invalid if A is sNaN
    }
    {
        BL = convert_df2sf(B)                       // raises invalid if B is sNaN
        A = #-1                                     // result: all bits set
        jumpr r31
    }
// .LB_zero: the smaller-magnitude operand B is zero. ATMP was cleared on the
// way here, so comparing it with A tells whether A is zero as well.
    .falign
.LB_zero:
    {
        p0 = dfcmp.eq(ATMP,A)                       // is A zero too?
        if (!p0.new) jumpr:t r31                    // no: A + 0 is just A
    }
// .Lzero_plus_zero: both addends are zero (also entered from
// .Ladd_two_subnormal on a zero sum).
//   identical bit patterns -> same sign; A is already right in every
//                             rounding mode
//   differing signs        -> +0, or -0 when rounding down (mode 2)
.Lzero_plus_zero:
    {
        p0 = cmp.eq(A,B)                            // 64-bit integer equality
        if (p0.new) jumpr:t r31
    }
    {
        TMP = USR
    }
    {
        A = #0                                      // default: +0
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)        // 2-bit rounding mode
    }
    {
        p0 = cmp.eq(TMP,#2)                         // rounding down?
        if (p0.new) AH = ##0x80000000               // then return -0
        jumpr r31
    }
// .Linf_add: A is infinite. The sum is A itself unless B is an infinity whose
// high word differs from A's (opposite sign); that case is invalid and
// yields NaN.
.Linf_add:
    {
        p0 = dfclass(B,#8)                          // B infinite as well ...
        p0 = !cmp.eq(AH,BH)                         // ... and with a different sign?
        if (!p0.new) jumpr:t r31                    // otherwise return the infinity in A
    }
    {
        BL = ##0x7f800001                           // single-precision signalling NaN
    }
    {
        A = convert_sf2df(BL)                       // raises invalid and produces NaN
        jumpr r31
    }
END(__hexagon_adddf3)
| 397 | |