1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Double Precision Divide |
// Register, predicate and constant aliases used by __hexagon_divdf3.
// Several names share one physical register (for example SIGN and ONE are
// both r28, and EXPB and RECIPEST are both r13); the routine #undef's and
// re-#define's them as the register roles change.
10 | |
// A (r1:0) is divided by B (r3:2); the result is left in r1:0. The same two
// pairs are renamed REM and DENOM while the quotient is being computed.
11 | #define A r1:0 |
12 | #define AH r1 |
13 | #define AL r0 |
14 | |
15 | #define B r3:2 |
16 | #define BH r3 |
17 | #define BL r2 |
18 | |
// Q accumulates the 64-bit integer quotient that is later passed to
// convert_d2df.
19 | #define Q r5:4 |
20 | #define QH r5 |
21 | #define QL r4 |
22 | |
// PROD is a 64-bit scratch pair (partial products, abs(Q), constants).
23 | #define PROD r7:6 |
24 | #define PRODHI r7 |
25 | #define PRODLO r6 |
26 | |
// Single-precision working registers for the reciprocal estimate.
27 | #define SFONE r8 |
28 | #define SFDEN r9 |
29 | #define SFERROR r10 |
30 | #define SFRECIP r11 |
31 | |
// EXPB is the high word and EXPA the low word of the EXPBA pair.
32 | #define EXPBA r13:12 |
33 | #define EXPB r13 |
34 | #define EXPA r12 |
35 | |
36 | #define REMSUB2 r15:14 |
37 | |
38 | |
39 | |
40 | #define SIGN r28 |
41 | |
// Predicate roles. P_TMP (p0) is a short-lived scratch predicate.
42 | #define Q_POSITIVE p3 |
43 | #define NORMAL p2 |
44 | #define NO_OVF_UNF p1 |
45 | #define P_TMP p0 |
46 | |
// RECIPEST_SHIFT: bit position at which the reciprocal mantissa is inserted
// into RECIPEST. QADJ: constant subtracted from the exponent difference
// before it is added to the exponent field of the converted quotient.
47 | #define RECIPEST_SHIFT 3 |
48 | #define QADJ 61 |
49 | |
// Class-mask operands for the dfclass instruction. DFCLASS_NUMBER (0x0F) is
// every class bit except 0x10, which .Ldiv_nan tests for. Bit 0x04 has no
// name here and is presumably the subnormal class -- TODO confirm.
50 | #define DFCLASS_NORMAL 0x02 |
51 | #define DFCLASS_NUMBER 0x0F |
52 | #define DFCLASS_INFINITE 0x08 |
53 | #define DFCLASS_ZERO 0x01 |
54 | #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) |
55 | #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) |
56 | |
// Field widths of the double (DF) and single (SF) formats, and the double
// exponent bias.
57 | #define DF_MANTBITS 52 |
58 | #define DF_EXPBITS 11 |
59 | #define SF_MANTBITS 23 |
60 | #define SF_EXPBITS 8 |
61 | #define DF_BIAS 0x3ff |
62 | |
// Bit offset in USR of the 2-bit field that .Ldiv_ovf reads as the rounding
// mode.
63 | #define SR_ROUND_OFF 22 |
64 | |
// Each *_ALIAS(TAG) macro exports an extra global symbol (__qdsp_TAG,
// __hexagon_fast_TAG, __hexagon_fast2_TAG) set equal to __hexagon_TAG.
// END(TAG) emits the .size directive for TAG.
65 | #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG |
66 | #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG |
67 | #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG |
68 | #define END(TAG) .size TAG,.-TAG |
69 | |
70 | .text |
//-----------------------------------------------------------------------
// __hexagon_divdf3 -- double-precision divide.
// In:    r1:0 = dividend (A), r3:2 = divisor (B)
// Out:   r1:0 = A / B; returns with jumpr r31
// Uses:  r0-r15, r28, p0-p3; USR is read and written only on the
//        exceptional paths (.Ldiv_ovf_unf and later labels).
// Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and
// __hexagon_fast2_divdf3 through the alias macros.
//
// Fast-path outline (both operands normal, exponent in range):
//   1. seed a single-precision reciprocal of the divisor mantissa and
//      refine it twice,
//   2. run four DIV_ITER1B steps that build an integer quotient in Q while
//      reducing the remainder in REM,
//   3. apply one final correction, fold "remainder != 0" into bit 0 of Q,
//   4. apply the sign, convert Q with convert_d2df and patch the exponent.
//
// Packet notes this code relies on (see the Hexagon Programmer's Reference
// Manual): writes to the same predicate from several compares in one
// packet are ANDed, and the other instructions in a packet that contains a
// jump still execute.
//-----------------------------------------------------------------------
71 | .global __hexagon_divdf3 |
72 | .type __hexagon_divdf3,@function |
73 | Q6_ALIAS(divdf3) |
74 | FAST_ALIAS(divdf3) |
75 | FAST2_ALIAS(divdf3) |
76 | .p2align 5 |
77 | __hexagon_divdf3: |
// NORMAL = A is normal AND B is normal. EXPBA captures both high words for
// exponent extraction; bit 31 of SIGN is the XOR of the operand signs.
78 | { |
79 | NORMAL = dfclass(A,#DFCLASS_NORMAL) |
80 | NORMAL = dfclass(B,#DFCLASS_NORMAL) |
81 | EXPBA = combine(BH,AH) |
82 | SIGN = xor(AH,BH) |
83 | } |
84 | #undef A |
85 | #undef AH |
86 | #undef AL |
87 | #undef B |
88 | #undef BH |
89 | #undef BL |
// From here to the conversion, r1:0 is the running remainder and r3:2 the
// denominator.
90 | #define REM r1:0 |
91 | #define REMHI r1 |
92 | #define REMLO r0 |
93 | #define DENOM r3:2 |
94 | #define DENOMHI r3 |
95 | #define DENOMLO r2 |
// Leave the fast path unless both operands are normal. PROD receives the
// top SF_MANTBITS bits of the divisor mantissa. SFONE is also used by the
// subnormal path at the end of .Ldiv_abnormal, which is reached through
// this jump.
96 | { |
97 | if (!NORMAL) jump .Ldiv_abnormal |
98 | PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) |
99 | SFONE = ##0x3f800001 |
100 | } |
// SFDEN = SFONE with the divisor's top mantissa bits ORed in. EXPA and
// EXPB become the DF_EXPBITS-wide exponent fields. Q_POSITIVE is true when
// the operand signs are equal.
101 | { |
102 | SFDEN = or(SFONE,PRODLO) |
103 | EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) |
104 | EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) |
105 | Q_POSITIVE = cmp.gt(SIGN,#-1) |
106 | } |
107 | #undef SIGN |
108 | #define ONE r28 |
// Re-entry point for the subnormal path, which arrives with SFDEN, EXPA,
// EXPB, Q_POSITIVE and the shifted operands already prepared.
109 | .Ldenorm_continue: |
// Seed the reciprocal. SFERROR = SFONE with bit 0 cleared (0x3f800000).
// EXPA becomes the exponent difference.
110 | { |
111 | SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN) |
112 | SFERROR = and(SFONE,#-2) |
113 | ONE = #1 |
114 | EXPA = sub(EXPA,EXPB) |
115 | } |
116 | #undef EXPB |
117 | #define RECIPEST r13 |
// First refinement: SFERROR -= SFRECIP*SFDEN, then SFRECIP += SFRECIP*SFERROR.
// In parallel, overwrite the sign and exponent fields of REMHI and DENOMHI
// with 1, which clears the sign and leaves an explicit leading mantissa bit.
118 | { |
119 | SFERROR -= sfmpy(SFRECIP,SFDEN):lib |
120 | REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) |
121 | RECIPEST = ##0x00800000 << RECIPEST_SHIFT |
122 | } |
123 | { |
124 | SFRECIP += sfmpy(SFRECIP,SFERROR):lib |
125 | DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) |
126 | SFERROR = and(SFONE,#-2) |
127 | } |
// Second refinement. QH and QL temporarily hold the bounds used for the
// exponent range check below.
128 | { |
129 | SFERROR -= sfmpy(SFRECIP,SFDEN):lib |
130 | QH = #-DF_BIAS+1 |
131 | QL = #DF_BIAS-1 |
132 | } |
// NO_OVF_UNF = (EXPA > -DF_BIAS+1) AND NOT (EXPA > DF_BIAS-1).
133 | { |
134 | SFRECIP += sfmpy(SFRECIP,SFERROR):lib |
135 | NO_OVF_UNF = cmp.gt(EXPA,QH) |
136 | NO_OVF_UNF = !cmp.gt(EXPA,QL) |
137 | } |
// Build the integer reciprocal estimate: the SF_MANTBITS mantissa bits of
// SFRECIP are inserted at RECIPEST_SHIFT, below the bit preset two packets
// earlier. Clear the quotient accumulator and pre-bias the exponent
// difference by -QADJ.
138 | { |
139 | RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) |
140 | Q = #0 |
141 | EXPA = add(EXPA,#-QADJ) |
142 | } |
143 | #undef SFERROR |
144 | #undef SFRECIP |
145 | #define TMP r10 |
146 | #define TMP1 r11 |
// Lower the estimate by 3 units at RECIPEST_SHIFT.
// NOTE(review): presumably so the estimate never exceeds the true
// reciprocal -- confirm.
147 | { |
148 | RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) |
149 | } |
150 | |
// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient step.
//   packet 1: PROD = RECIPEST * REMHI; REM <<= REMSHIFT
//   packet 2: PRODLO = 0; REM -= PRODHI*DENOMLO; REMSUB2 = PRODHI*DENOMHI
//   packet 3: Q += QSHIFTINSN(PROD,QSHIFT); REM -= REMSUB2 << 32; EXTRA
// Packets 2 and 3 together subtract PRODHI * DENOM from REM. EXTRA is an
// optional instruction appended to packet 3.
// NOTE(review): packet 1 presumably multiplies by REMHI as it was before
// the shift in the same packet -- confirm against packet semantics.
151 | #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \ |
152 | { \ |
153 | PROD = mpyu(RECIPEST,REMHI); \ |
154 | REM = asl(REM,# ## ( REMSHIFT )); \ |
155 | }; \ |
156 | { \ |
157 | PRODLO = # ## 0; \ |
158 | REM -= mpyu(PRODHI,DENOMLO); \ |
159 | REMSUB2 = mpyu(PRODHI,DENOMHI); \ |
160 | }; \ |
161 | { \ |
162 | Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \ |
163 | REM -= asl(REMSUB2, # ## 32); \ |
164 | EXTRA \ |
165 | } |
166 | |
167 | |
// Four steps; each accumulates its partial quotient 15 bit positions lower
// than the previous one (left 14, then right 1, 16, 31). The last step also
// zeroes PROD so it can carry the final correction amount.
168 | DIV_ITER1B(ASL,14,15,) |
169 | DIV_ITER1B(ASR,1,15,) |
170 | DIV_ITER1B(ASR,16,15,) |
171 | DIV_ITER1B(ASR,31,15,PROD=# ( 0 );) |
172 | |
173 | #undef REMSUB2 |
174 | #define TMPPAIR r15:14 |
175 | #define TMPPAIRHI r15 |
176 | #define TMPPAIRLO r14 |
177 | #undef RECIPEST |
178 | #define EXPB r13 |
// Final correction: when DENOM is not greater than REM, PROD becomes 2 (it
// was zeroed above) and REM - DENOM is kept in TMPPAIR.
179 | { |
180 | // compare or sub with carry |
181 | TMPPAIR = sub(REM,DENOM) |
182 | P_TMP = cmp.gtu(DENOM,REM) |
183 | // set up amt to add to q |
184 | if (!P_TMP.new) PRODLO = #2 |
185 | } |
// Add the correction (0 or 2) to Q and commit the reduced remainder.
// TMPPAIR is then cleared for the compare in the next packet.
186 | { |
187 | Q = add(Q,PROD) |
188 | if (!P_TMP) REM = TMPPAIR |
189 | TMPPAIR = #0 |
190 | } |
// Sticky bit: a non-zero remainder sets bit 0 of Q.
191 | { |
192 | P_TMP = cmp.eq(REM,TMPPAIR) |
193 | if (!P_TMP.new) QL = or(QL,ONE) |
194 | } |
// Negate Q when the operand signs differ.
195 | { |
196 | PROD = neg(Q) |
197 | } |
198 | { |
199 | if (!Q_POSITIVE) Q = PROD |
200 | } |
201 | #undef REM |
202 | #undef REMHI |
203 | #undef REMLO |
204 | #undef DENOM |
205 | #undef DENOMLO |
206 | #undef DENOMHI |
207 | #define A r1:0 |
208 | #define AH r1 |
209 | #define AL r0 |
210 | #define B r3:2 |
211 | #define BH r3 |
212 | #define BL r2 |
// Convert the signed integer quotient to double. The conversion shares a
// packet with the jump, and .Ldiv_ovf_unf uses A as already converted.
213 | { |
214 | A = convert_d2df(Q) |
215 | if (!NO_OVF_UNF) jump .Ldiv_ovf_unf |
216 | } |
// In-range case: add the pre-biased exponent difference into the exponent
// field of the result and return.
217 | { |
218 | AH += asl(EXPA,#DF_MANTBITS-32) |
219 | jumpr r31 |
220 | } |
221 | |
222 | .Ldiv_ovf_unf: |
// Out-of-range exponent path. Entered from the main path with
// A = convert_d2df(Q), Q = signed integer quotient, and EXPA = exponent
// difference minus QADJ. Dispatches to .Ldiv_ovf, .Lpossible_unf, or falls
// through to the underflow code below.
// NOTE(review): EXPB presumably receives the exponent field of AH as it was
// before the += in the same packet (see the "post-converted exponent"
// remark below) -- confirm against packet semantics.
223 | { |
224 | AH += asl(EXPA,#DF_MANTBITS-32) |
225 | EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) |
226 | } |
// PROD = abs(Q), used by both the underflow code and .Lpossible_unf.
// EXPA becomes the exponent value tested against the format limits.
227 | { |
228 | PROD = abs(Q) |
229 | EXPA = add(EXPA,EXPB) |
230 | } |
231 | { |
232 | P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow |
233 | if (P_TMP.new) jump:nt .Ldiv_ovf |
234 | } |
235 | { |
236 | P_TMP = cmp.gt(EXPA,#0) |
237 | if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... |
238 | } |
239 | // Underflow |
240 | // We know what the infinite range exponent should be (EXPA) |
241 | // Q is 2's complement, PROD is abs(Q) |
242 | // Normalize Q, shift right, add a high bit, convert, change exponent |
243 | |
244 | #define FUDGE1 7 // how much to shift right |
245 | #define FUDGE2 4 // how many guard/round to keep at lsbs |
246 | |
// EXPB = clb(PROD)-1 is the left-shift count that normalizes abs(Q). EXPA
// becomes FUDGE1 minus the exponent value (which is <= 0 on this path).
// TMP snapshots USR.
247 | { |
248 | EXPB = add(clb(PROD),#-1) // doesn't need to be added in since |
249 | EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent |
250 | TMP = USR |
251 | TMP1 = #63 |
252 | } |
// Clamp the right-shift count to 63 and prepare TMP1 = USR | 0x30 (the
// same mask .Lpossible_unf describes as Unf+Inexact). EXPA = #0 leaves the
// EXPBA pair holding EXPB in its high word and 0 in its low word for the
// extractu below.
// NOTE(review): PROD = asl(PROD,EXPB) shares a packet with the new write
// to EXPB, so it presumably shifts by the clb-based count from the
// previous packet -- confirm.
253 | { |
254 | EXPB = min(EXPA,TMP1) |
255 | TMP1 = or(TMP,#0x030) |
256 | PROD = asl(PROD,EXPB) |
257 | EXPA = #0 |
258 | } |
259 | { |
260 | TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out |
261 | PROD = lsr(PROD,EXPB) // shift out bits |
262 | B = #1 |
263 | } |
// With B = 1: if any shifted-out bit was set (TMPPAIR >= 1), OR 1 into
// PRODLO as a sticky bit. Also set bit DF_MANTBITS-32+FUDGE2 of PRODHI,
// the "high bit" mentioned in the underflow notes above.
264 | { |
265 | P_TMP = cmp.gtu(B,TMPPAIR) |
266 | if (!P_TMP.new) PRODLO = or(BL,PRODLO) |
267 | PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) |
268 | } |
// If any of the low FUDGE2 bits of PRODLO is set, switch TMP to the
// USR | 0x30 copy. Q = -PROD is the default sign; the next packet replaces
// it with PROD when the quotient is positive.
269 | { |
270 | Q = neg(PROD) |
271 | P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1) |
272 | if (!P_TMP.new) TMP = TMP1 |
273 | } |
// USR is written back unconditionally (unchanged unless the flags were
// selected above). TMP is then reused as the exponent correction.
274 | { |
275 | USR = TMP |
276 | if (Q_POSITIVE) Q = PROD |
277 | TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2) |
278 | } |
279 | { |
280 | A = convert_d2df(Q) |
281 | } |
// Apply the exponent correction to the converted value and return.
282 | { |
283 | AH += asl(TMP,#DF_MANTBITS-32) |
284 | jumpr r31 |
285 | } |
286 | |
287 | |
288 | .Lpossible_unf: |
// Entered from .Ldiv_ovf_unf with A already holding the result (exponent
// applied) and PROD = abs(Q). The value in A is returned unchanged; this
// path only decides whether the underflow flag must also be raised.
289 | // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal |
290 | // The answer is correct, but we need to raise Underflow |
// B = A with the sign bit dropped (low 63 bits).
291 | { |
292 | B = extractu(A,#63,#0) |
293 | TMPPAIR = combine(##0x00100000,#0) // min normal |
294 | TMP = #0x7FFF |
295 | } |
// P_TMP = (abs(A) == min normal) AND (every bit of 0x7FFF is set in
// PRODHI); both compares write the same predicate in one packet.
296 | { |
297 | P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value... |
298 | P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)? |
299 | } |
300 | |
// The two instructions are the same in both variants; for
// __HEXAGON_ARCH__ == 60 they are not grouped into one explicit packet.
301 | #if (__HEXAGON_ARCH__ == 60) |
302 | TMP = USR // If not, just return |
303 | if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact |
304 | // Note that inexact is already set... |
305 | #else |
306 | { |
307 | if (!P_TMP) jumpr r31 // If not, just return |
308 | TMP = USR // Else, we want to set Unf+Inexact |
309 | } // Note that inexact is already set... |
310 | #endif |
// OR the Unf+Inexact mask (0x30) into USR.
311 | { |
312 | TMP = or(TMP,#0x30) |
313 | } |
314 | { |
315 | USR = TMP |
316 | } |
// p0 is not read again on this path.
// NOTE(review): the self-compare is presumably issued so the newly set
// flags take effect -- confirm.
317 | { |
318 | p0 = dfcmp.eq(A,A) |
319 | jumpr r31 |
320 | } |
321 | |
322 | .Ldiv_ovf: |
// Entered from .Ldiv_ovf_unf when the exponent value exceeds
// DF_BIAS+DF_BIAS. Only Q_POSITIVE is consumed from the earlier
// computation; the result is rebuilt from constants.
323 | |
324 | // Raise Overflow, and choose the correct overflow value (saturated normal or infinity) |
325 | |
// B = 0x7fefffff_ffffffff (the saturated normal). AH = 0 for a positive
// result and all ones for a negative one; only its top bit survives the
// final insert.
326 | { |
327 | TMP = USR |
328 | B = combine(##0x7fefffff,#-1) |
329 | AH = mux(Q_POSITIVE,#0,#-1) |
330 | } |
// PROD = 0x7ff00000_00000000 (infinity). QH = the 2-bit field at
// SR_ROUND_OFF in USR. 0x28 is ORed into the USR copy.
// NOTE(review): 0x28 is presumably Overflow|Inexact -- confirm against the
// USR bit layout.
331 | { |
332 | PROD = combine(##0x7ff00000,#0) |
333 | QH = extractu(TMP,#2,#SR_ROUND_OFF) |
334 | TMP = or(TMP,#0x28) |
335 | } |
// QH is XORed with the result's sign bit.
// NOTE(review): QL = QH is in the same packet as the XOR, so QL presumably
// keeps the un-XORed field -- confirm against packet semantics.
336 | { |
337 | USR = TMP |
338 | QH ^= lsr(AH,#31) |
339 | QL = QH |
340 | } |
// All three writes to p0 are in one packet; when the rounding tests pass,
// B is replaced by infinity.
341 | { |
342 | p0 = !cmp.eq(QL,#1) // if not round-to-zero |
343 | p0 = !cmp.eq(QH,#2) // and not rounding the other way |
344 | if (p0.new) B = PROD // go to inf |
345 | p0 = dfcmp.eq(B,B) // get exceptions |
346 | } |
// Copy the low 63 bits of B into A, keeping the sign bit prepared in AH.
347 | { |
348 | A = insert(B,#63,#0) |
349 | jumpr r31 |
350 | } |
351 | |
352 | #undef ONE |
353 | #define SIGN r28 |
354 | #undef NORMAL |
355 | #undef NO_OVF_UNF |
356 | #define P_INF p1 |
357 | #define P_ZERO p2 |
// Entered from the second packet of __hexagon_divdf3 when A or B is not
// normal. SIGN (r28) and SFONE were written before or alongside that jump.
// Dispatch:
//   class 0x10 operand (not a "number") -> .Ldiv_nan
//   inf/inf or 0/0                      -> .Ldiv_invalid
//   A is zero or B is infinite          -> .Ldiv_zero_result
//   A is infinite or B is zero          -> .Ldiv_inf_result
//   otherwise                           -> renormalize and rejoin the main
//                                          path at .Ldenorm_continue
358 | .Ldiv_abnormal: |
// P_TMP = both operands match DFCLASS_NUMBER. Q_POSITIVE is recomputed
// here because the main path's packet that sets it was skipped.
359 | { |
360 | P_TMP = dfclass(A,#DFCLASS_NUMBER) |
361 | P_TMP = dfclass(B,#DFCLASS_NUMBER) |
362 | Q_POSITIVE = cmp.gt(SIGN,#-1) |
363 | } |
// P_INF = both infinite; P_ZERO = both zero.
364 | { |
365 | P_INF = dfclass(A,#DFCLASS_INFINITE) |
366 | P_INF = dfclass(B,#DFCLASS_INFINITE) |
367 | } |
368 | { |
369 | P_ZERO = dfclass(A,#DFCLASS_ZERO) |
370 | P_ZERO = dfclass(B,#DFCLASS_ZERO) |
371 | } |
372 | { |
373 | if (!P_TMP) jump .Ldiv_nan |
374 | if (P_INF) jump .Ldiv_invalid |
375 | } |
376 | { |
377 | if (P_ZERO) jump .Ldiv_invalid |
378 | } |
// The predicates are reused with new meanings:
//   P_ZERO = A nonzero AND B non-infinite (false -> the quotient is zero)
//   P_INF  = A non-infinite AND B nonzero (false -> the quotient is infinite)
// With inf/inf and 0/0 already excluded, at most one of them is false.
379 | { |
380 | P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero |
381 | P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite |
382 | } |
383 | { |
384 | P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite |
385 | P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero |
386 | } |
387 | { |
388 | if (!P_ZERO) jump .Ldiv_zero_result |
389 | if (!P_INF) jump .Ldiv_inf_result |
390 | } |
391 | // Now we've narrowed it down to (de)normal / (de)normal |
392 | // Set up A/EXPA B/EXPB and go back |
393 | #undef P_ZERO |
394 | #undef P_INF |
395 | #define P_TMP2 p1 |
// P_TMP / P_TMP2 record whether A / B is normal. TMP = 0x00100000 is the
// hidden-bit position within a high word.
396 | { |
397 | P_TMP = dfclass(A,#DFCLASS_NORMAL) |
398 | P_TMP2 = dfclass(B,#DFCLASS_NORMAL) |
399 | TMP = ##0x00100000 |
400 | } |
// Save the original high words in EXPBA for exponent extraction, then zero
// the sign and exponent fields of both operands (the inserted low 12 bits
// of TMP are all zero).
401 | { |
402 | EXPBA = combine(BH,AH) |
403 | AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit |
404 | BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit |
405 | } |
406 | { |
407 | if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit |
408 | if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit |
409 | } |
// QH / QL = left-shift counts computed as clb(operand) - DF_EXPBITS; they
// are applied to A and B two packets below.
410 | { |
411 | QH = add(clb(A),#-DF_EXPBITS) |
412 | QL = add(clb(B),#-DF_EXPBITS) |
413 | TMP = #1 |
414 | } |
415 | { |
416 | EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) |
417 | EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) |
418 | } |
// Normalize both mantissas. For an operand that is not normal, the
// exponent becomes 1 minus its shift count.
419 | { |
420 | A = asl(A,QH) |
421 | B = asl(B,QL) |
422 | if (!P_TMP) EXPA = sub(TMP,QH) |
423 | if (!P_TMP2) EXPB = sub(TMP,QL) |
424 | } // recreate values needed by resume code |
// Rebuild SFDEN from the normalized divisor exactly as the main path does,
// then rejoin it.
425 | { |
426 | PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) |
427 | } |
428 | { |
429 | SFDEN = or(SFONE,PRODLO) |
430 | jump .Ldenorm_continue |
431 | } |
432 | |
433 | .Ldiv_zero_result: |
// Entered from .Ldiv_abnormal when A is zero or B is infinite (the 0/0 and
// inf/inf cases were already diverted). Returns a zero whose sign bit is
// the XOR of the operand sign bits.
434 | { |
435 | AH = xor(AH,BH) |
436 | B = #0 |
437 | } |
// Replace the low 63 bits of A with zeros; bit 63 keeps the XORed sign.
438 | { |
439 | A = insert(B,#63,#0) |
440 | jumpr r31 |
441 | } |
442 | .Ldiv_inf_result: |
// Entered from .Ldiv_abnormal when A is infinite or B is zero (the inf/inf
// and 0/0 cases were already diverted). Returns an infinity whose sign bit
// is the XOR of the operand sign bits.
// p2 = B is zero AND A is non-infinite, i.e. a finite nonzero A divided by
// zero.
443 | { |
444 | p2 = dfclass(B,#DFCLASS_ZERO) |
445 | p2 = dfclass(A,#DFCLASS_NONINFINITE) |
446 | } |
// The sign XOR shares a packet with the jump, so it is performed on both
// paths. Only the divide-by-zero case goes on to modify USR.
447 | { |
448 | TMP = USR |
449 | if (!p2) jump 1f |
450 | AH = xor(AH,BH) |
451 | } |
452 | { |
453 | TMP = or(TMP,#0x04) // DBZ |
454 | } |
455 | { |
456 | USR = TMP |
457 | } |
458 | 1: |
// B = 0x7ff00000_00000000 (infinity).
// NOTE(review): the dfcmp.uo shares a packet with the write to B, so it
// presumably compares the caller's original B -- confirm.
459 | { |
460 | B = combine(##0x7ff00000,#0) |
461 | p0 = dfcmp.uo(B,B) // take possible exception |
462 | } |
// Copy the low 63 bits of B into A; bit 63 keeps the XORed sign.
463 | { |
464 | A = insert(B,#63,#0) |
465 | jumpr r31 |
466 | } |
467 | .Ldiv_nan: |
// Entered from .Ldiv_abnormal when A or B does not match DFCLASS_NUMBER.
// Class mask 0x10 is the one bit outside DFCLASS_NUMBER, i.e. the NaN
// class. An operand that is not in that class is overwritten with the
// other operand, so both conversions below see a NaN.
// NOTE(review): both conditional copies are in one packet and presumably
// read the pre-packet A and B -- confirm.
468 | { |
469 | p0 = dfclass(A,#0x10) |
470 | p1 = dfclass(B,#0x10) |
471 | if (!p0.new) A = B |
472 | if (!p1.new) B = A |
473 | } |
// The converted values in QH and QL are not used; the conversions are
// issued only for their exception side effect.
474 | { |
475 | QH = convert_df2sf(A) // get possible invalid exceptions |
476 | QL = convert_df2sf(B) |
477 | } |
// Return the all-ones bit pattern.
478 | { |
479 | A = #-1 |
480 | jumpr r31 |
481 | } |
482 | |
483 | .Ldiv_invalid: |
// Entered from .Ldiv_abnormal for inf/inf and 0/0.
// 0x7f800001 is a single-precision pattern with an all-ones exponent and a
// nonzero mantissa; converting it to double produces the returned NaN and
// raises Invalid, as the comment on the conversion states.
484 | { |
485 | TMP = ##0x7f800001 |
486 | } |
487 | { |
488 | A = convert_sf2df(TMP) // get invalid, get DF qNaN |
489 | jumpr r31 |
490 | } |
// Record the size of the whole routine, including all local-label paths.
491 | END(__hexagon_divdf3) |
492 | |