1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /* ==================================================================== */ |
9 | /* FUNCTIONS Optimized double floating point operators */ |
10 | /* ==================================================================== */ |
11 | /* c = fast2_dadd_asm(a, b) */ |
12 | /* ==================================================================== * |
13 | fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) { |
14 | fast2_QDOUBLE c; |
15 | lint manta = a & MANTMASK; |
16 | int expa = Q6_R_sxth_R(a) ; |
17 | lint mantb = b & MANTMASK; |
18 | int expb = Q6_R_sxth_R(b) ; |
19 | int exp, expdiff, j, k, hi, lo, cn; |
20 | lint mant; |
21 | |
22 | expdiff = (int) Q6_P_vabsdiffh_PP(a, b); |
23 | expdiff = Q6_R_sxth_R(expdiff) ; |
24 | if (expdiff > 62) { expdiff = 62;} |
25 | if (expa > expb) { |
26 | exp = expa + 1; |
27 | expa = 1; |
28 | expb = expdiff + 1; |
29 | } else { |
30 | exp = expb + 1; |
31 | expb = 1; |
32 | expa = expdiff + 1; |
33 | } |
34 | mant = (manta>>expa) + (mantb>>expb); |
35 | |
36 | hi = (int) (mant>>32); |
37 | lo = (int) (mant); |
38 | |
39 | k = Q6_R_normamt_R(hi); |
40 | if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo); |
41 | |
42 | mant = (mant << k); |
43 | cn = (mant == 0x8000000000000000LL); |
44 | exp = exp - k + cn; |
45 | |
46 | if (mant == 0 || mant == -1) exp = 0x8001; |
47 | c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); |
48 | return(c); |
49 | } |
50 | * ==================================================================== */ |
// fast2_dadd_asm -- assembly counterpart of the fast2_dadd() reference code
// given in the comment block above.
// Operands are read from R1:0 (a) and R3:2 (b); the result is left in R1:0.
// The low 16 bits of each operand are sign-extended to obtain its exponent
// and are then cleared so the remaining bits can be used as the mantissa.
// Register aliases are reused: k shares R4 with expa, and lmant/lmanta/
// mantexpa all name R1:0.
// NOTE(review): the packet that decrements k also compares k; this relies on
// Hexagon packet semantics (reads see the pre-packet value) -- confirm
// against the Hexagon programmer's reference manual.
51 | .text |
52 | .global fast2_dadd_asm |
53 | .type fast2_dadd_asm, @function |
54 | fast2_dadd_asm: |
55 | #define manta R0 |
56 | #define mantexpa R1:0 |
57 | #define lmanta R1:0 |
58 | #define mantb R2 |
59 | #define mantexpb R3:2 |
60 | #define lmantb R3:2 |
61 | #define expa R4 |
62 | #define expb R5 |
63 | #define mantexpd R7:6 |
64 | #define expd R6 |
65 | #define exp R8 |
66 | #define c63 R9 |
67 | #define lmant R1:0 |
68 | #define manth R1 |
69 | #define mantl R0 |
70 | #define minmin R11:10 // exactly 0x0000000000008001LL |
71 | #define minminl R10 |
72 | #define k R4 |
73 | #define ce P0 |
74 | .falign |
// Packet 1: halfword absolute differences of a and b, both exponents
// (sign-extended low halfwords), and the shift clamp value 62 in c63.
75 | { |
76 | mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL |
77 | c63 = #62 |
78 | expa = SXTH(manta) |
79 | expb = SXTH(mantb) |
// Packet 2: keep only the low-halfword (exponent) difference; ce = expa > expb;
// exp = larger exponent + 1.
80 | } { |
81 | expd = SXTH(expd) |
82 | ce = CMP.GT(expa, expb); |
83 | if ( ce.new) exp = add(expa, #1) |
84 | if (!ce.new) exp = add(expb, #1) |
// Packet 3: the operand with the larger exponent gets a shift count of 1;
// clear the exponent field of a; clamp the exponent difference to 62.
85 | } { |
86 | if ( ce) expa = #1 |
87 | if (!ce) expb = #1 |
88 | manta.L = #0 |
89 | expd = MIN(expd, c63) |
// Packet 4: the other operand gets a shift count of expd + 1; clear the
// exponent field of b; start building the minmin constant from zero.
90 | } { |
91 | if (!ce) expa = add(expd, #1) |
92 | if ( ce) expb = add(expd, #1) |
93 | mantb.L = #0 |
94 | minmin = #0 |
// Packet 5: arithmetic right shift of both 64-bit mantissas by their counts.
95 | } { |
96 | lmanta = ASR(lmanta, expa) |
97 | lmantb = ASR(lmantb, expb) |
// Packet 6: 64-bit sum; finish minmin = 0x0000000000008001.
98 | } { |
99 | lmant = add(lmanta, lmantb) |
100 | minminl.L = #0x8001 |
// Packet 7: count leading bits of the sum; c63 is reused to hold the
// threshold 58 for the special-case test.
101 | } { |
102 | k = clb(lmant) |
103 | c63 = #58 |
// Packet 8: k = clb - 1 is the normalising left shift; p0 selects the
// .Ldenorma path when the compared count exceeds 58.
104 | } { |
105 | k = add(k, #-1) |
106 | p0 = cmp.gt(k, c63) |
// Packet 9: normalise the mantissa, adjust the exponent by the shift, and
// branch to the special-case return if p0 is set.
107 | } { |
108 | mantexpa = ASL(lmant, k) |
109 | exp = SUB(exp, k) |
110 | if(p0) jump .Ldenorma |
// Packet 10: place the low 16 bits of exp into bits 15:0 of R0 and return.
111 | } { |
112 | manta = insert(exp, #16, #0) |
113 | jumpr r31 |
114 | } |
// Special case: return the constant minmin (0x0000000000008001) in R1:0.
115 | .Ldenorma: |
116 | { |
117 | mantexpa = minmin |
118 | jumpr r31 |
119 | } |
120 | /* =================================================================== * |
121 | fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) { |
122 | fast2_QDOUBLE c; |
123 | lint manta = a & MANTMASK; |
124 | int expa = Q6_R_sxth_R(a) ; |
125 | lint mantb = b & MANTMASK; |
126 | int expb = Q6_R_sxth_R(b) ; |
127 | int exp, expdiff, j, k; |
128 | lint mant; |
129 | |
130 | expdiff = (int) Q6_P_vabsdiffh_PP(a, b); |
131 | expdiff = Q6_R_sxth_R(expdiff) ; |
132 | if (expdiff > 62) { expdiff = 62;} |
133 | if (expa > expb) { |
134 | exp = expa + 1; |
135 | expa = 1; |
136 | expb = expdiff + 1; |
137 | } else { |
138 | exp = expb + 1; |
139 | expb = 1; |
140 | expa = expdiff + 1; |
141 | } |
142 | mant = (manta>>expa) - (mantb>>expb); |
143 | k = Q6_R_clb_P(mant)-1; |
144 | mant = (mant << k); |
145 | exp = exp - k; |
146 | if (mant == 0 || mant == -1) exp = 0x8001; |
147 | c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); |
148 | return(c); |
149 | } |
150 | * ==================================================================== */ |
// fast2_dsub_asm -- assembly counterpart of the fast2_dsub() reference code
// given in the comment block above.
// Operands are read from R1:0 (a) and R3:2 (b); the result (a - b) is left
// in R1:0. The low 16 bits of each operand are sign-extended to obtain its
// exponent and are then cleared so the remaining bits form the mantissa.
// Register aliases are reused: k shares R4 with expa, and lmant/lmanta/
// mantexpa all name R1:0.
// NOTE(review): the packet that decrements k also compares k; this relies on
// Hexagon packet semantics (reads see the pre-packet value) -- confirm
// against the Hexagon programmer's reference manual.
151 | .text |
152 | .global fast2_dsub_asm |
153 | .type fast2_dsub_asm, @function |
154 | fast2_dsub_asm: |
155 | |
156 | #define manta R0 |
157 | #define mantexpa R1:0 |
158 | #define lmanta R1:0 |
159 | #define mantb R2 |
160 | #define mantexpb R3:2 |
161 | #define lmantb R3:2 |
162 | #define expa R4 |
163 | #define expb R5 |
164 | #define mantexpd R7:6 |
165 | #define expd R6 |
166 | #define exp R8 |
167 | #define c63 R9 |
168 | #define lmant R1:0 |
169 | #define manth R1 |
170 | #define mantl R0 |
171 | #define minmin R11:10 // exactly 0x0000000000008001LL |
172 | #define minminl R10 |
173 | #define k R4 |
174 | #define ce P0 |
175 | .falign |
// Packet 1: halfword absolute differences of a and b, both exponents
// (sign-extended low halfwords), and the shift clamp value 62 in c63.
176 | { |
177 | mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL |
178 | c63 = #62 |
179 | expa = SXTH(manta) |
180 | expb = SXTH(mantb) |
// Packet 2: keep only the low-halfword (exponent) difference; ce = expa > expb;
// exp = larger exponent + 1.
181 | } { |
182 | expd = SXTH(expd) |
183 | ce = CMP.GT(expa, expb); |
184 | if ( ce.new) exp = add(expa, #1) |
185 | if (!ce.new) exp = add(expb, #1) |
// Packet 3: the operand with the larger exponent gets a shift count of 1;
// clear the exponent field of a; clamp the exponent difference to 62.
186 | } { |
187 | if ( ce) expa = #1 |
188 | if (!ce) expb = #1 |
189 | manta.L = #0 |
190 | expd = MIN(expd, c63) |
// Packet 4: the other operand gets a shift count of expd + 1; clear the
// exponent field of b; start building the minmin constant from zero.
191 | } { |
192 | if (!ce) expa = add(expd, #1) |
193 | if ( ce) expb = add(expd, #1) |
194 | mantb.L = #0 |
195 | minmin = #0 |
// Packet 5: arithmetic right shift of both 64-bit mantissas by their counts.
196 | } { |
197 | lmanta = ASR(lmanta, expa) |
198 | lmantb = ASR(lmantb, expb) |
// Packet 6: 64-bit difference a - b; finish minmin = 0x0000000000008001.
199 | } { |
200 | lmant = sub(lmanta, lmantb) |
201 | minminl.L = #0x8001 |
// Packet 7: count leading bits of the difference; c63 is reused to hold the
// threshold 58 for the special-case test.
202 | } { |
203 | k = clb(lmant) |
204 | c63 = #58 |
// Packet 8: k = clb - 1 is the normalising left shift; p0 selects the
// .Ldenorm path when the compared count exceeds 58.
205 | } { |
206 | k = add(k, #-1) |
207 | p0 = cmp.gt(k, c63) |
// Packet 9: normalise the mantissa, adjust the exponent by the shift, and
// branch to the special-case return if p0 is set.
208 | } { |
209 | mantexpa = ASL(lmant, k) |
210 | exp = SUB(exp, k) |
211 | if(p0) jump .Ldenorm |
// Packet 10: place the low 16 bits of exp into bits 15:0 of R0 and return.
212 | } { |
213 | manta = insert(exp, #16, #0) |
214 | jumpr r31 |
215 | } |
// Special case: return the constant minmin (0x0000000000008001) in R1:0.
216 | .Ldenorm: |
217 | { |
218 | mantexpa = minmin |
219 | jumpr r31 |
220 | } |
221 | /* ==================================================================== * |
222 | fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) { |
223 | fast2_QDOUBLE c; |
224 | lint manta = a & MANTMASK; |
225 | int expa = Q6_R_sxth_R(a) ; |
226 | lint mantb = b & MANTMASK; |
227 | int expb = Q6_R_sxth_R(b) ; |
228 | int exp, k; |
229 | lint mant; |
230 | int hia, hib, hi, lo; |
231 | unsigned int loa, lob; |
232 | |
233 | hia = (int)(a >> 32); |
234 | loa = Q6_R_extractu_RII((int)manta, 31, 1); |
235 | hib = (int)(b >> 32); |
236 | lob = Q6_R_extractu_RII((int)mantb, 31, 1); |
237 | |
238 | mant = Q6_P_mpy_RR(hia, lob); |
239 | mant = Q6_P_mpyacc_RR(mant,hib, loa); |
240 | mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1); |
241 | |
242 | hi = (int) (mant>>32); |
243 | |
244 | k = Q6_R_normamt_R(hi); |
245 | mant = mant << k; |
246 | exp = expa + expb - k; |
247 | if (mant == 0 || mant == -1) exp = 0x8001; |
248 | c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); |
249 | return(c); |
250 | } |
251 | * ==================================================================== */ |
// fast2_dmpy_asm -- assembly counterpart of the fast2_dmpy() reference code
// given in the comment block above.
// Operands are read from R1:0 (a) and R3:2 (b); the result is left in R1:0.
// The low 16 bits of each operand's low word are sign-extended to obtain its
// exponent; the upper 16 bits of the low word and the whole high word feed
// the partial products.
// Register sharing to keep in mind when editing:
//   - c8001 and mantal_ are both R12 (c8001.L is written only after the last
//     read of mantal_),
//   - guard is R11, the high half of lmantc (written only after lmantc has
//     been added into mantexpd).
// NOTE(review): the packet that updates exp/kb also compares kb; this relies
// on Hexagon packet semantics (reads see the pre-packet value) -- confirm
// against the Hexagon programmer's reference manual.
252 | .text |
253 | .global fast2_dmpy_asm |
254 | .type fast2_dmpy_asm, @function |
255 | fast2_dmpy_asm: |
256 | |
257 | #define mantal R0 |
258 | #define mantah R1 |
259 | #define mantexpa R1:0 |
260 | #define mantbl R2 |
261 | #define mantbh R3 |
262 | #define mantexpb R3:2 |
263 | #define expa R4 |
264 | #define expb R5 |
265 | #define c8001 R12 |
266 | #define mantexpd R7:6 |
267 | #define mantdh R7 |
268 | #define exp R8 |
269 | #define lmantc R11:10 |
270 | #define kb R9 |
271 | #define guard R11 |
272 | #define mantal_ R12 |
273 | #define mantbl_ R13 |
274 | #define min R15:14 |
275 | #define minh R15 |
276 | |
277 | .falign |
// Packet 1: split each low word into its exponent (sign-extended low
// halfword) and its upper 16 mantissa bits (logical shift right by 16).
278 | { |
279 | mantbl_= lsr(mantbl, #16) |
280 | expb = sxth(mantbl) |
281 | expa = sxth(mantal) |
282 | mantal_= lsr(mantal, #16) |
283 | } |
// Packet 2: high*high product into lmantc, first cross product into
// mantexpd; clear the exponent field of a; start building min from zero.
284 | { |
285 | lmantc = mpy(mantah, mantbh) |
286 | mantexpd = mpy(mantah, mantbl_) |
287 | mantal.L = #0x0 |
288 | min = #0 |
289 | } |
// Packet 3: double the high*high product; accumulate the second cross
// product; clear the exponent field of b; min = 0x8000000000000000.
290 | { |
291 | lmantc = add(lmantc, lmantc) |
292 | mantexpd+= mpy(mantbh, mantal_) |
293 | mantbl.L = #0x0 |
294 | minh.H = #0x8000 |
295 | } |
// Packet 4: scale the cross-product sum down by 15 bits; load 0x8001 into the
// low half of c8001; p1 = the two mantissas (exponent fields cleared) match.
296 | { |
297 | mantexpd = asr(mantexpd, #15) |
298 | c8001.L = #0x8001 |
299 | p1 = cmp.eq(mantexpa, mantexpb) |
300 | } |
// Packet 5: combine the partial products; add the exponents;
// p2 = mantissa of a equals min (0x8000000000000000).
301 | { |
302 | mantexpd = add(mantexpd, lmantc) |
303 | exp = add(expa, expb) |
304 | p2 = cmp.eq(mantexpa, min) |
305 | } |
// Packet 6: count leading bits of the product; guard = 58 is the threshold
// for the special exponent. The abs() result lands in R3:2, which no later
// packet in this routine reads.
306 | { |
307 | kb = clb(mantexpd) |
308 | mantexpb = abs(mantexpd) |
309 | guard = #58 |
310 | } |
// Packet 7: p1 now means "both mantissas are 0x8000000000000000";
// subtract the leading-bit count from exp, form the shift kb - 1, and set p0
// when the compared count exceeds guard.
311 | { |
312 | p1 = and(p1, p2) |
313 | exp = sub(exp, kb) |
314 | kb = add(kb, #-1) |
315 | p0 = cmp.gt(kb, guard) |
316 | } |
// Packet 8: exp += 1 (net effect with the previous packet: exp minus the
// normalising shift); normalise the mantissa; divert to .Lsat if p1.
317 | { |
318 | exp = add(exp, #1) |
319 | mantexpa = asl(mantexpd, kb) |
320 | if(p1) jump .Lsat //rarely happens |
321 | } |
// Packet 9: insert the low 16 bits of exp into bits 15:0 of R0; this is the
// final result unless p0 is set.
322 | { |
323 | mantal = insert(exp,#16, #0) |
324 | if(!p0) jumpr r31 |
325 | } |
// Packet 10 (p0 set): overwrite the exponent field with 0x8001 and return.
326 | { |
327 | mantal = insert(c8001,#16, #0) |
328 | jumpr r31 |
329 | } |
// Saturation path: build 0x7fffffffffffffff (all ones shifted right by one,
// logically), then insert the low 16 bits of exp into bits 15:0.
330 | .Lsat: |
331 | { |
332 | mantexpa = #-1 |
333 | } |
334 | { |
335 | mantexpa = lsr(mantexpa, #1) |
336 | } |
337 | { |
338 | mantal = insert(exp,#16, #0) |
339 | jumpr r31 |
340 | } |
341 | |
342 | /* ==================================================================== * |
343 | int fast2_qd2f(fast2_QDOUBLE a) { |
344 | int exp; |
345 | long long int manta; |
346 | int ic, rnd, mantb; |
347 | |
348 | manta = a>>32; |
349 | exp = Q6_R_sxth_R(a) ; |
350 | ic = 0x80000000 & manta; |
351 | manta = Q6_R_abs_R_sat(manta); |
352 | rnd = 0x40; |
353 | exp = (exp + 126); |
354 | if((manta & 0xff) == rnd) rnd = 0x00; |
355 | mantb = (manta + rnd)>>7; |
356 | if((manta & 0x7fffffc0) == 0x7fffffc0) { |
357 | manta = 0x0; exp++; |
358 | } else { |
359 | manta= mantb & 0x007fffff; |
360 | } |
361 | exp = (exp << 23) & 0x7fffffc0; |
362 | ic = Q6_R_addacc_RR(ic, exp, manta); |
363 | return (ic); |
364 | } |
365 | * ==================================================================== */ |
366 | |
// fast2_qd2f_asm -- assembly counterpart of the fast2_qd2f() reference code
// given in the comment block above.
// The operand is read from R1:0; the 32-bit result is returned in R0.
// The result word is assembled from a sign bit (bit 31 of the high input
// word), an exponent value shifted left by 23, and a 23-bit mantissa field.
// R0 is shared by four aliases (mantal, cff, c80, ic); the exponent is
// extracted from it in the first packet before it is reused.
367 | .text |
368 | .global fast2_qd2f_asm |
369 | .type fast2_qd2f_asm, @function |
370 | fast2_qd2f_asm: |
371 | #define mantah R1 |
372 | #define mantal R0 |
373 | #define cff R0 |
374 | #define mant R3 |
375 | #define expo R4 |
376 | #define rnd R5 |
377 | #define mask R6 |
378 | #define c07f R7 |
379 | #define c80 R0 |
380 | #define mantb R2 |
381 | #define ic R0 |
382 | |
383 | .falign |
// Packet 1: saturating absolute value of the high word; exponent from the
// sign-extended low halfword; rounding increment 0x40; low half of mask.
384 | { |
385 | mant = abs(mantah):sat |
386 | expo = sxth(mantal) |
387 | rnd = #0x40 |
388 | mask.L = #0xffc0 |
389 | } |
// Packet 2: cff = low 8 bits of the magnitude; p2 = exponent above 126;
// p3 = exponent at least -126; mask becomes 0x7fffffc0.
390 | { |
391 | cff = extractu(mant, #8, #0) |
392 | p2 = cmp.gt(expo, #126) |
393 | p3 = cmp.ge(expo, #-126) |
394 | mask.H = #0x7fff |
395 | } |
// Packet 3: drop the rounding increment when the low 8 bits are exactly 0x40;
// bias the exponent by 126; exponents below -126 return through .Lmin.
396 | { |
397 | p1 = cmp.eq(cff,#0x40) |
398 | if(p1.new) rnd = #0 |
399 | expo = add(expo, #126) |
400 | if(!p3) jump .Lmin |
401 | } |
// Packet 4: p0 = every bit of mask (0x7fffffc0) is set in the magnitude;
// add the rounding increment; c07f = mask >> 8 = 0x007fffff.
402 | { |
403 | p0 = bitsset(mant, mask) |
404 | c80.L = #0x0000 |
405 | mantb = add(mant, rnd) |
406 | c07f = lsr(mask, #8) |
407 | } |
// Packet 5: in the all-ones case bump the exponent and zero the mantissa;
// otherwise the rounded value is shifted right by 7. c80 = 0x80000000.
408 | { |
409 | if(p0) expo = add(expo, #1) |
410 | if(p0) mant = #0 |
411 | mantb = lsr(mantb, #7) |
412 | c80.H = #0x8000 |
413 | } |
// Packet 6: ic = sign bit of the input high word; mask = (expo << 23) masked
// with 0x7fffffc0; keep the low 23 bits of the rounded mantissa; exponents
// above 126 (tested before biasing) return through .Lmax.
414 | { |
415 | ic = and(c80, mantah) |
416 | mask &= asl(expo, #23) |
417 | if(!p0) mant = and(mantb, c07f) |
418 | if(p2) jump .Lmax |
419 | } |
// Packet 7: sign + exponent field + mantissa field, then return.
420 | { |
421 | ic += add(mask, mant) |
422 | jumpr r31 |
423 | } |
// Overflow path: returns 0x7f7fffff. Both halves of R0 are overwritten, so
// the sign bit placed in ic by the previous packet is not preserved.
// NOTE(review): 0x7f7fffff is the largest finite IEEE-754 single-precision
// pattern; confirm that returning it for negative inputs is intended.
424 | .Lmax: |
425 | { |
426 | ic.L = #0xffff; |
427 | } |
428 | { |
429 | ic.H = #0x7f7f; |
430 | jumpr r31 |
431 | } |
// Underflow path: returns 0 regardless of the input sign.
432 | .Lmin: |
433 | { |
434 | ic = #0x0 |
435 | jumpr r31 |
436 | } |
437 | |
438 | /* ==================================================================== * |
439 | fast2_QDOUBLE fast2_f2qd(int ia) { |
440 | lint exp; |
441 | lint mant; |
442 | fast2_QDOUBLE c; |
443 | |
444 | mant = ((ia << 7) | 0x40000000)&0x7fffff80 ; |
445 | if (ia & 0x80000000) mant = -mant; |
446 | exp = ((ia >> 23) & 0xFFLL) - 126; |
447 | c = (mant<<32) | Q6_R_zxth_R(exp); |
448 | return(c); |
449 | } |
450 | * ==================================================================== */ |
// fast2_f2qd_asm -- assembly counterpart of the fast2_f2qd() reference code
// given in the comment block above.
// The 32-bit operand is read from R0; the 64-bit result is left in R1:0
// (mantissa word in R1, zero-extended 16-bit exponent in R0).
// ia and expr both name R0: the input is fully consumed by the first two
// packets before R0 is reused for the exponent.
// Unlike the reference code, an input whose bits 30:0 are all zero takes a
// separate path and returns the constant 0x0000000000008001.
451 | .text |
452 | .global fast2_f2qd_asm |
453 | .type fast2_f2qd_asm, @function |
454 | fast2_f2qd_asm: |
455 | #define ia R0 |
456 | #define mag R3 |
457 | #define mantr R1 |
458 | #define expr R0 |
459 | #define zero R2 |
460 | #define maxneg R5:4 |
461 | #define maxnegl R4 |
462 | .falign |
// Packet 1: move the mantissa field up by 7 bits; p0 = sign bit of the
// input; start the maxneg constant from zero; mag = ia + ia discards the
// sign bit so it can be tested for zero.
463 | { |
464 | mantr = asl(ia, #7) |
465 | p0 = tstbit(ia, #31) |
466 | maxneg = #0 |
467 | mag = add(ia,ia) |
468 | } |
// Packet 2: set bit 30 of the mantissa; extract the 8-bit field at bit 23 as
// the exponent; maxneg = 0x0000000000008001; p1 = input is zero apart from
// its sign bit.
469 | { |
470 | mantr = setbit(mantr, #30) |
471 | expr= extractu(ia,#8,#23) |
472 | maxnegl.L = #0x8001 |
473 | p1 = cmp.eq(mag, #0) |
474 | } |
// Packet 3: keep the low 31 bits of the mantissa (clears bit 31); remove the
// bias of 126 from the exponent; zero inputs return through .Lminqd.
475 | { |
476 | mantr= extractu(mantr, #31, #0) |
477 | expr= add(expr, #-126) |
478 | zero = #0 |
479 | if(p1) jump .Lminqd |
480 | } |
// Packet 4: zero-extend the exponent halfword; negate the mantissa when the
// input sign bit was set; return.
481 | { |
482 | expr = zxth(expr) |
483 | if(p0) mantr= sub(zero, mantr) |
484 | jumpr r31 |
485 | } |
// Zero input: return the constant maxneg (0x0000000000008001) in R1:0.
486 | .Lminqd: |
487 | { |
488 | R1:0 = maxneg |
489 | jumpr r31 |
490 | } |
491 | |