1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Linux/PA-RISC Project (http://www.parisc-linux.org/) |
4 | * |
5 | * Floating-point emulation code |
6 | * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> |
7 | */ |
8 | /* |
9 | * BEGIN_DESC |
10 | * |
11 | * File: |
12 | * @(#) pa/spmath/dfadd.c $Revision: 1.1 $ |
13 | * |
14 | * Purpose: |
15 | * Double_add: add two double precision values. |
16 | * |
17 | * External Interfaces: |
18 | * dbl_fadd(leftptr, rightptr, dstptr, status) |
19 | * |
20 | * Internal Interfaces: |
21 | * |
22 | * Theory: |
23 | * <<please update with a overview of the operation of this file>> |
24 | * |
25 | * END_DESC |
26 | */ |
27 | |
28 | |
29 | #include "float.h" |
30 | #include "dbl_float.h" |
31 | |
32 | /* |
33 | * Double_add: add two double precision values. |
34 | */ |
35 | dbl_fadd( |
36 | dbl_floating_point *leftptr, |
37 | dbl_floating_point *rightptr, |
38 | dbl_floating_point *dstptr, |
39 | unsigned int *status) |
40 | { |
41 | register unsigned int signless_upper_left, signless_upper_right, save; |
42 | register unsigned int leftp1, leftp2, rightp1, rightp2, extent; |
43 | register unsigned int resultp1 = 0, resultp2 = 0; |
44 | |
45 | register int result_exponent, right_exponent, diff_exponent; |
46 | register int sign_save, jumpsize; |
47 | register boolean inexact = FALSE; |
48 | register boolean underflowtrap; |
49 | |
50 | /* Create local copies of the numbers */ |
51 | Dbl_copyfromptr(leftptr,leftp1,leftp2); |
52 | Dbl_copyfromptr(rightptr,rightp1,rightp2); |
53 | |
54 | /* A zero "save" helps discover equal operands (for later), * |
55 | * and is used in swapping operands (if needed). */ |
56 | Dbl_xortointp1(leftp1,rightp1,/*to*/save); |
57 | |
58 | /* |
59 | * check first operand for NaN's or infinity |
60 | */ |
61 | if ((result_exponent = Dbl_exponent(leftp1)) == DBL_INFINITY_EXPONENT) |
62 | { |
63 | if (Dbl_iszero_mantissa(leftp1,leftp2)) |
64 | { |
65 | if (Dbl_isnotnan(rightp1,rightp2)) |
66 | { |
67 | if (Dbl_isinfinity(rightp1,rightp2) && save!=0) |
68 | { |
69 | /* |
70 | * invalid since operands are opposite signed infinity's |
71 | */ |
72 | if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); |
73 | Set_invalidflag(); |
74 | Dbl_makequietnan(resultp1,resultp2); |
75 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
76 | return(NOEXCEPTION); |
77 | } |
78 | /* |
79 | * return infinity |
80 | */ |
81 | Dbl_copytoptr(leftp1,leftp2,dstptr); |
82 | return(NOEXCEPTION); |
83 | } |
84 | } |
85 | else |
86 | { |
87 | /* |
88 | * is NaN; signaling or quiet? |
89 | */ |
90 | if (Dbl_isone_signaling(leftp1)) |
91 | { |
92 | /* trap if INVALIDTRAP enabled */ |
93 | if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); |
94 | /* make NaN quiet */ |
95 | Set_invalidflag(); |
96 | Dbl_set_quiet(leftp1); |
97 | } |
98 | /* |
99 | * is second operand a signaling NaN? |
100 | */ |
101 | else if (Dbl_is_signalingnan(rightp1)) |
102 | { |
103 | /* trap if INVALIDTRAP enabled */ |
104 | if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); |
105 | /* make NaN quiet */ |
106 | Set_invalidflag(); |
107 | Dbl_set_quiet(rightp1); |
108 | Dbl_copytoptr(rightp1,rightp2,dstptr); |
109 | return(NOEXCEPTION); |
110 | } |
111 | /* |
112 | * return quiet NaN |
113 | */ |
114 | Dbl_copytoptr(leftp1,leftp2,dstptr); |
115 | return(NOEXCEPTION); |
116 | } |
117 | } /* End left NaN or Infinity processing */ |
118 | /* |
119 | * check second operand for NaN's or infinity |
120 | */ |
121 | if (Dbl_isinfinity_exponent(rightp1)) |
122 | { |
123 | if (Dbl_iszero_mantissa(rightp1,rightp2)) |
124 | { |
125 | /* return infinity */ |
126 | Dbl_copytoptr(rightp1,rightp2,dstptr); |
127 | return(NOEXCEPTION); |
128 | } |
129 | /* |
130 | * is NaN; signaling or quiet? |
131 | */ |
132 | if (Dbl_isone_signaling(rightp1)) |
133 | { |
134 | /* trap if INVALIDTRAP enabled */ |
135 | if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); |
136 | /* make NaN quiet */ |
137 | Set_invalidflag(); |
138 | Dbl_set_quiet(rightp1); |
139 | } |
140 | /* |
141 | * return quiet NaN |
142 | */ |
143 | Dbl_copytoptr(rightp1,rightp2,dstptr); |
144 | return(NOEXCEPTION); |
145 | } /* End right NaN or Infinity processing */ |
146 | |
147 | /* Invariant: Must be dealing with finite numbers */ |
148 | |
149 | /* Compare operands by removing the sign */ |
150 | Dbl_copytoint_exponentmantissap1(leftp1,signless_upper_left); |
151 | Dbl_copytoint_exponentmantissap1(rightp1,signless_upper_right); |
152 | |
153 | /* sign difference selects add or sub operation. */ |
154 | if(Dbl_ismagnitudeless(leftp2,rightp2,signless_upper_left,signless_upper_right)) |
155 | { |
156 | /* Set the left operand to the larger one by XOR swap * |
157 | * First finish the first word using "save" */ |
158 | Dbl_xorfromintp1(save,rightp1,/*to*/rightp1); |
159 | Dbl_xorfromintp1(save,leftp1,/*to*/leftp1); |
160 | Dbl_swap_lower(leftp2,rightp2); |
161 | result_exponent = Dbl_exponent(leftp1); |
162 | } |
163 | /* Invariant: left is not smaller than right. */ |
164 | |
165 | if((right_exponent = Dbl_exponent(rightp1)) == 0) |
166 | { |
167 | /* Denormalized operands. First look for zeroes */ |
168 | if(Dbl_iszero_mantissa(rightp1,rightp2)) |
169 | { |
170 | /* right is zero */ |
171 | if(Dbl_iszero_exponentmantissa(leftp1,leftp2)) |
172 | { |
173 | /* Both operands are zeros */ |
174 | if(Is_rounding_mode(ROUNDMINUS)) |
175 | { |
176 | Dbl_or_signs(leftp1,/*with*/rightp1); |
177 | } |
178 | else |
179 | { |
180 | Dbl_and_signs(leftp1,/*with*/rightp1); |
181 | } |
182 | } |
183 | else |
184 | { |
185 | /* Left is not a zero and must be the result. Trapped |
186 | * underflows are signaled if left is denormalized. Result |
187 | * is always exact. */ |
188 | if( (result_exponent == 0) && Is_underflowtrap_enabled() ) |
189 | { |
190 | /* need to normalize results mantissa */ |
191 | sign_save = Dbl_signextendedsign(leftp1); |
192 | Dbl_leftshiftby1(leftp1,leftp2); |
193 | Dbl_normalize(leftp1,leftp2,result_exponent); |
194 | Dbl_set_sign(leftp1,/*using*/sign_save); |
195 | Dbl_setwrapped_exponent(leftp1,result_exponent,unfl); |
196 | Dbl_copytoptr(leftp1,leftp2,dstptr); |
197 | /* inexact = FALSE */ |
198 | return(UNDERFLOWEXCEPTION); |
199 | } |
200 | } |
201 | Dbl_copytoptr(leftp1,leftp2,dstptr); |
202 | return(NOEXCEPTION); |
203 | } |
204 | |
205 | /* Neither are zeroes */ |
206 | Dbl_clear_sign(rightp1); /* Exponent is already cleared */ |
207 | if(result_exponent == 0 ) |
208 | { |
209 | /* Both operands are denormalized. The result must be exact |
210 | * and is simply calculated. A sum could become normalized and a |
211 | * difference could cancel to a true zero. */ |
212 | if( (/*signed*/int) save < 0 ) |
213 | { |
214 | Dbl_subtract(leftp1,leftp2,/*minus*/rightp1,rightp2, |
215 | /*into*/resultp1,resultp2); |
216 | if(Dbl_iszero_mantissa(resultp1,resultp2)) |
217 | { |
218 | if(Is_rounding_mode(ROUNDMINUS)) |
219 | { |
220 | Dbl_setone_sign(resultp1); |
221 | } |
222 | else |
223 | { |
224 | Dbl_setzero_sign(resultp1); |
225 | } |
226 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
227 | return(NOEXCEPTION); |
228 | } |
229 | } |
230 | else |
231 | { |
232 | Dbl_addition(leftp1,leftp2,rightp1,rightp2, |
233 | /*into*/resultp1,resultp2); |
234 | if(Dbl_isone_hidden(resultp1)) |
235 | { |
236 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
237 | return(NOEXCEPTION); |
238 | } |
239 | } |
240 | if(Is_underflowtrap_enabled()) |
241 | { |
242 | /* need to normalize result */ |
243 | sign_save = Dbl_signextendedsign(resultp1); |
244 | Dbl_leftshiftby1(resultp1,resultp2); |
245 | Dbl_normalize(resultp1,resultp2,result_exponent); |
246 | Dbl_set_sign(resultp1,/*using*/sign_save); |
247 | Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); |
248 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
249 | /* inexact = FALSE */ |
250 | return(UNDERFLOWEXCEPTION); |
251 | } |
252 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
253 | return(NOEXCEPTION); |
254 | } |
255 | right_exponent = 1; /* Set exponent to reflect different bias |
256 | * with denormalized numbers. */ |
257 | } |
258 | else |
259 | { |
260 | Dbl_clear_signexponent_set_hidden(rightp1); |
261 | } |
262 | Dbl_clear_exponent_set_hidden(leftp1); |
263 | diff_exponent = result_exponent - right_exponent; |
264 | |
265 | /* |
266 | * Special case alignment of operands that would force alignment |
267 | * beyond the extent of the extension. A further optimization |
268 | * could special case this but only reduces the path length for this |
269 | * infrequent case. |
270 | */ |
271 | if(diff_exponent > DBL_THRESHOLD) |
272 | { |
273 | diff_exponent = DBL_THRESHOLD; |
274 | } |
275 | |
276 | /* Align right operand by shifting to right */ |
277 | Dbl_right_align(/*operand*/rightp1,rightp2,/*shifted by*/diff_exponent, |
278 | /*and lower to*/extent); |
279 | |
280 | /* Treat sum and difference of the operands separately. */ |
281 | if( (/*signed*/int) save < 0 ) |
282 | { |
283 | /* |
284 | * Difference of the two operands. Their can be no overflow. A |
285 | * borrow can occur out of the hidden bit and force a post |
286 | * normalization phase. |
287 | */ |
288 | Dbl_subtract_withextension(leftp1,leftp2,/*minus*/rightp1,rightp2, |
289 | /*with*/extent,/*into*/resultp1,resultp2); |
290 | if(Dbl_iszero_hidden(resultp1)) |
291 | { |
292 | /* Handle normalization */ |
293 | /* A straight forward algorithm would now shift the result |
294 | * and extension left until the hidden bit becomes one. Not |
295 | * all of the extension bits need participate in the shift. |
296 | * Only the two most significant bits (round and guard) are |
297 | * needed. If only a single shift is needed then the guard |
298 | * bit becomes a significant low order bit and the extension |
299 | * must participate in the rounding. If more than a single |
300 | * shift is needed, then all bits to the right of the guard |
301 | * bit are zeros, and the guard bit may or may not be zero. */ |
302 | sign_save = Dbl_signextendedsign(resultp1); |
303 | Dbl_leftshiftby1_withextent(resultp1,resultp2,extent,resultp1,resultp2); |
304 | |
305 | /* Need to check for a zero result. The sign and exponent |
306 | * fields have already been zeroed. The more efficient test |
307 | * of the full object can be used. |
308 | */ |
309 | if(Dbl_iszero(resultp1,resultp2)) |
310 | /* Must have been "x-x" or "x+(-x)". */ |
311 | { |
312 | if(Is_rounding_mode(ROUNDMINUS)) Dbl_setone_sign(resultp1); |
313 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
314 | return(NOEXCEPTION); |
315 | } |
316 | result_exponent--; |
317 | /* Look to see if normalization is finished. */ |
318 | if(Dbl_isone_hidden(resultp1)) |
319 | { |
320 | if(result_exponent==0) |
321 | { |
322 | /* Denormalized, exponent should be zero. Left operand * |
323 | * was normalized, so extent (guard, round) was zero */ |
324 | goto underflow; |
325 | } |
326 | else |
327 | { |
328 | /* No further normalization is needed. */ |
329 | Dbl_set_sign(resultp1,/*using*/sign_save); |
330 | Ext_leftshiftby1(extent); |
331 | goto round; |
332 | } |
333 | } |
334 | |
335 | /* Check for denormalized, exponent should be zero. Left * |
336 | * operand was normalized, so extent (guard, round) was zero */ |
337 | if(!(underflowtrap = Is_underflowtrap_enabled()) && |
338 | result_exponent==0) goto underflow; |
339 | |
340 | /* Shift extension to complete one bit of normalization and |
341 | * update exponent. */ |
342 | Ext_leftshiftby1(extent); |
343 | |
344 | /* Discover first one bit to determine shift amount. Use a |
345 | * modified binary search. We have already shifted the result |
346 | * one position right and still not found a one so the remainder |
347 | * of the extension must be zero and simplifies rounding. */ |
348 | /* Scan bytes */ |
349 | while(Dbl_iszero_hiddenhigh7mantissa(resultp1)) |
350 | { |
351 | Dbl_leftshiftby8(resultp1,resultp2); |
352 | if((result_exponent -= 8) <= 0 && !underflowtrap) |
353 | goto underflow; |
354 | } |
355 | /* Now narrow it down to the nibble */ |
356 | if(Dbl_iszero_hiddenhigh3mantissa(resultp1)) |
357 | { |
358 | /* The lower nibble contains the normalizing one */ |
359 | Dbl_leftshiftby4(resultp1,resultp2); |
360 | if((result_exponent -= 4) <= 0 && !underflowtrap) |
361 | goto underflow; |
362 | } |
363 | /* Select case were first bit is set (already normalized) |
364 | * otherwise select the proper shift. */ |
365 | if((jumpsize = Dbl_hiddenhigh3mantissa(resultp1)) > 7) |
366 | { |
367 | /* Already normalized */ |
368 | if(result_exponent <= 0) goto underflow; |
369 | Dbl_set_sign(resultp1,/*using*/sign_save); |
370 | Dbl_set_exponent(resultp1,/*using*/result_exponent); |
371 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
372 | return(NOEXCEPTION); |
373 | } |
374 | Dbl_sethigh4bits(resultp1,/*using*/sign_save); |
375 | switch(jumpsize) |
376 | { |
377 | case 1: |
378 | { |
379 | Dbl_leftshiftby3(resultp1,resultp2); |
380 | result_exponent -= 3; |
381 | break; |
382 | } |
383 | case 2: |
384 | case 3: |
385 | { |
386 | Dbl_leftshiftby2(resultp1,resultp2); |
387 | result_exponent -= 2; |
388 | break; |
389 | } |
390 | case 4: |
391 | case 5: |
392 | case 6: |
393 | case 7: |
394 | { |
395 | Dbl_leftshiftby1(resultp1,resultp2); |
396 | result_exponent -= 1; |
397 | break; |
398 | } |
399 | } |
400 | if(result_exponent > 0) |
401 | { |
402 | Dbl_set_exponent(resultp1,/*using*/result_exponent); |
403 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
404 | return(NOEXCEPTION); /* Sign bit is already set */ |
405 | } |
406 | /* Fixup potential underflows */ |
407 | underflow: |
408 | if(Is_underflowtrap_enabled()) |
409 | { |
410 | Dbl_set_sign(resultp1,sign_save); |
411 | Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); |
412 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
413 | /* inexact = FALSE */ |
414 | return(UNDERFLOWEXCEPTION); |
415 | } |
416 | /* |
417 | * Since we cannot get an inexact denormalized result, |
418 | * we can now return. |
419 | */ |
420 | Dbl_fix_overshift(resultp1,resultp2,(1-result_exponent),extent); |
421 | Dbl_clear_signexponent(resultp1); |
422 | Dbl_set_sign(resultp1,sign_save); |
423 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
424 | return(NOEXCEPTION); |
425 | } /* end if(hidden...)... */ |
426 | /* Fall through and round */ |
427 | } /* end if(save < 0)... */ |
428 | else |
429 | { |
430 | /* Add magnitudes */ |
431 | Dbl_addition(leftp1,leftp2,rightp1,rightp2,/*to*/resultp1,resultp2); |
432 | if(Dbl_isone_hiddenoverflow(resultp1)) |
433 | { |
434 | /* Prenormalization required. */ |
435 | Dbl_rightshiftby1_withextent(resultp2,extent,extent); |
436 | Dbl_arithrightshiftby1(resultp1,resultp2); |
437 | result_exponent++; |
438 | } /* end if hiddenoverflow... */ |
439 | } /* end else ...add magnitudes... */ |
440 | |
441 | /* Round the result. If the extension is all zeros,then the result is |
442 | * exact. Otherwise round in the correct direction. No underflow is |
443 | * possible. If a postnormalization is necessary, then the mantissa is |
444 | * all zeros so no shift is needed. */ |
445 | round: |
446 | if(Ext_isnotzero(extent)) |
447 | { |
448 | inexact = TRUE; |
449 | switch(Rounding_mode()) |
450 | { |
451 | case ROUNDNEAREST: /* The default. */ |
452 | if(Ext_isone_sign(extent)) |
453 | { |
454 | /* at least 1/2 ulp */ |
455 | if(Ext_isnotzero_lower(extent) || |
456 | Dbl_isone_lowmantissap2(resultp2)) |
457 | { |
458 | /* either exactly half way and odd or more than 1/2ulp */ |
459 | Dbl_increment(resultp1,resultp2); |
460 | } |
461 | } |
462 | break; |
463 | |
464 | case ROUNDPLUS: |
465 | if(Dbl_iszero_sign(resultp1)) |
466 | { |
467 | /* Round up positive results */ |
468 | Dbl_increment(resultp1,resultp2); |
469 | } |
470 | break; |
471 | |
472 | case ROUNDMINUS: |
473 | if(Dbl_isone_sign(resultp1)) |
474 | { |
475 | /* Round down negative results */ |
476 | Dbl_increment(resultp1,resultp2); |
477 | } |
478 | |
479 | case ROUNDZERO:; |
480 | /* truncate is simple */ |
481 | } /* end switch... */ |
482 | if(Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; |
483 | } |
484 | if(result_exponent == DBL_INFINITY_EXPONENT) |
485 | { |
486 | /* Overflow */ |
487 | if(Is_overflowtrap_enabled()) |
488 | { |
489 | Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); |
490 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
491 | if (inexact) |
492 | if (Is_inexacttrap_enabled()) |
493 | return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); |
494 | else Set_inexactflag(); |
495 | return(OVERFLOWEXCEPTION); |
496 | } |
497 | else |
498 | { |
499 | inexact = TRUE; |
500 | Set_overflowflag(); |
501 | Dbl_setoverflow(resultp1,resultp2); |
502 | } |
503 | } |
504 | else Dbl_set_exponent(resultp1,result_exponent); |
505 | Dbl_copytoptr(resultp1,resultp2,dstptr); |
506 | if(inexact) |
507 | if(Is_inexacttrap_enabled()) |
508 | return(INEXACTEXCEPTION); |
509 | else Set_inexactflag(); |
510 | return(NOEXCEPTION); |
511 | } |
512 | |