fmpyfadd.c source code [linux/arch/parisc/math-emu/fmpyfadd.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Linux/PA-RISC Project (http://www.parisc-linux.org/)
4	*
5	* Floating-point emulation code
6	* Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7	*/
8	/*
9	* BEGIN_DESC
10	*
11	* File:
12	* @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
13	*
14	* Purpose:
15	* Double Floating-point Multiply Fused Add
16	* Double Floating-point Multiply Negate Fused Add
17	* Single Floating-point Multiply Fused Add
18	* Single Floating-point Multiply Negate Fused Add
19	*
20	* External Interfaces:
21	* dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22	* dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23	* sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24	* sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
25	*
26	* Internal Interfaces:
27	*
28	* Theory:
29	* <<please update with an overview of the operation of this file>>
30	*
31	* END_DESC
32	*/
33
34
35	#include "float.h"
36	#include "sgl_float.h"
37	#include "dbl_float.h"
38
39
40	/*
41	* Double Floating-point Multiply Fused Add
42	*/
43
44	int
45	dbl_fmpyfadd(
46	dbl_floating_point *src1ptr,
47	dbl_floating_point *src2ptr,
48	dbl_floating_point *src3ptr,
49	unsigned int *status,
50	dbl_floating_point *dstptr)
51	{
52	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54	unsigned int rightp1, rightp2, rightp3, rightp4;
55	unsigned int resultp1, resultp2 = `0`, resultp3 = `0`, resultp4 = `0`;
56	register int mpy_exponent, add_exponent, count;
57	boolean inexact = FALSE, is_tiny = FALSE;
58
59	unsigned int signlessleft1, signlessright1, save;
60	register int result_exponent, diff_exponent;
61	int sign_save, jumpsize;
62
63	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
66
67	/*
68	* set sign bit of result of multiply
69	*/
70	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
71	Dbl_setnegativezerop1(resultp1);
72	else Dbl_setzerop1(resultp1);
73
74	/*
75	* Generate multiply exponent
76	*/
77	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
78
79	/*
80	* check first operand for NaN's or infinity
81	*/
82	if (Dbl_isinfinity_exponent(opnd1p1)) {
83	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84	if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85	Dbl_isnotnan(opnd3p1,opnd3p2)) {
86	if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87	/*
88	* invalid since operands are infinity
89	* and zero
90	*/
91	if (Is_invalidtrap_enabled())
92	return(OPC_2E_INVALIDEXCEPTION);
93	Set_invalidflag();
94	Dbl_makequietnan(resultp1,resultp2);
95	Dbl_copytoptr(resultp1,resultp2,dstptr);
96	return(NOEXCEPTION);
97	}
98	/*
99	* Check third operand for infinity with a
100	* sign opposite of the multiply result
101	*/
102	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
104	/*
105	* invalid since attempting a magnitude
106	* subtraction of infinities
107	*/
108	if (Is_invalidtrap_enabled())
109	return(OPC_2E_INVALIDEXCEPTION);
110	Set_invalidflag();
111	Dbl_makequietnan(resultp1,resultp2);
112	Dbl_copytoptr(resultp1,resultp2,dstptr);
113	return(NOEXCEPTION);
114	}
115
116	/*
117	* return infinity
118	*/
119	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120	Dbl_copytoptr(resultp1,resultp2,dstptr);
121	return(NOEXCEPTION);
122	}
123	}
124	else {
125	/*
126	* is NaN; signaling or quiet?
127	*/
128	if (Dbl_isone_signaling(opnd1p1)) {
129	/ trap if INVALIDTRAP enabled /
130	if (Is_invalidtrap_enabled())
131	return(OPC_2E_INVALIDEXCEPTION);
132	/ make NaN quiet /
133	Set_invalidflag();
134	Dbl_set_quiet(opnd1p1);
135	}
136	/*
137	* is second operand a signaling NaN?
138	*/
139	else if (Dbl_is_signalingnan(opnd2p1)) {
140	/ trap if INVALIDTRAP enabled /
141	if (Is_invalidtrap_enabled())
142	return(OPC_2E_INVALIDEXCEPTION);
143	/ make NaN quiet /
144	Set_invalidflag();
145	Dbl_set_quiet(opnd2p1);
146	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147	return(NOEXCEPTION);
148	}
149	/*
150	* is third operand a signaling NaN?
151	*/
152	else if (Dbl_is_signalingnan(opnd3p1)) {
153	/ trap if INVALIDTRAP enabled /
154	if (Is_invalidtrap_enabled())
155	return(OPC_2E_INVALIDEXCEPTION);
156	/ make NaN quiet /
157	Set_invalidflag();
158	Dbl_set_quiet(opnd3p1);
159	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160	return(NOEXCEPTION);
161	}
162	/*
163	* return quiet NaN
164	*/
165	Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166	return(NOEXCEPTION);
167	}
168	}
169
170	/*
171	* check second operand for NaN's or infinity
172	*/
173	if (Dbl_isinfinity_exponent(opnd2p1)) {
174	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175	if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176	if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
177	/*
178	* invalid since multiply operands are
179	* zero & infinity
180	*/
181	if (Is_invalidtrap_enabled())
182	return(OPC_2E_INVALIDEXCEPTION);
183	Set_invalidflag();
184	Dbl_makequietnan(opnd2p1,opnd2p2);
185	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186	return(NOEXCEPTION);
187	}
188
189	/*
190	* Check third operand for infinity with a
191	* sign opposite of the multiply result
192	*/
193	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
195	/*
196	* invalid since attempting a magnitude
197	* subtraction of infinities
198	*/
199	if (Is_invalidtrap_enabled())
200	return(OPC_2E_INVALIDEXCEPTION);
201	Set_invalidflag();
202	Dbl_makequietnan(resultp1,resultp2);
203	Dbl_copytoptr(resultp1,resultp2,dstptr);
204	return(NOEXCEPTION);
205	}
206
207	/*
208	* return infinity
209	*/
210	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211	Dbl_copytoptr(resultp1,resultp2,dstptr);
212	return(NOEXCEPTION);
213	}
214	}
215	else {
216	/*
217	* is NaN; signaling or quiet?
218	*/
219	if (Dbl_isone_signaling(opnd2p1)) {
220	/ trap if INVALIDTRAP enabled /
221	if (Is_invalidtrap_enabled())
222	return(OPC_2E_INVALIDEXCEPTION);
223	/ make NaN quiet /
224	Set_invalidflag();
225	Dbl_set_quiet(opnd2p1);
226	}
227	/*
228	* is third operand a signaling NaN?
229	*/
230	else if (Dbl_is_signalingnan(opnd3p1)) {
231	/ trap if INVALIDTRAP enabled /
232	if (Is_invalidtrap_enabled())
233	return(OPC_2E_INVALIDEXCEPTION);
234	/ make NaN quiet /
235	Set_invalidflag();
236	Dbl_set_quiet(opnd3p1);
237	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238	return(NOEXCEPTION);
239	}
240	/*
241	* return quiet NaN
242	*/
243	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244	return(NOEXCEPTION);
245	}
246	}
247
248	/*
249	* check third operand for NaN's or infinity
250	*/
251	if (Dbl_isinfinity_exponent(opnd3p1)) {
252	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253	/ return infinity /
254	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255	return(NOEXCEPTION);
256	} else {
257	/*
258	* is NaN; signaling or quiet?
259	*/
260	if (Dbl_isone_signaling(opnd3p1)) {
261	/ trap if INVALIDTRAP enabled /
262	if (Is_invalidtrap_enabled())
263	return(OPC_2E_INVALIDEXCEPTION);
264	/ make NaN quiet /
265	Set_invalidflag();
266	Dbl_set_quiet(opnd3p1);
267	}
268	/*
269	* return quiet NaN
270	*/
271	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272	return(NOEXCEPTION);
273	}
274	}
275
276	/*
277	* Generate multiply mantissa
278	*/
279	if (Dbl_isnotzero_exponent(opnd1p1)) {
280	/ set hidden bit /
281	Dbl_clear_signexponent_set_hidden(opnd1p1);
282	}
283	else {
284	/ check for zero /
285	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
286	/*
287	* Perform the add opnd3 with zero here.
288	*/
289	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290	if (Is_rounding_mode(ROUNDMINUS)) {
291	Dbl_or_signs(opnd3p1,resultp1);
292	} else {
293	Dbl_and_signs(opnd3p1,resultp1);
294	}
295	}
296	/*
297	* Now let's check for trapped underflow case.
298	*/
299	else if (Dbl_iszero_exponent(opnd3p1) &&
300	Is_underflowtrap_enabled()) {
301	/ need to normalize results mantissa /
302	sign_save = Dbl_signextendedsign(opnd3p1);
303	result_exponent = `0`;
304	Dbl_leftshiftby1(opnd3p1,opnd3p2);
305	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306	Dbl_set_sign(opnd3p1,/using/sign_save);
307	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308	unfl);
309	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310	/ inexact = FALSE /
311	return(OPC_2E_UNDERFLOWEXCEPTION);
312	}
313	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314	return(NOEXCEPTION);
315	}
316	/ is denormalized, adjust exponent /
317	Dbl_clear_signexponent(opnd1p1);
318	Dbl_leftshiftby1(opnd1p1,opnd1p2);
319	Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
320	}
321	/ opnd2 needs to have hidden bit set with msb in hidden bit /
322	if (Dbl_isnotzero_exponent(opnd2p1)) {
323	Dbl_clear_signexponent_set_hidden(opnd2p1);
324	}
325	else {
326	/ check for zero /
327	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
328	/*
329	* Perform the add opnd3 with zero here.
330	*/
331	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332	if (Is_rounding_mode(ROUNDMINUS)) {
333	Dbl_or_signs(opnd3p1,resultp1);
334	} else {
335	Dbl_and_signs(opnd3p1,resultp1);
336	}
337	}
338	/*
339	* Now let's check for trapped underflow case.
340	*/
341	else if (Dbl_iszero_exponent(opnd3p1) &&
342	Is_underflowtrap_enabled()) {
343	/ need to normalize results mantissa /
344	sign_save = Dbl_signextendedsign(opnd3p1);
345	result_exponent = `0`;
346	Dbl_leftshiftby1(opnd3p1,opnd3p2);
347	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348	Dbl_set_sign(opnd3p1,/using/sign_save);
349	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350	unfl);
351	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352	/ inexact = FALSE /
353	return(OPC_2E_UNDERFLOWEXCEPTION);
354	}
355	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356	return(NOEXCEPTION);
357	}
358	/ is denormalized; want to normalize /
359	Dbl_clear_signexponent(opnd2p1);
360	Dbl_leftshiftby1(opnd2p1,opnd2p2);
361	Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
362	}
363
364	/ Multiply the first two source mantissas together /
365
366	/*
367	* The intermediate result will be kept in tmpres,
368	* which needs enough room for 106 bits of mantissa,
369	* so lets call it a Double extended.
370	*/
371	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
372
373	/*
374	* Four bits at a time are inspected in each loop, and a
375	* simple shift and add multiply algorithm is used.
376	*/
377	for (count = DBL_P-`1`; count >= `0`; count -= `4`) {
378	Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379	if (Dbit28p2(opnd1p2)) {
380	/ Fourword_add should be an ADD followed by 3 ADDC's /
381	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
382	opnd2p1<<`3` \| opnd2p2>>`29`, opnd2p2<<`3`, `0`, `0`);
383	}
384	if (Dbit29p2(opnd1p2)) {
385	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386	opnd2p1<<`2` \| opnd2p2>>`30`, opnd2p2<<`2`, `0`, `0`);
387	}
388	if (Dbit30p2(opnd1p2)) {
389	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390	opnd2p1<<`1` \| opnd2p2>>`31`, opnd2p2<<`1`, `0`, `0`);
391	}
392	if (Dbit31p2(opnd1p2)) {
393	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394	opnd2p1, opnd2p2, `0`, `0`);
395	}
396	Dbl_rightshiftby4(opnd1p1,opnd1p2);
397	}
398	if (Is_dexthiddenoverflow(tmpresp1)) {
399	/ result mantissa >= 2 (mantissa overflow) /
400	mpy_exponent++;
401	Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
402	}
403
404	/*
405	* Restore the sign of the mpy result which was saved in resultp1.
406	* The exponent will continue to be kept in mpy_exponent.
407	*/
408	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
409
410	/*
411	* No rounding is required, since the result of the multiply
412	* is exact in the extended format.
413	*/
414
415	/*
416	* Now we are ready to perform the add portion of the operation.
417	*
418	* The exponents need to be kept as integers for now, since the
419	* multiply result might not fit into the exponent field. We
420	* can't overflow or underflow because of this yet, since the
421	* add could bring the final result back into range.
422	*/
423	add_exponent = Dbl_exponent(opnd3p1);
424
425	/*
426	* Check for denormalized or zero add operand.
427	*/
428	if (add_exponent == `0`) {
429	/ check for zero /
430	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431	/ right is zero /
432	/ Left can't be zero and must be result.*
433	*
434	* The final result is now in tmpres and mpy_exponent,
435	* and needs to be rounded and squeezed back into
436	* double precision format from double extended.
437	*/
438	result_exponent = mpy_exponent;
439	Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440	resultp1,resultp2,resultp3,resultp4);
441	sign_save = Dbl_signextendedsign(resultp1);/save sign/
442	goto round;
443	}
444
445	/*
446	* Neither are zeroes.
447	* Adjust exponent and normalize add operand.
448	*/
449	sign_save = Dbl_signextendedsign(opnd3p1); / save sign /
450	Dbl_clear_signexponent(opnd3p1);
451	Dbl_leftshiftby1(opnd3p1,opnd3p2);
452	Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453	Dbl_set_sign(opnd3p1,sign_save); / restore sign /
454	} else {
455	Dbl_clear_exponent_set_hidden(opnd3p1);
456	}
457	/*
458	* Copy opnd3 to the double extended variable called right.
459	*/
460	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
461
462	/*
463	* A zero "save" helps discover equal operands (for later),
464	* and is used in swapping operands (if needed).
465	*/
466	Dblext_xortointp1(tmpresp1,rightp1,/to/save);
467
468	/*
469	* Compare magnitude of operands.
470	*/
471	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
474	Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
475	/*
476	* Set the left operand to the larger one by XOR swap.
477	* First finish the first word "save".
478	*/
479	Dblext_xorfromintp1(save,rightp1,/to/rightp1);
480	Dblext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
481	Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482	rightp2,rightp3,rightp4);
483	/ also setup exponents used in rest of routine /
484	diff_exponent = add_exponent - mpy_exponent;
485	result_exponent = add_exponent;
486	} else {
487	/ also setup exponents used in rest of routine /
488	diff_exponent = mpy_exponent - add_exponent;
489	result_exponent = mpy_exponent;
490	}
491	/ Invariant: left is not smaller than right. /
492
493	/*
494	* Special case alignment of operands that would force alignment
495	* beyond the extent of the extension. A further optimization
496	* could special case this but only reduces the path length for
497	* this infrequent case.
498	*/
499	if (diff_exponent > DBLEXT_THRESHOLD) {
500	diff_exponent = DBLEXT_THRESHOLD;
501	}
502
503	/ Align right operand by shifting it to the right /
504	Dblext_clear_sign(rightp1);
505	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506	/shifted by/diff_exponent);
507
508	/ Treat sum and difference of the operands separately. /
509	if ((int)save < `0`) {
510	/*
511	* Difference of the two operands. Overflow can occur if the
512	* multiply overflowed. A borrow can occur out of the hidden
513	* bit and force a post normalization phase.
514	*/
515	Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516	rightp1,rightp2,rightp3,rightp4,
517	resultp1,resultp2,resultp3,resultp4);
518	sign_save = Dbl_signextendedsign(resultp1);
519	if (Dbl_iszero_hidden(resultp1)) {
520	/ Handle normalization /
521	/ A straightforward algorithm would now shift the*
522	* result and extension left until the hidden bit
523	* becomes one. Not all of the extension bits need
524	* participate in the shift. Only the two most
525	* significant bits (round and guard) are needed.
526	* If only a single shift is needed then the guard
527	* bit becomes a significant low order bit and the
528	* extension must participate in the rounding.
529	* If more than a single shift is needed, then all
530	* bits to the right of the guard bit are zeros,
531	* and the guard bit may or may not be zero. */
532	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533	resultp4);
534
535	/ Need to check for a zero result. The sign and*
536	* exponent fields have already been zeroed. The more
537	* efficient test of the full object can be used.
538	*/
539	if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540	/ Must have been "x-x" or "x+(-x)". /
541	if (Is_rounding_mode(ROUNDMINUS))
542	Dbl_setone_sign(resultp1);
543	Dbl_copytoptr(resultp1,resultp2,dstptr);
544	return(NOEXCEPTION);
545	}
546	result_exponent--;
547
548	/ Look to see if normalization is finished. /
549	if (Dbl_isone_hidden(resultp1)) {
550	/ No further normalization is needed /
551	goto round;
552	}
553
554	/ Discover first one bit to determine shift amount.*
555	* Use a modified binary search. We have already
556	* shifted the result one position right and still
557	* not found a one so the remainder of the extension
558	* must be zero and simplifies rounding. */
559	/ Scan bytes /
560	while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561	Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562	result_exponent -= `8`;
563	}
564	/ Now narrow it down to the nibble /
565	if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566	/ The lower nibble contains the*
567	* normalizing one */
568	Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569	result_exponent -= `4`;
570	}
571	/ Select case where first bit is set (already*
572	* normalized) otherwise select the proper shift. */
573	jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574	if (jumpsize <= `7`) switch(jumpsize) {
575	case `1`:
576	Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577	resultp4);
578	result_exponent -= `3`;
579	break;
580	case `2`:
581	case `3`:
582	Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583	resultp4);
584	result_exponent -= `2`;
585	break;
586	case `4`:
587	case `5`:
588	case `6`:
589	case `7`:
590	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591	resultp4);
592	result_exponent -= `1`;
593	break;
594	}
595	} / end if (hidden...)... /
596	/ Fall through and round /
597	} / end if (save < 0)... /
598	else {
599	/ Add magnitudes /
600	Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601	rightp1,rightp2,rightp3,rightp4,
602	/to/resultp1,resultp2,resultp3,resultp4);
603	sign_save = Dbl_signextendedsign(resultp1);
604	if (Dbl_isone_hiddenoverflow(resultp1)) {
605	/ Prenormalization required. /
606	Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607	resultp4);
608	result_exponent++;
609	} / end if hiddenoverflow... /
610	} / end else ...add magnitudes... /
611
612	/ Round the result. If the extension and lower two words are*
613	* all zeros, then the result is exact. Otherwise round in the
614	* correct direction. Underflow is possible. If a postnormalization
615	* is necessary, then the mantissa is all zeros so no shift is needed.
616	*/
617	round:
618	if (result_exponent <= `0` && !Is_underflowtrap_enabled()) {
619	Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620	result_exponent,is_tiny);
621	}
622	Dbl_set_sign(resultp1,/using/sign_save);
623	if (Dblext_isnotzero_mantissap3(resultp3) \|\|
624	Dblext_isnotzero_mantissap4(resultp4)) {
625	inexact = TRUE;
626	switch(Rounding_mode()) {
627	case ROUNDNEAREST: / The default. /
628	if (Dblext_isone_highp3(resultp3)) {
629	/ at least 1/2 ulp /
630	if (Dblext_isnotzero_low31p3(resultp3) \|\|
631	Dblext_isnotzero_mantissap4(resultp4) \|\|
632	Dblext_isone_lowp2(resultp2)) {
633	/ either exactly half way and odd or*
634	* more than 1/2ulp */
635	Dbl_increment(resultp1,resultp2);
636	}
637	}
638	break;
639
640	case ROUNDPLUS:
641	if (Dbl_iszero_sign(resultp1)) {
642	/ Round up positive results /
643	Dbl_increment(resultp1,resultp2);
644	}
645	break;
646
647	case ROUNDMINUS:
648	if (Dbl_isone_sign(resultp1)) {
649	/ Round down negative results /
650	Dbl_increment(resultp1,resultp2);
651	}
652
653	case ROUNDZERO:;
654	/ truncate is simple /
655	} / end switch... /
656	if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
657	}
658	if (result_exponent >= DBL_INFINITY_EXPONENT) {
659	/ trap if OVERFLOWTRAP enabled /
660	if (Is_overflowtrap_enabled()) {
661	/*
662	* Adjust bias of result
663	*/
664	Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665	Dbl_copytoptr(resultp1,resultp2,dstptr);
666	if (inexact)
667	if (Is_inexacttrap_enabled())
668	return (OPC_2E_OVERFLOWEXCEPTION \|
669	OPC_2E_INEXACTEXCEPTION);
670	else Set_inexactflag();
671	return (OPC_2E_OVERFLOWEXCEPTION);
672	}
673	inexact = TRUE;
674	Set_overflowflag();
675	/ set result to infinity or largest number /
676	Dbl_setoverflow(resultp1,resultp2);
677
678	} else if (result_exponent <= `0`) { / underflow case /
679	if (Is_underflowtrap_enabled()) {
680	/*
681	* Adjust bias of result
682	*/
683	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684	Dbl_copytoptr(resultp1,resultp2,dstptr);
685	if (inexact)
686	if (Is_inexacttrap_enabled())
687	return (OPC_2E_UNDERFLOWEXCEPTION \|
688	OPC_2E_INEXACTEXCEPTION);
689	else Set_inexactflag();
690	return(OPC_2E_UNDERFLOWEXCEPTION);
691	}
692	else if (inexact && is_tiny) Set_underflowflag();
693	}
694	else Dbl_set_exponent(resultp1,result_exponent);
695	Dbl_copytoptr(resultp1,resultp2,dstptr);
696	if (inexact)
697	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698	else Set_inexactflag();
699	return(NOEXCEPTION);
700	}
701
702	/*
703	* Double Floating-point Multiply Negate Fused Add
704	*/
705
706	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
707
708	dbl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
709	unsigned int *status;
710	{
711	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713	unsigned int rightp1, rightp2, rightp3, rightp4;
714	unsigned int resultp1, resultp2 = `0`, resultp3 = `0`, resultp4 = `0`;
715	register int mpy_exponent, add_exponent, count;
716	boolean inexact = FALSE, is_tiny = FALSE;
717
718	unsigned int signlessleft1, signlessright1, save;
719	register int result_exponent, diff_exponent;
720	int sign_save, jumpsize;
721
722	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
725
726	/*
727	* set sign bit of result of multiply
728	*/
729	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
730	Dbl_setzerop1(resultp1);
731	else
732	Dbl_setnegativezerop1(resultp1);
733
734	/*
735	* Generate multiply exponent
736	*/
737	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
738
739	/*
740	* check first operand for NaN's or infinity
741	*/
742	if (Dbl_isinfinity_exponent(opnd1p1)) {
743	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744	if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745	Dbl_isnotnan(opnd3p1,opnd3p2)) {
746	if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
747	/*
748	* invalid since operands are infinity
749	* and zero
750	*/
751	if (Is_invalidtrap_enabled())
752	return(OPC_2E_INVALIDEXCEPTION);
753	Set_invalidflag();
754	Dbl_makequietnan(resultp1,resultp2);
755	Dbl_copytoptr(resultp1,resultp2,dstptr);
756	return(NOEXCEPTION);
757	}
758	/*
759	* Check third operand for infinity with a
760	* sign opposite of the multiply result
761	*/
762	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
764	/*
765	* invalid since attempting a magnitude
766	* subtraction of infinities
767	*/
768	if (Is_invalidtrap_enabled())
769	return(OPC_2E_INVALIDEXCEPTION);
770	Set_invalidflag();
771	Dbl_makequietnan(resultp1,resultp2);
772	Dbl_copytoptr(resultp1,resultp2,dstptr);
773	return(NOEXCEPTION);
774	}
775
776	/*
777	* return infinity
778	*/
779	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780	Dbl_copytoptr(resultp1,resultp2,dstptr);
781	return(NOEXCEPTION);
782	}
783	}
784	else {
785	/*
786	* is NaN; signaling or quiet?
787	*/
788	if (Dbl_isone_signaling(opnd1p1)) {
789	/ trap if INVALIDTRAP enabled /
790	if (Is_invalidtrap_enabled())
791	return(OPC_2E_INVALIDEXCEPTION);
792	/ make NaN quiet /
793	Set_invalidflag();
794	Dbl_set_quiet(opnd1p1);
795	}
796	/*
797	* is second operand a signaling NaN?
798	*/
799	else if (Dbl_is_signalingnan(opnd2p1)) {
800	/ trap if INVALIDTRAP enabled /
801	if (Is_invalidtrap_enabled())
802	return(OPC_2E_INVALIDEXCEPTION);
803	/ make NaN quiet /
804	Set_invalidflag();
805	Dbl_set_quiet(opnd2p1);
806	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807	return(NOEXCEPTION);
808	}
809	/*
810	* is third operand a signaling NaN?
811	*/
812	else if (Dbl_is_signalingnan(opnd3p1)) {
813	/ trap if INVALIDTRAP enabled /
814	if (Is_invalidtrap_enabled())
815	return(OPC_2E_INVALIDEXCEPTION);
816	/ make NaN quiet /
817	Set_invalidflag();
818	Dbl_set_quiet(opnd3p1);
819	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820	return(NOEXCEPTION);
821	}
822	/*
823	* return quiet NaN
824	*/
825	Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826	return(NOEXCEPTION);
827	}
828	}
829
830	/*
831	* check second operand for NaN's or infinity
832	*/
833	if (Dbl_isinfinity_exponent(opnd2p1)) {
834	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835	if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836	if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
837	/*
838	* invalid since multiply operands are
839	* zero & infinity
840	*/
841	if (Is_invalidtrap_enabled())
842	return(OPC_2E_INVALIDEXCEPTION);
843	Set_invalidflag();
844	Dbl_makequietnan(opnd2p1,opnd2p2);
845	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846	return(NOEXCEPTION);
847	}
848
849	/*
850	* Check third operand for infinity with a
851	* sign opposite of the multiply result
852	*/
853	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
855	/*
856	* invalid since attempting a magnitude
857	* subtraction of infinities
858	*/
859	if (Is_invalidtrap_enabled())
860	return(OPC_2E_INVALIDEXCEPTION);
861	Set_invalidflag();
862	Dbl_makequietnan(resultp1,resultp2);
863	Dbl_copytoptr(resultp1,resultp2,dstptr);
864	return(NOEXCEPTION);
865	}
866
867	/*
868	* return infinity
869	*/
870	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871	Dbl_copytoptr(resultp1,resultp2,dstptr);
872	return(NOEXCEPTION);
873	}
874	}
875	else {
876	/*
877	* is NaN; signaling or quiet?
878	*/
879	if (Dbl_isone_signaling(opnd2p1)) {
880	/ trap if INVALIDTRAP enabled /
881	if (Is_invalidtrap_enabled())
882	return(OPC_2E_INVALIDEXCEPTION);
883	/ make NaN quiet /
884	Set_invalidflag();
885	Dbl_set_quiet(opnd2p1);
886	}
887	/*
888	* is third operand a signaling NaN?
889	*/
890	else if (Dbl_is_signalingnan(opnd3p1)) {
891	/ trap if INVALIDTRAP enabled /
892	if (Is_invalidtrap_enabled())
893	return(OPC_2E_INVALIDEXCEPTION);
894	/ make NaN quiet /
895	Set_invalidflag();
896	Dbl_set_quiet(opnd3p1);
897	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898	return(NOEXCEPTION);
899	}
900	/*
901	* return quiet NaN
902	*/
903	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904	return(NOEXCEPTION);
905	}
906	}
907
908	/*
909	* check third operand for NaN's or infinity
910	*/
911	if (Dbl_isinfinity_exponent(opnd3p1)) {
912	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913	/ return infinity /
914	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915	return(NOEXCEPTION);
916	} else {
917	/*
918	* is NaN; signaling or quiet?
919	*/
920	if (Dbl_isone_signaling(opnd3p1)) {
921	/ trap if INVALIDTRAP enabled /
922	if (Is_invalidtrap_enabled())
923	return(OPC_2E_INVALIDEXCEPTION);
924	/ make NaN quiet /
925	Set_invalidflag();
926	Dbl_set_quiet(opnd3p1);
927	}
928	/*
929	* return quiet NaN
930	*/
931	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932	return(NOEXCEPTION);
933	}
934	}
935
936	/*
937	* Generate multiply mantissa
938	*/
939	if (Dbl_isnotzero_exponent(opnd1p1)) {
940	/ set hidden bit /
941	Dbl_clear_signexponent_set_hidden(opnd1p1);
942	}
943	else {
944	/ check for zero /
945	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
946	/*
947	* Perform the add opnd3 with zero here.
948	*/
949	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950	if (Is_rounding_mode(ROUNDMINUS)) {
951	Dbl_or_signs(opnd3p1,resultp1);
952	} else {
953	Dbl_and_signs(opnd3p1,resultp1);
954	}
955	}
956	/*
957	* Now let's check for trapped underflow case.
958	*/
959	else if (Dbl_iszero_exponent(opnd3p1) &&
960	Is_underflowtrap_enabled()) {
961	/ need to normalize results mantissa /
962	sign_save = Dbl_signextendedsign(opnd3p1);
963	result_exponent = `0`;
964	Dbl_leftshiftby1(opnd3p1,opnd3p2);
965	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966	Dbl_set_sign(opnd3p1,/using/sign_save);
967	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968	unfl);
969	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970	/ inexact = FALSE /
971	return(OPC_2E_UNDERFLOWEXCEPTION);
972	}
973	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974	return(NOEXCEPTION);
975	}
976	/ is denormalized, adjust exponent /
977	Dbl_clear_signexponent(opnd1p1);
978	Dbl_leftshiftby1(opnd1p1,opnd1p2);
979	Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
980	}
981	/ opnd2 needs to have hidden bit set with msb in hidden bit /
982	if (Dbl_isnotzero_exponent(opnd2p1)) {
983	Dbl_clear_signexponent_set_hidden(opnd2p1);
984	}
985	else {
986	/ check for zero /
987	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
988	/*
989	* Perform the add opnd3 with zero here.
990	*/
991	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992	if (Is_rounding_mode(ROUNDMINUS)) {
993	Dbl_or_signs(opnd3p1,resultp1);
994	} else {
995	Dbl_and_signs(opnd3p1,resultp1);
996	}
997	}
998	/*
999	* Now let's check for trapped underflow case.
1000	*/
1001	else if (Dbl_iszero_exponent(opnd3p1) &&
1002	Is_underflowtrap_enabled()) {
1003	/ need to normalize results mantissa /
1004	sign_save = Dbl_signextendedsign(opnd3p1);
1005	result_exponent = `0`;
1006	Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008	Dbl_set_sign(opnd3p1,/using/sign_save);
1009	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010	unfl);
1011	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012	/ inexact = FALSE /
1013	return(OPC_2E_UNDERFLOWEXCEPTION);
1014	}
1015	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016	return(NOEXCEPTION);
1017	}
1018	/ is denormalized; want to normalize /
1019	Dbl_clear_signexponent(opnd2p1);
1020	Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021	Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022	}
1023
1024	/ Multiply the first two source mantissas together /
1025
1026	/*
1027	* The intermediate result will be kept in tmpres,
1028	* which needs enough room for 106 bits of mantissa,
1029	* so lets call it a Double extended.
1030	*/
1031	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032
1033	/*
1034	* Four bits at a time are inspected in each loop, and a
1035	* simple shift and add multiply algorithm is used.
1036	*/
1037	for (count = DBL_P-`1`; count >= `0`; count -= `4`) {
1038	Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039	if (Dbit28p2(opnd1p2)) {
1040	/ Fourword_add should be an ADD followed by 3 ADDC's /
1041	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042	opnd2p1<<`3` \| opnd2p2>>`29`, opnd2p2<<`3`, `0`, `0`);
1043	}
1044	if (Dbit29p2(opnd1p2)) {
1045	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046	opnd2p1<<`2` \| opnd2p2>>`30`, opnd2p2<<`2`, `0`, `0`);
1047	}
1048	if (Dbit30p2(opnd1p2)) {
1049	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050	opnd2p1<<`1` \| opnd2p2>>`31`, opnd2p2<<`1`, `0`, `0`);
1051	}
1052	if (Dbit31p2(opnd1p2)) {
1053	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054	opnd2p1, opnd2p2, `0`, `0`);
1055	}
1056	Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057	}
1058	if (Is_dexthiddenoverflow(tmpresp1)) {
1059	/ result mantissa >= 2 (mantissa overflow) /
1060	mpy_exponent++;
1061	Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062	}
1063
1064	/*
1065	* Restore the sign of the mpy result which was saved in resultp1.
1066	* The exponent will continue to be kept in mpy_exponent.
1067	*/
1068	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069
1070	/*
1071	* No rounding is required, since the result of the multiply
1072	* is exact in the extended format.
1073	*/
1074
1075	/*
1076	* Now we are ready to perform the add portion of the operation.
1077	*
1078	* The exponents need to be kept as integers for now, since the
1079	* multiply result might not fit into the exponent field. We
1080	* can't overflow or underflow because of this yet, since the
1081	* add could bring the final result back into range.
1082	*/
1083	add_exponent = Dbl_exponent(opnd3p1);
1084
1085	/*
1086	* Check for denormalized or zero add operand.
1087	*/
1088	if (add_exponent == `0`) {
1089	/ check for zero /
1090	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091	/ right is zero /
1092	/ Left can't be zero and must be result.*
1093	*
1094	* The final result is now in tmpres and mpy_exponent,
1095	* and needs to be rounded and squeezed back into
1096	* double precision format from double extended.
1097	*/
1098	result_exponent = mpy_exponent;
1099	Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100	resultp1,resultp2,resultp3,resultp4);
1101	sign_save = Dbl_signextendedsign(resultp1);/save sign/
1102	goto round;
1103	}
1104
1105	/*
1106	* Neither are zeroes.
1107	* Adjust exponent and normalize add operand.
1108	*/
1109	sign_save = Dbl_signextendedsign(opnd3p1); / save sign /
1110	Dbl_clear_signexponent(opnd3p1);
1111	Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112	Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113	Dbl_set_sign(opnd3p1,sign_save); / restore sign /
1114	} else {
1115	Dbl_clear_exponent_set_hidden(opnd3p1);
1116	}
1117	/*
1118	* Copy opnd3 to the double extended variable called right.
1119	*/
1120	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121
1122	/*
1123	* A zero "save" helps discover equal operands (for later),
1124	* and is used in swapping operands (if needed).
1125	*/
1126	Dblext_xortointp1(tmpresp1,rightp1,/to/save);
1127
1128	/*
1129	* Compare magnitude of operands.
1130	*/
1131	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
1134	Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135	/*
1136	* Set the left operand to the larger one by XOR swap.
1137	* First finish the first word "save".
1138	*/
1139	Dblext_xorfromintp1(save,rightp1,/to/rightp1);
1140	Dblext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
1141	Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142	rightp2,rightp3,rightp4);
1143	/ also setup exponents used in rest of routine /
1144	diff_exponent = add_exponent - mpy_exponent;
1145	result_exponent = add_exponent;
1146	} else {
1147	/ also setup exponents used in rest of routine /
1148	diff_exponent = mpy_exponent - add_exponent;
1149	result_exponent = mpy_exponent;
1150	}
1151	/ Invariant: left is not smaller than right. /
1152
1153	/*
1154	* Special case alignment of operands that would force alignment
1155	* beyond the extent of the extension. A further optimization
1156	* could special case this but only reduces the path length for
1157	* this infrequent case.
1158	*/
1159	if (diff_exponent > DBLEXT_THRESHOLD) {
1160	diff_exponent = DBLEXT_THRESHOLD;
1161	}
1162
1163	/ Align right operand by shifting it to the right /
1164	Dblext_clear_sign(rightp1);
1165	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166	/shifted by/diff_exponent);
1167
1168	/ Treat sum and difference of the operands separately. /
1169	if ((int)save < `0`) {
1170	/*
1171	* Difference of the two operands. Overflow can occur if the
1172	* multiply overflowed. A borrow can occur out of the hidden
1173	* bit and force a post normalization phase.
1174	*/
1175	Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176	rightp1,rightp2,rightp3,rightp4,
1177	resultp1,resultp2,resultp3,resultp4);
1178	sign_save = Dbl_signextendedsign(resultp1);
1179	if (Dbl_iszero_hidden(resultp1)) {
1180	/ Handle normalization /
1181	/ A straightforward algorithm would now shift the*
1182	* result and extension left until the hidden bit
1183	* becomes one. Not all of the extension bits need
1184	* participate in the shift. Only the two most
1185	* significant bits (round and guard) are needed.
1186	* If only a single shift is needed then the guard
1187	* bit becomes a significant low order bit and the
1188	* extension must participate in the rounding.
1189	* If more than a single shift is needed, then all
1190	* bits to the right of the guard bit are zeros,
1191	* and the guard bit may or may not be zero. */
1192	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193	resultp4);
1194
1195	/ Need to check for a zero result. The sign and*
1196	* exponent fields have already been zeroed. The more
1197	* efficient test of the full object can be used.
1198	*/
1199	if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200	/ Must have been "x-x" or "x+(-x)". /
1201	if (Is_rounding_mode(ROUNDMINUS))
1202	Dbl_setone_sign(resultp1);
1203	Dbl_copytoptr(resultp1,resultp2,dstptr);
1204	return(NOEXCEPTION);
1205	}
1206	result_exponent--;
1207
1208	/ Look to see if normalization is finished. /
1209	if (Dbl_isone_hidden(resultp1)) {
1210	/ No further normalization is needed /
1211	goto round;
1212	}
1213
1214	/ Discover first one bit to determine shift amount.*
1215	* Use a modified binary search. We have already
1216	* shifted the result one position right and still
1217	* not found a one so the remainder of the extension
1218	* must be zero and simplifies rounding. */
1219	/ Scan bytes /
1220	while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221	Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222	result_exponent -= `8`;
1223	}
1224	/ Now narrow it down to the nibble /
1225	if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226	/ The lower nibble contains the*
1227	* normalizing one */
1228	Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229	result_exponent -= `4`;
1230	}
1231	/ Select case where first bit is set (already*
1232	* normalized) otherwise select the proper shift. */
1233	jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234	if (jumpsize <= `7`) switch(jumpsize) {
1235	case `1`:
1236	Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237	resultp4);
1238	result_exponent -= `3`;
1239	break;
1240	case `2`:
1241	case `3`:
1242	Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243	resultp4);
1244	result_exponent -= `2`;
1245	break;
1246	case `4`:
1247	case `5`:
1248	case `6`:
1249	case `7`:
1250	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251	resultp4);
1252	result_exponent -= `1`;
1253	break;
1254	}
1255	} / end if (hidden...)... /
1256	/ Fall through and round /
1257	} / end if (save < 0)... /
1258	else {
1259	/ Add magnitudes /
1260	Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261	rightp1,rightp2,rightp3,rightp4,
1262	/to/resultp1,resultp2,resultp3,resultp4);
1263	sign_save = Dbl_signextendedsign(resultp1);
1264	if (Dbl_isone_hiddenoverflow(resultp1)) {
1265	/ Prenormalization required. /
1266	Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267	resultp4);
1268	result_exponent++;
1269	} / end if hiddenoverflow... /
1270	} / end else ...add magnitudes... /
1271
1272	/ Round the result. If the extension and lower two words are*
1273	* all zeros, then the result is exact. Otherwise round in the
1274	* correct direction. Underflow is possible. If a postnormalization
1275	* is necessary, then the mantissa is all zeros so no shift is needed.
1276	*/
1277	round:
1278	if (result_exponent <= `0` && !Is_underflowtrap_enabled()) {
1279	Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280	result_exponent,is_tiny);
1281	}
1282	Dbl_set_sign(resultp1,/using/sign_save);
1283	if (Dblext_isnotzero_mantissap3(resultp3) \|\|
1284	Dblext_isnotzero_mantissap4(resultp4)) {
1285	inexact = TRUE;
1286	switch(Rounding_mode()) {
1287	case ROUNDNEAREST: / The default. /
1288	if (Dblext_isone_highp3(resultp3)) {
1289	/ at least 1/2 ulp /
1290	if (Dblext_isnotzero_low31p3(resultp3) \|\|
1291	Dblext_isnotzero_mantissap4(resultp4) \|\|
1292	Dblext_isone_lowp2(resultp2)) {
1293	/ either exactly half way and odd or*
1294	* more than 1/2ulp */
1295	Dbl_increment(resultp1,resultp2);
1296	}
1297	}
1298	break;
1299
1300	case ROUNDPLUS:
1301	if (Dbl_iszero_sign(resultp1)) {
1302	/ Round up positive results /
1303	Dbl_increment(resultp1,resultp2);
1304	}
1305	break;
1306
1307	case ROUNDMINUS:
1308	if (Dbl_isone_sign(resultp1)) {
1309	/ Round down negative results /
1310	Dbl_increment(resultp1,resultp2);
1311	}
1312
1313	case ROUNDZERO:;
1314	/ truncate is simple /
1315	} / end switch... /
1316	if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317	}
1318	if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319	/ Overflow /
1320	if (Is_overflowtrap_enabled()) {
1321	/*
1322	* Adjust bias of result
1323	*/
1324	Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325	Dbl_copytoptr(resultp1,resultp2,dstptr);
1326	if (inexact)
1327	if (Is_inexacttrap_enabled())
1328	return (OPC_2E_OVERFLOWEXCEPTION \|
1329	OPC_2E_INEXACTEXCEPTION);
1330	else Set_inexactflag();
1331	return (OPC_2E_OVERFLOWEXCEPTION);
1332	}
1333	inexact = TRUE;
1334	Set_overflowflag();
1335	Dbl_setoverflow(resultp1,resultp2);
1336	} else if (result_exponent <= `0`) { / underflow case /
1337	if (Is_underflowtrap_enabled()) {
1338	/*
1339	* Adjust bias of result
1340	*/
1341	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342	Dbl_copytoptr(resultp1,resultp2,dstptr);
1343	if (inexact)
1344	if (Is_inexacttrap_enabled())
1345	return (OPC_2E_UNDERFLOWEXCEPTION \|
1346	OPC_2E_INEXACTEXCEPTION);
1347	else Set_inexactflag();
1348	return(OPC_2E_UNDERFLOWEXCEPTION);
1349	}
1350	else if (inexact && is_tiny) Set_underflowflag();
1351	}
1352	else Dbl_set_exponent(resultp1,result_exponent);
1353	Dbl_copytoptr(resultp1,resultp2,dstptr);
1354	if (inexact)
1355	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356	else Set_inexactflag();
1357	return(NOEXCEPTION);
1358	}
1359
1360	/*
1361	* Single Floating-point Multiply Fused Add
1362	*/
1363
1364	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365
1366	sgl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
1367	unsigned int *status;
1368	{
1369	unsigned int opnd1, opnd2, opnd3;
1370	register unsigned int tmpresp1, tmpresp2;
1371	unsigned int rightp1, rightp2;
1372	unsigned int resultp1, resultp2 = `0`;
1373	register int mpy_exponent, add_exponent, count;
1374	boolean inexact = FALSE, is_tiny = FALSE;
1375
1376	unsigned int signlessleft1, signlessright1, save;
1377	register int result_exponent, diff_exponent;
1378	int sign_save, jumpsize;
1379
1380	Sgl_copyfromptr(src1ptr,opnd1);
1381	Sgl_copyfromptr(src2ptr,opnd2);
1382	Sgl_copyfromptr(src3ptr,opnd3);
1383
1384	/*
1385	* set sign bit of result of multiply
1386	*/
1387	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388	Sgl_setnegativezero(resultp1);
1389	else Sgl_setzero(resultp1);
1390
1391	/*
1392	* Generate multiply exponent
1393	*/
1394	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395
1396	/*
1397	* check first operand for NaN's or infinity
1398	*/
1399	if (Sgl_isinfinity_exponent(opnd1)) {
1400	if (Sgl_iszero_mantissa(opnd1)) {
1401	if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402	if (Sgl_iszero_exponentmantissa(opnd2)) {
1403	/*
1404	* invalid since operands are infinity
1405	* and zero
1406	*/
1407	if (Is_invalidtrap_enabled())
1408	return(OPC_2E_INVALIDEXCEPTION);
1409	Set_invalidflag();
1410	Sgl_makequietnan(resultp1);
1411	Sgl_copytoptr(resultp1,dstptr);
1412	return(NOEXCEPTION);
1413	}
1414	/*
1415	* Check third operand for infinity with a
1416	* sign opposite of the multiply result
1417	*/
1418	if (Sgl_isinfinity(opnd3) &&
1419	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420	/*
1421	* invalid since attempting a magnitude
1422	* subtraction of infinities
1423	*/
1424	if (Is_invalidtrap_enabled())
1425	return(OPC_2E_INVALIDEXCEPTION);
1426	Set_invalidflag();
1427	Sgl_makequietnan(resultp1);
1428	Sgl_copytoptr(resultp1,dstptr);
1429	return(NOEXCEPTION);
1430	}
1431
1432	/*
1433	* return infinity
1434	*/
1435	Sgl_setinfinity_exponentmantissa(resultp1);
1436	Sgl_copytoptr(resultp1,dstptr);
1437	return(NOEXCEPTION);
1438	}
1439	}
1440	else {
1441	/*
1442	* is NaN; signaling or quiet?
1443	*/
1444	if (Sgl_isone_signaling(opnd1)) {
1445	/ trap if INVALIDTRAP enabled /
1446	if (Is_invalidtrap_enabled())
1447	return(OPC_2E_INVALIDEXCEPTION);
1448	/ make NaN quiet /
1449	Set_invalidflag();
1450	Sgl_set_quiet(opnd1);
1451	}
1452	/*
1453	* is second operand a signaling NaN?
1454	*/
1455	else if (Sgl_is_signalingnan(opnd2)) {
1456	/ trap if INVALIDTRAP enabled /
1457	if (Is_invalidtrap_enabled())
1458	return(OPC_2E_INVALIDEXCEPTION);
1459	/ make NaN quiet /
1460	Set_invalidflag();
1461	Sgl_set_quiet(opnd2);
1462	Sgl_copytoptr(opnd2,dstptr);
1463	return(NOEXCEPTION);
1464	}
1465	/*
1466	* is third operand a signaling NaN?
1467	*/
1468	else if (Sgl_is_signalingnan(opnd3)) {
1469	/ trap if INVALIDTRAP enabled /
1470	if (Is_invalidtrap_enabled())
1471	return(OPC_2E_INVALIDEXCEPTION);
1472	/ make NaN quiet /
1473	Set_invalidflag();
1474	Sgl_set_quiet(opnd3);
1475	Sgl_copytoptr(opnd3,dstptr);
1476	return(NOEXCEPTION);
1477	}
1478	/*
1479	* return quiet NaN
1480	*/
1481	Sgl_copytoptr(opnd1,dstptr);
1482	return(NOEXCEPTION);
1483	}
1484	}
1485
1486	/*
1487	* check second operand for NaN's or infinity
1488	*/
1489	if (Sgl_isinfinity_exponent(opnd2)) {
1490	if (Sgl_iszero_mantissa(opnd2)) {
1491	if (Sgl_isnotnan(opnd3)) {
1492	if (Sgl_iszero_exponentmantissa(opnd1)) {
1493	/*
1494	* invalid since multiply operands are
1495	* zero & infinity
1496	*/
1497	if (Is_invalidtrap_enabled())
1498	return(OPC_2E_INVALIDEXCEPTION);
1499	Set_invalidflag();
1500	Sgl_makequietnan(opnd2);
1501	Sgl_copytoptr(opnd2,dstptr);
1502	return(NOEXCEPTION);
1503	}
1504
1505	/*
1506	* Check third operand for infinity with a
1507	* sign opposite of the multiply result
1508	*/
1509	if (Sgl_isinfinity(opnd3) &&
1510	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511	/*
1512	* invalid since attempting a magnitude
1513	* subtraction of infinities
1514	*/
1515	if (Is_invalidtrap_enabled())
1516	return(OPC_2E_INVALIDEXCEPTION);
1517	Set_invalidflag();
1518	Sgl_makequietnan(resultp1);
1519	Sgl_copytoptr(resultp1,dstptr);
1520	return(NOEXCEPTION);
1521	}
1522
1523	/*
1524	* return infinity
1525	*/
1526	Sgl_setinfinity_exponentmantissa(resultp1);
1527	Sgl_copytoptr(resultp1,dstptr);
1528	return(NOEXCEPTION);
1529	}
1530	}
1531	else {
1532	/*
1533	* is NaN; signaling or quiet?
1534	*/
1535	if (Sgl_isone_signaling(opnd2)) {
1536	/ trap if INVALIDTRAP enabled /
1537	if (Is_invalidtrap_enabled())
1538	return(OPC_2E_INVALIDEXCEPTION);
1539	/ make NaN quiet /
1540	Set_invalidflag();
1541	Sgl_set_quiet(opnd2);
1542	}
1543	/*
1544	* is third operand a signaling NaN?
1545	*/
1546	else if (Sgl_is_signalingnan(opnd3)) {
1547	/ trap if INVALIDTRAP enabled /
1548	if (Is_invalidtrap_enabled())
1549	return(OPC_2E_INVALIDEXCEPTION);
1550	/ make NaN quiet /
1551	Set_invalidflag();
1552	Sgl_set_quiet(opnd3);
1553	Sgl_copytoptr(opnd3,dstptr);
1554	return(NOEXCEPTION);
1555	}
1556	/*
1557	* return quiet NaN
1558	*/
1559	Sgl_copytoptr(opnd2,dstptr);
1560	return(NOEXCEPTION);
1561	}
1562	}
1563
1564	/*
1565	* check third operand for NaN's or infinity
1566	*/
1567	if (Sgl_isinfinity_exponent(opnd3)) {
1568	if (Sgl_iszero_mantissa(opnd3)) {
1569	/ return infinity /
1570	Sgl_copytoptr(opnd3,dstptr);
1571	return(NOEXCEPTION);
1572	} else {
1573	/*
1574	* is NaN; signaling or quiet?
1575	*/
1576	if (Sgl_isone_signaling(opnd3)) {
1577	/ trap if INVALIDTRAP enabled /
1578	if (Is_invalidtrap_enabled())
1579	return(OPC_2E_INVALIDEXCEPTION);
1580	/ make NaN quiet /
1581	Set_invalidflag();
1582	Sgl_set_quiet(opnd3);
1583	}
1584	/*
1585	* return quiet NaN
1586	*/
1587	Sgl_copytoptr(opnd3,dstptr);
1588	return(NOEXCEPTION);
1589	}
1590	}
1591
1592	/*
1593	* Generate multiply mantissa
1594	*/
1595	if (Sgl_isnotzero_exponent(opnd1)) {
1596	/ set hidden bit /
1597	Sgl_clear_signexponent_set_hidden(opnd1);
1598	}
1599	else {
1600	/ check for zero /
1601	if (Sgl_iszero_mantissa(opnd1)) {
1602	/*
1603	* Perform the add opnd3 with zero here.
1604	*/
1605	if (Sgl_iszero_exponentmantissa(opnd3)) {
1606	if (Is_rounding_mode(ROUNDMINUS)) {
1607	Sgl_or_signs(opnd3,resultp1);
1608	} else {
1609	Sgl_and_signs(opnd3,resultp1);
1610	}
1611	}
1612	/*
1613	* Now let's check for trapped underflow case.
1614	*/
1615	else if (Sgl_iszero_exponent(opnd3) &&
1616	Is_underflowtrap_enabled()) {
1617	/ need to normalize results mantissa /
1618	sign_save = Sgl_signextendedsign(opnd3);
1619	result_exponent = `0`;
1620	Sgl_leftshiftby1(opnd3);
1621	Sgl_normalize(opnd3,result_exponent);
1622	Sgl_set_sign(opnd3,/using/sign_save);
1623	Sgl_setwrapped_exponent(opnd3,result_exponent,
1624	unfl);
1625	Sgl_copytoptr(opnd3,dstptr);
1626	/ inexact = FALSE /
1627	return(OPC_2E_UNDERFLOWEXCEPTION);
1628	}
1629	Sgl_copytoptr(opnd3,dstptr);
1630	return(NOEXCEPTION);
1631	}
1632	/ is denormalized, adjust exponent /
1633	Sgl_clear_signexponent(opnd1);
1634	Sgl_leftshiftby1(opnd1);
1635	Sgl_normalize(opnd1,mpy_exponent);
1636	}
1637	/ opnd2 needs to have hidden bit set with msb in hidden bit /
1638	if (Sgl_isnotzero_exponent(opnd2)) {
1639	Sgl_clear_signexponent_set_hidden(opnd2);
1640	}
1641	else {
1642	/ check for zero /
1643	if (Sgl_iszero_mantissa(opnd2)) {
1644	/*
1645	* Perform the add opnd3 with zero here.
1646	*/
1647	if (Sgl_iszero_exponentmantissa(opnd3)) {
1648	if (Is_rounding_mode(ROUNDMINUS)) {
1649	Sgl_or_signs(opnd3,resultp1);
1650	} else {
1651	Sgl_and_signs(opnd3,resultp1);
1652	}
1653	}
1654	/*
1655	* Now let's check for trapped underflow case.
1656	*/
1657	else if (Sgl_iszero_exponent(opnd3) &&
1658	Is_underflowtrap_enabled()) {
1659	/ need to normalize results mantissa /
1660	sign_save = Sgl_signextendedsign(opnd3);
1661	result_exponent = `0`;
1662	Sgl_leftshiftby1(opnd3);
1663	Sgl_normalize(opnd3,result_exponent);
1664	Sgl_set_sign(opnd3,/using/sign_save);
1665	Sgl_setwrapped_exponent(opnd3,result_exponent,
1666	unfl);
1667	Sgl_copytoptr(opnd3,dstptr);
1668	/ inexact = FALSE /
1669	return(OPC_2E_UNDERFLOWEXCEPTION);
1670	}
1671	Sgl_copytoptr(opnd3,dstptr);
1672	return(NOEXCEPTION);
1673	}
1674	/ is denormalized; want to normalize /
1675	Sgl_clear_signexponent(opnd2);
1676	Sgl_leftshiftby1(opnd2);
1677	Sgl_normalize(opnd2,mpy_exponent);
1678	}
1679
1680	/ Multiply the first two source mantissas together /
1681
1682	/*
1683	* The intermediate result will be kept in tmpres,
1684	* which needs enough room for 106 bits of mantissa,
1685	* so lets call it a Double extended.
1686	*/
1687	Sglext_setzero(tmpresp1,tmpresp2);
1688
1689	/*
1690	* Four bits at a time are inspected in each loop, and a
1691	* simple shift and add multiply algorithm is used.
1692	*/
1693	for (count = SGL_P-`1`; count >= `0`; count -= `4`) {
1694	Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695	if (Sbit28(opnd1)) {
1696	/ Twoword_add should be an ADD followed by 2 ADDC's /
1697	Twoword_add(tmpresp1, tmpresp2, opnd2<<`3`, `0`);
1698	}
1699	if (Sbit29(opnd1)) {
1700	Twoword_add(tmpresp1, tmpresp2, opnd2<<`2`, `0`);
1701	}
1702	if (Sbit30(opnd1)) {
1703	Twoword_add(tmpresp1, tmpresp2, opnd2<<`1`, `0`);
1704	}
1705	if (Sbit31(opnd1)) {
1706	Twoword_add(tmpresp1, tmpresp2, opnd2, `0`);
1707	}
1708	Sgl_rightshiftby4(opnd1);
1709	}
1710	if (Is_sexthiddenoverflow(tmpresp1)) {
1711	/ result mantissa >= 2 (mantissa overflow) /
1712	mpy_exponent++;
1713	Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714	} else {
1715	Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716	}
1717
1718	/*
1719	* Restore the sign of the mpy result which was saved in resultp1.
1720	* The exponent will continue to be kept in mpy_exponent.
1721	*/
1722	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723
1724	/*
1725	* No rounding is required, since the result of the multiply
1726	* is exact in the extended format.
1727	*/
1728
1729	/*
1730	* Now we are ready to perform the add portion of the operation.
1731	*
1732	* The exponents need to be kept as integers for now, since the
1733	* multiply result might not fit into the exponent field. We
1734	* can't overflow or underflow because of this yet, since the
1735	* add could bring the final result back into range.
1736	*/
1737	add_exponent = Sgl_exponent(opnd3);
1738
1739	/*
1740	* Check for denormalized or zero add operand.
1741	*/
1742	if (add_exponent == `0`) {
1743	/ check for zero /
1744	if (Sgl_iszero_mantissa(opnd3)) {
1745	/ right is zero /
1746	/ Left can't be zero and must be result.*
1747	*
1748	* The final result is now in tmpres and mpy_exponent,
1749	* and needs to be rounded and squeezed back into
1750	* double precision format from double extended.
1751	*/
1752	result_exponent = mpy_exponent;
1753	Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754	sign_save = Sgl_signextendedsign(resultp1);/save sign/
1755	goto round;
1756	}
1757
1758	/*
1759	* Neither are zeroes.
1760	* Adjust exponent and normalize add operand.
1761	*/
1762	sign_save = Sgl_signextendedsign(opnd3); / save sign /
1763	Sgl_clear_signexponent(opnd3);
1764	Sgl_leftshiftby1(opnd3);
1765	Sgl_normalize(opnd3,add_exponent);
1766	Sgl_set_sign(opnd3,sign_save); / restore sign /
1767	} else {
1768	Sgl_clear_exponent_set_hidden(opnd3);
1769	}
1770	/*
1771	* Copy opnd3 to the double extended variable called right.
1772	*/
1773	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774
1775	/*
1776	* A zero "save" helps discover equal operands (for later),
1777	* and is used in swapping operands (if needed).
1778	*/
1779	Sglext_xortointp1(tmpresp1,rightp1,/to/save);
1780
1781	/*
1782	* Compare magnitude of operands.
1783	*/
1784	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
1787	Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788	/*
1789	* Set the left operand to the larger one by XOR swap.
1790	* First finish the first word "save".
1791	*/
1792	Sglext_xorfromintp1(save,rightp1,/to/rightp1);
1793	Sglext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
1794	Sglext_swap_lower(tmpresp2,rightp2);
1795	/ also setup exponents used in rest of routine /
1796	diff_exponent = add_exponent - mpy_exponent;
1797	result_exponent = add_exponent;
1798	} else {
1799	/ also setup exponents used in rest of routine /
1800	diff_exponent = mpy_exponent - add_exponent;
1801	result_exponent = mpy_exponent;
1802	}
1803	/ Invariant: left is not smaller than right. /
1804
1805	/*
1806	* Special case alignment of operands that would force alignment
1807	* beyond the extent of the extension. A further optimization
1808	* could special case this but only reduces the path length for
1809	* this infrequent case.
1810	*/
1811	if (diff_exponent > SGLEXT_THRESHOLD) {
1812	diff_exponent = SGLEXT_THRESHOLD;
1813	}
1814
1815	/ Align right operand by shifting it to the right /
1816	Sglext_clear_sign(rightp1);
1817	Sglext_right_align(rightp1,rightp2,/shifted by/diff_exponent);
1818
1819	/ Treat sum and difference of the operands separately. /
1820	if ((int)save < `0`) {
1821	/*
1822	* Difference of the two operands. Overflow can occur if the
1823	* multiply overflowed. A borrow can occur out of the hidden
1824	* bit and force a post normalization phase.
1825	*/
1826	Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827	resultp1,resultp2);
1828	sign_save = Sgl_signextendedsign(resultp1);
1829	if (Sgl_iszero_hidden(resultp1)) {
1830	/ Handle normalization /
1831	/ A straightforward algorithm would now shift the*
1832	* result and extension left until the hidden bit
1833	* becomes one. Not all of the extension bits need
1834	* participate in the shift. Only the two most
1835	* significant bits (round and guard) are needed.
1836	* If only a single shift is needed then the guard
1837	* bit becomes a significant low order bit and the
1838	* extension must participate in the rounding.
1839	* If more than a single shift is needed, then all
1840	* bits to the right of the guard bit are zeros,
1841	* and the guard bit may or may not be zero. */
1842	Sglext_leftshiftby1(resultp1,resultp2);
1843
1844	/ Need to check for a zero result. The sign and*
1845	* exponent fields have already been zeroed. The more
1846	* efficient test of the full object can be used.
1847	*/
1848	if (Sglext_iszero(resultp1,resultp2)) {
1849	/ Must have been "x-x" or "x+(-x)". /
1850	if (Is_rounding_mode(ROUNDMINUS))
1851	Sgl_setone_sign(resultp1);
1852	Sgl_copytoptr(resultp1,dstptr);
1853	return(NOEXCEPTION);
1854	}
1855	result_exponent--;
1856
1857	/ Look to see if normalization is finished. /
1858	if (Sgl_isone_hidden(resultp1)) {
1859	/ No further normalization is needed /
1860	goto round;
1861	}
1862
1863	/ Discover first one bit to determine shift amount.*
1864	* Use a modified binary search. We have already
1865	* shifted the result one position right and still
1866	* not found a one so the remainder of the extension
1867	* must be zero and simplifies rounding. */
1868	/ Scan bytes /
1869	while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870	Sglext_leftshiftby8(resultp1,resultp2);
1871	result_exponent -= `8`;
1872	}
1873	/ Now narrow it down to the nibble /
1874	if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875	/ The lower nibble contains the*
1876	* normalizing one */
1877	Sglext_leftshiftby4(resultp1,resultp2);
1878	result_exponent -= `4`;
1879	}
1880	/ Select case where first bit is set (already*
1881	* normalized) otherwise select the proper shift. */
1882	jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883	if (jumpsize <= `7`) switch(jumpsize) {
1884	case `1`:
1885	Sglext_leftshiftby3(resultp1,resultp2);
1886	result_exponent -= `3`;
1887	break;
1888	case `2`:
1889	case `3`:
1890	Sglext_leftshiftby2(resultp1,resultp2);
1891	result_exponent -= `2`;
1892	break;
1893	case `4`:
1894	case `5`:
1895	case `6`:
1896	case `7`:
1897	Sglext_leftshiftby1(resultp1,resultp2);
1898	result_exponent -= `1`;
1899	break;
1900	}
1901	} / end if (hidden...)... /
1902	/ Fall through and round /
1903	} / end if (save < 0)... /
1904	else {
1905	/ Add magnitudes /
1906	Sglext_addition(tmpresp1,tmpresp2,
1907	rightp1,rightp2, /to/resultp1,resultp2);
1908	sign_save = Sgl_signextendedsign(resultp1);
1909	if (Sgl_isone_hiddenoverflow(resultp1)) {
1910	/ Prenormalization required. /
1911	Sglext_arithrightshiftby1(resultp1,resultp2);
1912	result_exponent++;
1913	} / end if hiddenoverflow... /
1914	} / end else ...add magnitudes... /
1915
1916	/ Round the result. If the extension and lower two words are*
1917	* all zeros, then the result is exact. Otherwise round in the
1918	* correct direction. Underflow is possible. If a postnormalization
1919	* is necessary, then the mantissa is all zeros so no shift is needed.
1920	*/
1921	round:
1922	if (result_exponent <= `0` && !Is_underflowtrap_enabled()) {
1923	Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924	}
1925	Sgl_set_sign(resultp1,/using/sign_save);
1926	if (Sglext_isnotzero_mantissap2(resultp2)) {
1927	inexact = TRUE;
1928	switch(Rounding_mode()) {
1929	case ROUNDNEAREST: / The default. /
1930	if (Sglext_isone_highp2(resultp2)) {
1931	/ at least 1/2 ulp /
1932	if (Sglext_isnotzero_low31p2(resultp2) \|\|
1933	Sglext_isone_lowp1(resultp1)) {
1934	/ either exactly half way and odd or*
1935	* more than 1/2ulp */
1936	Sgl_increment(resultp1);
1937	}
1938	}
1939	break;
1940
1941	case ROUNDPLUS:
1942	if (Sgl_iszero_sign(resultp1)) {
1943	/ Round up positive results /
1944	Sgl_increment(resultp1);
1945	}
1946	break;
1947
1948	case ROUNDMINUS:
1949	if (Sgl_isone_sign(resultp1)) {
1950	/ Round down negative results /
1951	Sgl_increment(resultp1);
1952	}
1953
1954	case ROUNDZERO:;
1955	/ truncate is simple /
1956	} / end switch... /
1957	if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958	}
1959	if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960	/ Overflow /
1961	if (Is_overflowtrap_enabled()) {
1962	/*
1963	* Adjust bias of result
1964	*/
1965	Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966	Sgl_copytoptr(resultp1,dstptr);
1967	if (inexact)
1968	if (Is_inexacttrap_enabled())
1969	return (OPC_2E_OVERFLOWEXCEPTION \|
1970	OPC_2E_INEXACTEXCEPTION);
1971	else Set_inexactflag();
1972	return (OPC_2E_OVERFLOWEXCEPTION);
1973	}
1974	inexact = TRUE;
1975	Set_overflowflag();
1976	Sgl_setoverflow(resultp1);
1977	} else if (result_exponent <= `0`) { / underflow case /
1978	if (Is_underflowtrap_enabled()) {
1979	/*
1980	* Adjust bias of result
1981	*/
1982	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983	Sgl_copytoptr(resultp1,dstptr);
1984	if (inexact)
1985	if (Is_inexacttrap_enabled())
1986	return (OPC_2E_UNDERFLOWEXCEPTION \|
1987	OPC_2E_INEXACTEXCEPTION);
1988	else Set_inexactflag();
1989	return(OPC_2E_UNDERFLOWEXCEPTION);
1990	}
1991	else if (inexact && is_tiny) Set_underflowflag();
1992	}
1993	else Sgl_set_exponent(resultp1,result_exponent);
1994	Sgl_copytoptr(resultp1,dstptr);
1995	if (inexact)
1996	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997	else Set_inexactflag();
1998	return(NOEXCEPTION);
1999	}
2000
2001	/*
2002	* Single Floating-point Multiply Negate Fused Add
2003	*/
2004
2005	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006
2007	sgl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
2008	unsigned int *status;
2009	{
2010	unsigned int opnd1, opnd2, opnd3;
2011	register unsigned int tmpresp1, tmpresp2;
2012	unsigned int rightp1, rightp2;
2013	unsigned int resultp1, resultp2 = `0`;
2014	register int mpy_exponent, add_exponent, count;
2015	boolean inexact = FALSE, is_tiny = FALSE;
2016
2017	unsigned int signlessleft1, signlessright1, save;
2018	register int result_exponent, diff_exponent;
2019	int sign_save, jumpsize;
2020
2021	Sgl_copyfromptr(src1ptr,opnd1);
2022	Sgl_copyfromptr(src2ptr,opnd2);
2023	Sgl_copyfromptr(src3ptr,opnd3);
2024
2025	/*
2026	* set sign bit of result of multiply
2027	*/
2028	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029	Sgl_setzero(resultp1);
2030	else
2031	Sgl_setnegativezero(resultp1);
2032
2033	/*
2034	* Generate multiply exponent
2035	*/
2036	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037
2038	/*
2039	* check first operand for NaN's or infinity
2040	*/
2041	if (Sgl_isinfinity_exponent(opnd1)) {
2042	if (Sgl_iszero_mantissa(opnd1)) {
2043	if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044	if (Sgl_iszero_exponentmantissa(opnd2)) {
2045	/*
2046	* invalid since operands are infinity
2047	* and zero
2048	*/
2049	if (Is_invalidtrap_enabled())
2050	return(OPC_2E_INVALIDEXCEPTION);
2051	Set_invalidflag();
2052	Sgl_makequietnan(resultp1);
2053	Sgl_copytoptr(resultp1,dstptr);
2054	return(NOEXCEPTION);
2055	}
2056	/*
2057	* Check third operand for infinity with a
2058	* sign opposite of the multiply result
2059	*/
2060	if (Sgl_isinfinity(opnd3) &&
2061	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062	/*
2063	* invalid since attempting a magnitude
2064	* subtraction of infinities
2065	*/
2066	if (Is_invalidtrap_enabled())
2067	return(OPC_2E_INVALIDEXCEPTION);
2068	Set_invalidflag();
2069	Sgl_makequietnan(resultp1);
2070	Sgl_copytoptr(resultp1,dstptr);
2071	return(NOEXCEPTION);
2072	}
2073
2074	/*
2075	* return infinity
2076	*/
2077	Sgl_setinfinity_exponentmantissa(resultp1);
2078	Sgl_copytoptr(resultp1,dstptr);
2079	return(NOEXCEPTION);
2080	}
2081	}
2082	else {
2083	/*
2084	* is NaN; signaling or quiet?
2085	*/
2086	if (Sgl_isone_signaling(opnd1)) {
2087	/ trap if INVALIDTRAP enabled /
2088	if (Is_invalidtrap_enabled())
2089	return(OPC_2E_INVALIDEXCEPTION);
2090	/ make NaN quiet /
2091	Set_invalidflag();
2092	Sgl_set_quiet(opnd1);
2093	}
2094	/*
2095	* is second operand a signaling NaN?
2096	*/
2097	else if (Sgl_is_signalingnan(opnd2)) {
2098	/ trap if INVALIDTRAP enabled /
2099	if (Is_invalidtrap_enabled())
2100	return(OPC_2E_INVALIDEXCEPTION);
2101	/ make NaN quiet /
2102	Set_invalidflag();
2103	Sgl_set_quiet(opnd2);
2104	Sgl_copytoptr(opnd2,dstptr);
2105	return(NOEXCEPTION);
2106	}
2107	/*
2108	* is third operand a signaling NaN?
2109	*/
2110	else if (Sgl_is_signalingnan(opnd3)) {
2111	/ trap if INVALIDTRAP enabled /
2112	if (Is_invalidtrap_enabled())
2113	return(OPC_2E_INVALIDEXCEPTION);
2114	/ make NaN quiet /
2115	Set_invalidflag();
2116	Sgl_set_quiet(opnd3);
2117	Sgl_copytoptr(opnd3,dstptr);
2118	return(NOEXCEPTION);
2119	}
2120	/*
2121	* return quiet NaN
2122	*/
2123	Sgl_copytoptr(opnd1,dstptr);
2124	return(NOEXCEPTION);
2125	}
2126	}
2127
2128	/*
2129	* check second operand for NaN's or infinity
2130	*/
2131	if (Sgl_isinfinity_exponent(opnd2)) {
2132	if (Sgl_iszero_mantissa(opnd2)) {
2133	if (Sgl_isnotnan(opnd3)) {
2134	if (Sgl_iszero_exponentmantissa(opnd1)) {
2135	/*
2136	* invalid since multiply operands are
2137	* zero & infinity
2138	*/
2139	if (Is_invalidtrap_enabled())
2140	return(OPC_2E_INVALIDEXCEPTION);
2141	Set_invalidflag();
2142	Sgl_makequietnan(opnd2);
2143	Sgl_copytoptr(opnd2,dstptr);
2144	return(NOEXCEPTION);
2145	}
2146
2147	/*
2148	* Check third operand for infinity with a
2149	* sign opposite of the multiply result
2150	*/
2151	if (Sgl_isinfinity(opnd3) &&
2152	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153	/*
2154	* invalid since attempting a magnitude
2155	* subtraction of infinities
2156	*/
2157	if (Is_invalidtrap_enabled())
2158	return(OPC_2E_INVALIDEXCEPTION);
2159	Set_invalidflag();
2160	Sgl_makequietnan(resultp1);
2161	Sgl_copytoptr(resultp1,dstptr);
2162	return(NOEXCEPTION);
2163	}
2164
2165	/*
2166	* return infinity
2167	*/
2168	Sgl_setinfinity_exponentmantissa(resultp1);
2169	Sgl_copytoptr(resultp1,dstptr);
2170	return(NOEXCEPTION);
2171	}
2172	}
2173	else {
2174	/*
2175	* is NaN; signaling or quiet?
2176	*/
2177	if (Sgl_isone_signaling(opnd2)) {
2178	/ trap if INVALIDTRAP enabled /
2179	if (Is_invalidtrap_enabled())
2180	return(OPC_2E_INVALIDEXCEPTION);
2181	/ make NaN quiet /
2182	Set_invalidflag();
2183	Sgl_set_quiet(opnd2);
2184	}
2185	/*
2186	* is third operand a signaling NaN?
2187	*/
2188	else if (Sgl_is_signalingnan(opnd3)) {
2189	/ trap if INVALIDTRAP enabled /
2190	if (Is_invalidtrap_enabled())
2191	return(OPC_2E_INVALIDEXCEPTION);
2192	/ make NaN quiet /
2193	Set_invalidflag();
2194	Sgl_set_quiet(opnd3);
2195	Sgl_copytoptr(opnd3,dstptr);
2196	return(NOEXCEPTION);
2197	}
2198	/*
2199	* return quiet NaN
2200	*/
2201	Sgl_copytoptr(opnd2,dstptr);
2202	return(NOEXCEPTION);
2203	}
2204	}
2205
2206	/*
2207	* check third operand for NaN's or infinity
2208	*/
2209	if (Sgl_isinfinity_exponent(opnd3)) {
2210	if (Sgl_iszero_mantissa(opnd3)) {
2211	/ return infinity /
2212	Sgl_copytoptr(opnd3,dstptr);
2213	return(NOEXCEPTION);
2214	} else {
2215	/*
2216	* is NaN; signaling or quiet?
2217	*/
2218	if (Sgl_isone_signaling(opnd3)) {
2219	/ trap if INVALIDTRAP enabled /
2220	if (Is_invalidtrap_enabled())
2221	return(OPC_2E_INVALIDEXCEPTION);
2222	/ make NaN quiet /
2223	Set_invalidflag();
2224	Sgl_set_quiet(opnd3);
2225	}
2226	/*
2227	* return quiet NaN
2228	*/
2229	Sgl_copytoptr(opnd3,dstptr);
2230	return(NOEXCEPTION);
2231	}
2232	}
2233
2234	/*
2235	* Generate multiply mantissa
2236	*/
2237	if (Sgl_isnotzero_exponent(opnd1)) {
2238	/ set hidden bit /
2239	Sgl_clear_signexponent_set_hidden(opnd1);
2240	}
2241	else {
2242	/ check for zero /
2243	if (Sgl_iszero_mantissa(opnd1)) {
2244	/*
2245	* Perform the add opnd3 with zero here.
2246	*/
2247	if (Sgl_iszero_exponentmantissa(opnd3)) {
2248	if (Is_rounding_mode(ROUNDMINUS)) {
2249	Sgl_or_signs(opnd3,resultp1);
2250	} else {
2251	Sgl_and_signs(opnd3,resultp1);
2252	}
2253	}
2254	/*
2255	* Now let's check for trapped underflow case.
2256	*/
2257	else if (Sgl_iszero_exponent(opnd3) &&
2258	Is_underflowtrap_enabled()) {
2259	/ need to normalize results mantissa /
2260	sign_save = Sgl_signextendedsign(opnd3);
2261	result_exponent = `0`;
2262	Sgl_leftshiftby1(opnd3);
2263	Sgl_normalize(opnd3,result_exponent);
2264	Sgl_set_sign(opnd3,/using/sign_save);
2265	Sgl_setwrapped_exponent(opnd3,result_exponent,
2266	unfl);
2267	Sgl_copytoptr(opnd3,dstptr);
2268	/ inexact = FALSE /
2269	return(OPC_2E_UNDERFLOWEXCEPTION);
2270	}
2271	Sgl_copytoptr(opnd3,dstptr);
2272	return(NOEXCEPTION);
2273	}
2274	/ is denormalized, adjust exponent /
2275	Sgl_clear_signexponent(opnd1);
2276	Sgl_leftshiftby1(opnd1);
2277	Sgl_normalize(opnd1,mpy_exponent);
2278	}
2279	/ opnd2 needs to have hidden bit set with msb in hidden bit /
2280	if (Sgl_isnotzero_exponent(opnd2)) {
2281	Sgl_clear_signexponent_set_hidden(opnd2);
2282	}
2283	else {
2284	/ check for zero /
2285	if (Sgl_iszero_mantissa(opnd2)) {
2286	/*
2287	* Perform the add opnd3 with zero here.
2288	*/
2289	if (Sgl_iszero_exponentmantissa(opnd3)) {
2290	if (Is_rounding_mode(ROUNDMINUS)) {
2291	Sgl_or_signs(opnd3,resultp1);
2292	} else {
2293	Sgl_and_signs(opnd3,resultp1);
2294	}
2295	}
2296	/*
2297	* Now let's check for trapped underflow case.
2298	*/
2299	else if (Sgl_iszero_exponent(opnd3) &&
2300	Is_underflowtrap_enabled()) {
2301	/ need to normalize results mantissa /
2302	sign_save = Sgl_signextendedsign(opnd3);
2303	result_exponent = `0`;
2304	Sgl_leftshiftby1(opnd3);
2305	Sgl_normalize(opnd3,result_exponent);
2306	Sgl_set_sign(opnd3,/using/sign_save);
2307	Sgl_setwrapped_exponent(opnd3,result_exponent,
2308	unfl);
2309	Sgl_copytoptr(opnd3,dstptr);
2310	/ inexact = FALSE /
2311	return(OPC_2E_UNDERFLOWEXCEPTION);
2312	}
2313	Sgl_copytoptr(opnd3,dstptr);
2314	return(NOEXCEPTION);
2315	}
2316	/ is denormalized; want to normalize /
2317	Sgl_clear_signexponent(opnd2);
2318	Sgl_leftshiftby1(opnd2);
2319	Sgl_normalize(opnd2,mpy_exponent);
2320	}
2321
2322	/ Multiply the first two source mantissas together /
2323
2324	/*
2325	* The intermediate result will be kept in tmpres,
2326	* which needs enough room for 2 * SGL_P bits of mantissa,
2327	* so let's call it a Single extended.
2328	*/
2329	Sglext_setzero(tmpresp1,tmpresp2);
2330
2331	/*
2332	* Four bits at a time are inspected in each loop, and a
2333	* simple shift and add multiply algorithm is used.
2334	*/
2335	for (count = SGL_P-`1`; count >= `0`; count -= `4`) {
2336	Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337	if (Sbit28(opnd1)) {
2338	/ Twoword_add should be an ADD followed by 2 ADDC's /
2339	Twoword_add(tmpresp1, tmpresp2, opnd2<<`3`, `0`);
2340	}
2341	if (Sbit29(opnd1)) {
2342	Twoword_add(tmpresp1, tmpresp2, opnd2<<`2`, `0`);
2343	}
2344	if (Sbit30(opnd1)) {
2345	Twoword_add(tmpresp1, tmpresp2, opnd2<<`1`, `0`);
2346	}
2347	if (Sbit31(opnd1)) {
2348	Twoword_add(tmpresp1, tmpresp2, opnd2, `0`);
2349	}
2350	Sgl_rightshiftby4(opnd1);
2351	}
2352	if (Is_sexthiddenoverflow(tmpresp1)) {
2353	/ result mantissa >= 2 (mantissa overflow) /
2354	mpy_exponent++;
2355	Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356	} else {
2357	Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358	}
2359
2360	/*
2361	* Restore the sign of the mpy result which was saved in resultp1.
2362	* The exponent will continue to be kept in mpy_exponent.
2363	*/
2364	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365
2366	/*
2367	* No rounding is required, since the result of the multiply
2368	* is exact in the extended format.
2369	*/
2370
2371	/*
2372	* Now we are ready to perform the add portion of the operation.
2373	*
2374	* The exponents need to be kept as integers for now, since the
2375	* multiply result might not fit into the exponent field. We
2376	* can't overflow or underflow because of this yet, since the
2377	* add could bring the final result back into range.
2378	*/
2379	add_exponent = Sgl_exponent(opnd3);
2380
2381	/*
2382	* Check for denormalized or zero add operand.
2383	*/
2384	if (add_exponent == `0`) {
2385	/ check for zero /
2386	if (Sgl_iszero_mantissa(opnd3)) {
2387	/ right is zero /
2388	/ Left can't be zero and must be result.*
2389	*
2390	* The final result is now in tmpres and mpy_exponent,
2391	* and needs to be rounded and squeezed back into
2392	* single precision format from single extended.
2393	*/
2394	result_exponent = mpy_exponent;
2395	Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396	sign_save = Sgl_signextendedsign(resultp1);/save sign/
2397	goto round;
2398	}
2399
2400	/*
2401	* Neither are zeroes.
2402	* Adjust exponent and normalize add operand.
2403	*/
2404	sign_save = Sgl_signextendedsign(opnd3); / save sign /
2405	Sgl_clear_signexponent(opnd3);
2406	Sgl_leftshiftby1(opnd3);
2407	Sgl_normalize(opnd3,add_exponent);
2408	Sgl_set_sign(opnd3,sign_save); / restore sign /
2409	} else {
2410	Sgl_clear_exponent_set_hidden(opnd3);
2411	}
2412	/*
2413	* Copy opnd3 to the double extended variable called right.
2414	*/
2415	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416
2417	/*
2418	* A zero "save" helps discover equal operands (for later),
2419	* and is used in swapping operands (if needed).
2420	*/
2421	Sglext_xortointp1(tmpresp1,rightp1,/to/save);
2422
2423	/*
2424	* Compare magnitude of operands.
2425	*/
2426	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
2429	Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430	/*
2431	* Set the left operand to the larger one by XOR swap.
2432	* First finish the first word "save".
2433	*/
2434	Sglext_xorfromintp1(save,rightp1,/to/rightp1);
2435	Sglext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
2436	Sglext_swap_lower(tmpresp2,rightp2);
2437	/ also setup exponents used in rest of routine /
2438	diff_exponent = add_exponent - mpy_exponent;
2439	result_exponent = add_exponent;
2440	} else {
2441	/ also setup exponents used in rest of routine /
2442	diff_exponent = mpy_exponent - add_exponent;
2443	result_exponent = mpy_exponent;
2444	}
2445	/ Invariant: left is not smaller than right. /
2446
2447	/*
2448	* Special case alignment of operands that would force alignment
2449	* beyond the extent of the extension. A further optimization
2450	* could special case this but only reduces the path length for
2451	* this infrequent case.
2452	*/
2453	if (diff_exponent > SGLEXT_THRESHOLD) {
2454	diff_exponent = SGLEXT_THRESHOLD;
2455	}
2456
2457	/ Align right operand by shifting it to the right /
2458	Sglext_clear_sign(rightp1);
2459	Sglext_right_align(rightp1,rightp2,/shifted by/diff_exponent);
2460
2461	/ Treat sum and difference of the operands separately. /
2462	if ((int)save < `0`) {
2463	/*
2464	* Difference of the two operands. Overflow can occur if the
2465	* multiply overflowed. A borrow can occur out of the hidden
2466	* bit and force a post normalization phase.
2467	*/
2468	Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469	resultp1,resultp2);
2470	sign_save = Sgl_signextendedsign(resultp1);
2471	if (Sgl_iszero_hidden(resultp1)) {
2472	/ Handle normalization /
2473	/ A straightforward algorithm would now shift the*
2474	* result and extension left until the hidden bit
2475	* becomes one. Not all of the extension bits need
2476	* participate in the shift. Only the two most
2477	* significant bits (round and guard) are needed.
2478	* If only a single shift is needed then the guard
2479	* bit becomes a significant low order bit and the
2480	* extension must participate in the rounding.
2481	* If more than a single shift is needed, then all
2482	* bits to the right of the guard bit are zeros,
2483	* and the guard bit may or may not be zero. */
2484	Sglext_leftshiftby1(resultp1,resultp2);
2485
2486	/ Need to check for a zero result. The sign and*
2487	* exponent fields have already been zeroed. The more
2488	* efficient test of the full object can be used.
2489	*/
2490	if (Sglext_iszero(resultp1,resultp2)) {
2491	/ Must have been "x-x" or "x+(-x)". /
2492	if (Is_rounding_mode(ROUNDMINUS))
2493	Sgl_setone_sign(resultp1);
2494	Sgl_copytoptr(resultp1,dstptr);
2495	return(NOEXCEPTION);
2496	}
2497	result_exponent--;
2498
2499	/ Look to see if normalization is finished. /
2500	if (Sgl_isone_hidden(resultp1)) {
2501	/ No further normalization is needed /
2502	goto round;
2503	}
2504
2505	/ Discover first one bit to determine shift amount.*
2506	* Use a modified binary search. We have already
2507	* shifted the result one position left and still
2508	* not found a one so the remainder of the extension
2509	* must be zero and simplifies rounding. */
2510	/ Scan bytes /
2511	while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512	Sglext_leftshiftby8(resultp1,resultp2);
2513	result_exponent -= `8`;
2514	}
2515	/ Now narrow it down to the nibble /
2516	if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517	/ The lower nibble contains the*
2518	* normalizing one */
2519	Sglext_leftshiftby4(resultp1,resultp2);
2520	result_exponent -= `4`;
2521	}
2522	/ Select case where first bit is set (already*
2523	* normalized) otherwise select the proper shift. */
2524	jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525	if (jumpsize <= `7`) switch(jumpsize) {
2526	case `1`:
2527	Sglext_leftshiftby3(resultp1,resultp2);
2528	result_exponent -= `3`;
2529	break;
2530	case `2`:
2531	case `3`:
2532	Sglext_leftshiftby2(resultp1,resultp2);
2533	result_exponent -= `2`;
2534	break;
2535	case `4`:
2536	case `5`:
2537	case `6`:
2538	case `7`:
2539	Sglext_leftshiftby1(resultp1,resultp2);
2540	result_exponent -= `1`;
2541	break;
2542	}
2543	} / end if (hidden...)... /
2544	/ Fall through and round /
2545	} / end if (save < 0)... /
2546	else {
2547	/ Add magnitudes /
2548	Sglext_addition(tmpresp1,tmpresp2,
2549	rightp1,rightp2, /to/resultp1,resultp2);
2550	sign_save = Sgl_signextendedsign(resultp1);
2551	if (Sgl_isone_hiddenoverflow(resultp1)) {
2552	/ Prenormalization required. /
2553	Sglext_arithrightshiftby1(resultp1,resultp2);
2554	result_exponent++;
2555	} / end if hiddenoverflow... /
2556	} / end else ...add magnitudes... /
2557
2558	/ Round the result. If the extension and lower two words are*
2559	* all zeros, then the result is exact. Otherwise round in the
2560	* correct direction. Underflow is possible. If a postnormalization
2561	* is necessary, then the mantissa is all zeros so no shift is needed.
2562	*/
2563	round:
2564	if (result_exponent <= `0` && !Is_underflowtrap_enabled()) {
2565	Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566	}
2567	Sgl_set_sign(resultp1,/using/sign_save);
2568	if (Sglext_isnotzero_mantissap2(resultp2)) {
2569	inexact = TRUE;
2570	switch(Rounding_mode()) {
2571	case ROUNDNEAREST: / The default. /
2572	if (Sglext_isone_highp2(resultp2)) {
2573	/ at least 1/2 ulp /
2574	if (Sglext_isnotzero_low31p2(resultp2) \|\|
2575	Sglext_isone_lowp1(resultp1)) {
2576	/ either exactly half way and odd or*
2577	* more than 1/2ulp */
2578	Sgl_increment(resultp1);
2579	}
2580	}
2581	break;
2582
2583	case ROUNDPLUS:
2584	if (Sgl_iszero_sign(resultp1)) {
2585	/ Round up positive results /
2586	Sgl_increment(resultp1);
2587	}
2588	break;
2589
2590	case ROUNDMINUS:
2591	if (Sgl_isone_sign(resultp1)) {
2592	/ Round down negative results /
2593	Sgl_increment(resultp1);
2594	}
2595
2596	case ROUNDZERO:;
2597	/ truncate is simple /
2598	} / end switch... /
2599	if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600	}
2601	if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602	/ Overflow /
2603	if (Is_overflowtrap_enabled()) {
2604	/*
2605	* Adjust bias of result
2606	*/
2607	Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608	Sgl_copytoptr(resultp1,dstptr);
2609	if (inexact)
2610	if (Is_inexacttrap_enabled())
2611	return (OPC_2E_OVERFLOWEXCEPTION \|
2612	OPC_2E_INEXACTEXCEPTION);
2613	else Set_inexactflag();
2614	return (OPC_2E_OVERFLOWEXCEPTION);
2615	}
2616	inexact = TRUE;
2617	Set_overflowflag();
2618	Sgl_setoverflow(resultp1);
2619	} else if (result_exponent <= `0`) { / underflow case /
2620	if (Is_underflowtrap_enabled()) {
2621	/*
2622	* Adjust bias of result
2623	*/
2624	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625	Sgl_copytoptr(resultp1,dstptr);
2626	if (inexact)
2627	if (Is_inexacttrap_enabled())
2628	return (OPC_2E_UNDERFLOWEXCEPTION \|
2629	OPC_2E_INEXACTEXCEPTION);
2630	else Set_inexactflag();
2631	return(OPC_2E_UNDERFLOWEXCEPTION);
2632	}
2633	else if (inexact && is_tiny) Set_underflowflag();
2634	}
2635	else Sgl_set_exponent(resultp1,result_exponent);
2636	Sgl_copytoptr(resultp1,dstptr);
2637	if (inexact)
2638	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639	else Set_inexactflag();
2640	return(NOEXCEPTION);
2641	}
2642
2643

source code of linux/arch/parisc/math-emu/fmpyfadd.c