softfloat.c source code [linux/arch/arm/nwfpe/softfloat.c]

1	/*
2	===============================================================================
3
4	This C source file is part of the SoftFloat IEC/IEEE Floating-point
5	Arithmetic Package, Release 2.
6
7	Written by John R. Hauser. This work was made possible in part by the
8	International Computer Science Institute, located at Suite 600, 1947 Center
9	Street, Berkeley, California 94704. Funding was partially provided by the
10	National Science Foundation under grant MIP-9311980. The original version
11	of this code was written as part of a project to build a fixed-point vector
12	processor in collaboration with the University of California at Berkeley,
13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14	is available through the web page
15	http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
16
17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
18	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
19	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
20	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
21	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
22
23	Derivative works are acceptable, even for commercial purposes, so long as
24	(1) they include prominent notice that the work is derivative, and (2) they
25	include prominent notice akin to these three paragraphs for those parts of
26	this code that are retained.
27
28	===============================================================================
29	*/
30
31	#include <asm/div64.h>
32
33	#include "fpa11.h"
34	//#include "milieu.h"
35	//#include "softfloat.h"
36
37	/*
38	-------------------------------------------------------------------------------
39	Primitive arithmetic functions, including multi-word arithmetic, and
40	division and square root approximations. (Can be specialized to target if
41	desired.)
42	-------------------------------------------------------------------------------
43	*/
44	#include "softfloat-macros"
45
46	/*
47	-------------------------------------------------------------------------------
48	Functions and definitions to determine: (1) whether tininess for underflow
49	is detected before or after rounding by default, (2) what (if anything)
50	happens when exceptions are raised, (3) how signaling NaNs are distinguished
51	from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52	are propagated from function inputs to output. These details are target-
53	specific.
54	-------------------------------------------------------------------------------
55	*/
56	#include "softfloat-specialize"
57
58	/*
59	-------------------------------------------------------------------------------
60	Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
61	and 7, and returns the properly rounded 32-bit integer corresponding to the
62	input. If `zSign' is nonzero, the input is negated before being converted
63	to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
64	input is simply rounded to an integer, with the inexact exception raised if
65	the input cannot be represented exactly as an integer. If the fixed-point
66	input is too large, however, the invalid exception is raised and the largest
67	positive or negative integer is returned.
68	-------------------------------------------------------------------------------
69	*/
70	static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
71	{
72	int8 roundingMode;
73	flag roundNearestEven;
74	int8 roundIncrement, roundBits;
75	int32 z;
76
77	roundingMode = roundData->mode;
78	roundNearestEven = ( roundingMode == float_round_nearest_even );
79	roundIncrement = `0x40`;
80	if ( ! roundNearestEven ) {
81	if ( roundingMode == float_round_to_zero ) {
82	roundIncrement = `0`;
83	}
84	else {
85	roundIncrement = `0x7F`;
86	if ( zSign ) {
87	if ( roundingMode == float_round_up ) roundIncrement = `0`;
88	}
89	else {
90	if ( roundingMode == float_round_down ) roundIncrement = `0`;
91	}
92	}
93	}
94	roundBits = absZ & `0x7F`;
95	absZ = ( absZ + roundIncrement )>>`7`;
96	absZ &= ~ ( ( ( roundBits ^ `0x40` ) == `0` ) & roundNearestEven );
97	z = absZ;
98	if ( zSign ) z = - z;
99	if ( ( absZ>>`32` ) \|\| ( z && ( ( z < `0` ) ^ zSign ) ) ) {
100	roundData->exception \|= float_flag_invalid;
101	return zSign ? `0x80000000` : `0x7FFFFFFF`;
102	}
103	if ( roundBits ) roundData->exception \|= float_flag_inexact;
104	return z;
105
106	}
107
108	/*
109	-------------------------------------------------------------------------------
110	Returns the fraction bits of the single-precision floating-point value `a'.
111	-------------------------------------------------------------------------------
112	*/
113	INLINE bits32 extractFloat32Frac( float32 a )
114	{
115
116	return a & `0x007FFFFF`;
117
118	}
119
120	/*
121	-------------------------------------------------------------------------------
122	Returns the exponent bits of the single-precision floating-point value `a'.
123	-------------------------------------------------------------------------------
124	*/
125	INLINE int16 extractFloat32Exp( float32 a )
126	{
127
128	return ( a>>`23` ) & `0xFF`;
129
130	}
131
132	/*
133	-------------------------------------------------------------------------------
134	Returns the sign bit of the single-precision floating-point value `a'.
135	-------------------------------------------------------------------------------
136	*/
137	#if 0 /* in softfloat.h */
138	INLINE flag extractFloat32Sign( float32 a )
139	{
140
141	return a>>`31`;
142
143	}
144	#endif
145
146	/*
147	-------------------------------------------------------------------------------
148	Normalizes the subnormal single-precision floating-point value represented
149	by the denormalized significand `aSig'. The normalized exponent and
150	significand are stored at the locations pointed to by `zExpPtr' and
151	`zSigPtr', respectively.
152	-------------------------------------------------------------------------------
153	*/
154	static void
155	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
156	{
157	int8 shiftCount;
158
159	shiftCount = countLeadingZeros32( a: aSig ) - `8`;
160	*zSigPtr = aSig<<shiftCount;
161	*zExpPtr = `1` - shiftCount;
162
163	}
164
165	/*
166	-------------------------------------------------------------------------------
167	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
168	single-precision floating-point value, returning the result. After being
169	shifted into the proper positions, the three fields are simply added
170	together to form the result. This means that any integer portion of `zSig'
171	will be added into the exponent. Since a properly normalized significand
172	will have an integer portion equal to 1, the `zExp' input should be 1 less
173	than the desired result exponent whenever `zSig' is a complete, normalized
174	significand.
175	-------------------------------------------------------------------------------
176	*/
177	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
178	{
179	#if 0
180	float32 f;
181	__asm__("@ packFloat32 \n\
182	mov %0, %1, asl #31 \n\
183	orr %0, %2, asl #23 \n\
184	orr %0, %3"
185	: / no outputs /
186	: "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
187	: "cc");
188	return f;
189	#else
190	return ( ( (bits32) zSign )<<`31` ) + ( ( (bits32) zExp )<<`23` ) + zSig;
191	#endif
192	}
193
194	/*
195	-------------------------------------------------------------------------------
196	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
197	and significand `zSig', and returns the proper single-precision floating-
198	point value corresponding to the abstract input. Ordinarily, the abstract
199	value is simply rounded and packed into the single-precision format, with
200	the inexact exception raised if the abstract input cannot be represented
201	exactly. If the abstract value is too large, however, the overflow and
202	inexact exceptions are raised and an infinity or maximal finite value is
203	returned. If the abstract value is too small, the input value is rounded to
204	a subnormal number, and the underflow and inexact exceptions are raised if
205	the abstract input cannot be represented exactly as a subnormal single-
206	precision floating-point number.
207	The input significand `zSig' has its binary point between bits 30
208	and 29, which is 7 bits to the left of the usual location. This shifted
209	significand must be normalized or smaller. If `zSig' is not normalized,
210	`zExp' must be 0; in that case, the result returned is a subnormal number,
211	and it must not require rounding. In the usual case that `zSig' is
212	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
213	The handling of underflow and overflow follows the IEC/IEEE Standard for
214	Binary Floating-point Arithmetic.
215	-------------------------------------------------------------------------------
216	*/
217	static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
218	{
219	int8 roundingMode;
220	flag roundNearestEven;
221	int8 roundIncrement, roundBits;
222	flag isTiny;
223
224	roundingMode = roundData->mode;
225	roundNearestEven = ( roundingMode == float_round_nearest_even );
226	roundIncrement = `0x40`;
227	if ( ! roundNearestEven ) {
228	if ( roundingMode == float_round_to_zero ) {
229	roundIncrement = `0`;
230	}
231	else {
232	roundIncrement = `0x7F`;
233	if ( zSign ) {
234	if ( roundingMode == float_round_up ) roundIncrement = `0`;
235	}
236	else {
237	if ( roundingMode == float_round_down ) roundIncrement = `0`;
238	}
239	}
240	}
241	roundBits = zSig & `0x7F`;
242	if ( `0xFD` <= (bits16) zExp ) {
243	if ( ( `0xFD` < zExp )
244	\|\| ( ( zExp == `0xFD` )
245	&& ( (sbits32) ( zSig + roundIncrement ) < `0` ) )
246	) {
247	roundData->exception \|= float_flag_overflow \| float_flag_inexact;
248	return packFloat32( zSign, zExp: `0xFF`, zSig: `0` ) - ( roundIncrement == `0` );
249	}
250	if ( zExp < `0` ) {
251	isTiny =
252	( float_detect_tininess == float_tininess_before_rounding )
253	\|\| ( zExp < -`1` )
254	\|\| ( zSig + roundIncrement < `0x80000000` );
255	shift32RightJamming( a: zSig, count: - zExp, zPtr: &zSig );
256	zExp = `0`;
257	roundBits = zSig & `0x7F`;
258	if ( isTiny && roundBits ) roundData->exception \|= float_flag_underflow;
259	}
260	}
261	if ( roundBits ) roundData->exception \|= float_flag_inexact;
262	zSig = ( zSig + roundIncrement )>>`7`;
263	zSig &= ~ ( ( ( roundBits ^ `0x40` ) == `0` ) & roundNearestEven );
264	if ( zSig == `0` ) zExp = `0`;
265	return packFloat32( zSign, zExp, zSig );
266
267	}
268
269	/*
270	-------------------------------------------------------------------------------
271	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
272	and significand `zSig', and returns the proper single-precision floating-
273	point value corresponding to the abstract input. This routine is just like
274	`roundAndPackFloat32' except that `zSig' does not have to be normalized in
275	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
276	point exponent.
277	-------------------------------------------------------------------------------
278	*/
279	static float32
280	normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
281	{
282	int8 shiftCount;
283
284	shiftCount = countLeadingZeros32( a: zSig ) - `1`;
285	return roundAndPackFloat32( roundData, zSign, zExp: zExp - shiftCount, zSig: zSig<<shiftCount );
286
287	}
288
289	/*
290	-------------------------------------------------------------------------------
291	Returns the fraction bits of the double-precision floating-point value `a'.
292	-------------------------------------------------------------------------------
293	*/
294	INLINE bits64 extractFloat64Frac( float64 a )
295	{
296
297	return a & LIT64( `0x000FFFFFFFFFFFFF` );
298
299	}
300
301	/*
302	-------------------------------------------------------------------------------
303	Returns the exponent bits of the double-precision floating-point value `a'.
304	-------------------------------------------------------------------------------
305	*/
306	INLINE int16 extractFloat64Exp( float64 a )
307	{
308
309	return ( a>>`52` ) & `0x7FF`;
310
311	}
312
313	/*
314	-------------------------------------------------------------------------------
315	Returns the sign bit of the double-precision floating-point value `a'.
316	-------------------------------------------------------------------------------
317	*/
318	#if 0 /* in softfloat.h */
319	INLINE flag extractFloat64Sign( float64 a )
320	{
321
322	return a>>`63`;
323
324	}
325	#endif
326
327	/*
328	-------------------------------------------------------------------------------
329	Normalizes the subnormal double-precision floating-point value represented
330	by the denormalized significand `aSig'. The normalized exponent and
331	significand are stored at the locations pointed to by `zExpPtr' and
332	`zSigPtr', respectively.
333	-------------------------------------------------------------------------------
334	*/
335	static void
336	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
337	{
338	int8 shiftCount;
339
340	shiftCount = countLeadingZeros64( a: aSig ) - `11`;
341	*zSigPtr = aSig<<shiftCount;
342	*zExpPtr = `1` - shiftCount;
343
344	}
345
346	/*
347	-------------------------------------------------------------------------------
348	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
349	double-precision floating-point value, returning the result. After being
350	shifted into the proper positions, the three fields are simply added
351	together to form the result. This means that any integer portion of `zSig'
352	will be added into the exponent. Since a properly normalized significand
353	will have an integer portion equal to 1, the `zExp' input should be 1 less
354	than the desired result exponent whenever `zSig' is a complete, normalized
355	significand.
356	-------------------------------------------------------------------------------
357	*/
358	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
359	{
360
361	return ( ( (bits64) zSign )<<`63` ) + ( ( (bits64) zExp )<<`52` ) + zSig;
362
363	}
364
365	/*
366	-------------------------------------------------------------------------------
367	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368	and significand `zSig', and returns the proper double-precision floating-
369	point value corresponding to the abstract input. Ordinarily, the abstract
370	value is simply rounded and packed into the double-precision format, with
371	the inexact exception raised if the abstract input cannot be represented
372	exactly. If the abstract value is too large, however, the overflow and
373	inexact exceptions are raised and an infinity or maximal finite value is
374	returned. If the abstract value is too small, the input value is rounded to
375	a subnormal number, and the underflow and inexact exceptions are raised if
376	the abstract input cannot be represented exactly as a subnormal double-
377	precision floating-point number.
378	The input significand `zSig' has its binary point between bits 62
379	and 61, which is 10 bits to the left of the usual location. This shifted
380	significand must be normalized or smaller. If `zSig' is not normalized,
381	`zExp' must be 0; in that case, the result returned is a subnormal number,
382	and it must not require rounding. In the usual case that `zSig' is
383	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
384	The handling of underflow and overflow follows the IEC/IEEE Standard for
385	Binary Floating-point Arithmetic.
386	-------------------------------------------------------------------------------
387	*/
388	static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
389	{
390	int8 roundingMode;
391	flag roundNearestEven;
392	int16 roundIncrement, roundBits;
393	flag isTiny;
394
395	roundingMode = roundData->mode;
396	roundNearestEven = ( roundingMode == float_round_nearest_even );
397	roundIncrement = `0x200`;
398	if ( ! roundNearestEven ) {
399	if ( roundingMode == float_round_to_zero ) {
400	roundIncrement = `0`;
401	}
402	else {
403	roundIncrement = `0x3FF`;
404	if ( zSign ) {
405	if ( roundingMode == float_round_up ) roundIncrement = `0`;
406	}
407	else {
408	if ( roundingMode == float_round_down ) roundIncrement = `0`;
409	}
410	}
411	}
412	roundBits = zSig & `0x3FF`;
413	if ( `0x7FD` <= (bits16) zExp ) {
414	if ( ( `0x7FD` < zExp )
415	\|\| ( ( zExp == `0x7FD` )
416	&& ( (sbits64) ( zSig + roundIncrement ) < `0` ) )
417	) {
418	//register int lr = __builtin_return_address(0);
419	//printk("roundAndPackFloat64 called from 0x%08x\n",lr);
420	roundData->exception \|= float_flag_overflow \| float_flag_inexact;
421	return packFloat64( zSign, zExp: `0x7FF`, zSig: `0` ) - ( roundIncrement == `0` );
422	}
423	if ( zExp < `0` ) {
424	isTiny =
425	( float_detect_tininess == float_tininess_before_rounding )
426	\|\| ( zExp < -`1` )
427	\|\| ( zSig + roundIncrement < LIT64( `0x8000000000000000` ) );
428	shift64RightJamming( a: zSig, count: - zExp, zPtr: &zSig );
429	zExp = `0`;
430	roundBits = zSig & `0x3FF`;
431	if ( isTiny && roundBits ) roundData->exception \|= float_flag_underflow;
432	}
433	}
434	if ( roundBits ) roundData->exception \|= float_flag_inexact;
435	zSig = ( zSig + roundIncrement )>>`10`;
436	zSig &= ~ ( ( ( roundBits ^ `0x200` ) == `0` ) & roundNearestEven );
437	if ( zSig == `0` ) zExp = `0`;
438	return packFloat64( zSign, zExp, zSig );
439
440	}
441
442	/*
443	-------------------------------------------------------------------------------
444	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
445	and significand `zSig', and returns the proper double-precision floating-
446	point value corresponding to the abstract input. This routine is just like
447	`roundAndPackFloat64' except that `zSig' does not have to be normalized in
448	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
449	point exponent.
450	-------------------------------------------------------------------------------
451	*/
452	static float64
453	normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
454	{
455	int8 shiftCount;
456
457	shiftCount = countLeadingZeros64( a: zSig ) - `1`;
458	return roundAndPackFloat64( roundData, zSign, zExp: zExp - shiftCount, zSig: zSig<<shiftCount );
459
460	}
461
462	#ifdef FLOATX80
463
464	/*
465	-------------------------------------------------------------------------------
466	Returns the fraction bits of the extended double-precision floating-point
467	value `a'.
468	-------------------------------------------------------------------------------
469	*/
470	INLINE bits64 extractFloatx80Frac( floatx80 a )
471	{
472
473	return a.low;
474
475	}
476
477	/*
478	-------------------------------------------------------------------------------
479	Returns the exponent bits of the extended double-precision floating-point
480	value `a'.
481	-------------------------------------------------------------------------------
482	*/
483	INLINE int32 extractFloatx80Exp( floatx80 a )
484	{
485
486	return a.high & `0x7FFF`;
487
488	}
489
490	/*
491	-------------------------------------------------------------------------------
492	Returns the sign bit of the extended double-precision floating-point value
493	`a'.
494	-------------------------------------------------------------------------------
495	*/
496	INLINE flag extractFloatx80Sign( floatx80 a )
497	{
498
499	return a.high>>`15`;
500
501	}
502
503	/*
504	-------------------------------------------------------------------------------
505	Normalizes the subnormal extended double-precision floating-point value
506	represented by the denormalized significand `aSig'. The normalized exponent
507	and significand are stored at the locations pointed to by `zExpPtr' and
508	`zSigPtr', respectively.
509	-------------------------------------------------------------------------------
510	*/
511	static void
512	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
513	{
514	int8 shiftCount;
515
516	shiftCount = countLeadingZeros64( aSig );
517	*zSigPtr = aSig<<shiftCount;
518	*zExpPtr = `1` - shiftCount;
519
520	}
521
522	/*
523	-------------------------------------------------------------------------------
524	Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
525	extended double-precision floating-point value, returning the result.
526	-------------------------------------------------------------------------------
527	*/
528	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
529	{
530	floatx80 z;
531
532	z.low = zSig;
533	z.high = ( ( (bits16) zSign )<<`15` ) + zExp;
534	z.__padding = `0`;
535	return z;
536
537	}
538
539	/*
540	-------------------------------------------------------------------------------
541	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
542	and extended significand formed by the concatenation of `zSig0' and `zSig1',
543	and returns the proper extended double-precision floating-point value
544	corresponding to the abstract input. Ordinarily, the abstract value is
545	rounded and packed into the extended double-precision format, with the
546	inexact exception raised if the abstract input cannot be represented
547	exactly. If the abstract value is too large, however, the overflow and
548	inexact exceptions are raised and an infinity or maximal finite value is
549	returned. If the abstract value is too small, the input value is rounded to
550	a subnormal number, and the underflow and inexact exceptions are raised if
551	the abstract input cannot be represented exactly as a subnormal extended
552	double-precision floating-point number.
553	If `roundingPrecision' is 32 or 64, the result is rounded to the same
554	number of bits as single or double precision, respectively. Otherwise, the
555	result is rounded to the full precision of the extended double-precision
556	format.
557	The input significand must be normalized or smaller. If the input
558	significand is not normalized, `zExp' must be 0; in that case, the result
559	returned is a subnormal number, and it must not require rounding. The
560	handling of underflow and overflow follows the IEC/IEEE Standard for Binary
561	Floating-point Arithmetic.
562	-------------------------------------------------------------------------------
563	*/
564	static floatx80
565	roundAndPackFloatx80(
566	struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
567	)
568	{
569	int8 roundingMode, roundingPrecision;
570	flag roundNearestEven, increment, isTiny;
571	int64 roundIncrement, roundMask, roundBits;
572
573	roundingMode = roundData->mode;
574	roundingPrecision = roundData->precision;
575	roundNearestEven = ( roundingMode == float_round_nearest_even );
576	if ( roundingPrecision == `80` ) goto precision80;
577	if ( roundingPrecision == `64` ) {
578	roundIncrement = LIT64( `0x0000000000000400` );
579	roundMask = LIT64( `0x00000000000007FF` );
580	}
581	else if ( roundingPrecision == `32` ) {
582	roundIncrement = LIT64( `0x0000008000000000` );
583	roundMask = LIT64( `0x000000FFFFFFFFFF` );
584	}
585	else {
586	goto precision80;
587	}
588	zSig0 \|= ( zSig1 != `0` );
589	if ( ! roundNearestEven ) {
590	if ( roundingMode == float_round_to_zero ) {
591	roundIncrement = `0`;
592	}
593	else {
594	roundIncrement = roundMask;
595	if ( zSign ) {
596	if ( roundingMode == float_round_up ) roundIncrement = `0`;
597	}
598	else {
599	if ( roundingMode == float_round_down ) roundIncrement = `0`;
600	}
601	}
602	}
603	roundBits = zSig0 & roundMask;
604	if ( `0x7FFD` <= (bits32) ( zExp - `1` ) ) {
605	if ( ( `0x7FFE` < zExp )
606	\|\| ( ( zExp == `0x7FFE` ) && ( zSig0 + roundIncrement < zSig0 ) )
607	) {
608	goto overflow;
609	}
610	if ( zExp <= `0` ) {
611	isTiny =
612	( float_detect_tininess == float_tininess_before_rounding )
613	\|\| ( zExp < `0` )
614	\|\| ( zSig0 <= zSig0 + roundIncrement );
615	shift64RightJamming( zSig0, `1` - zExp, &zSig0 );
616	zExp = `0`;
617	roundBits = zSig0 & roundMask;
618	if ( isTiny && roundBits ) roundData->exception \|= float_flag_underflow;
619	if ( roundBits ) roundData->exception \|= float_flag_inexact;
620	zSig0 += roundIncrement;
621	if ( (sbits64) zSig0 < `0` ) zExp = `1`;
622	roundIncrement = roundMask + `1`;
623	if ( roundNearestEven && ( roundBits<<`1` == roundIncrement ) ) {
624	roundMask \|= roundIncrement;
625	}
626	zSig0 &= ~ roundMask;
627	return packFloatx80( zSign, zExp, zSig0 );
628	}
629	}
630	if ( roundBits ) roundData->exception \|= float_flag_inexact;
631	zSig0 += roundIncrement;
632	if ( zSig0 < roundIncrement ) {
633	++zExp;
634	zSig0 = LIT64( `0x8000000000000000` );
635	}
636	roundIncrement = roundMask + `1`;
637	if ( roundNearestEven && ( roundBits<<`1` == roundIncrement ) ) {
638	roundMask \|= roundIncrement;
639	}
640	zSig0 &= ~ roundMask;
641	if ( zSig0 == `0` ) zExp = `0`;
642	return packFloatx80( zSign, zExp, zSig0 );
643	precision80:
644	increment = ( (sbits64) zSig1 < `0` );
645	if ( ! roundNearestEven ) {
646	if ( roundingMode == float_round_to_zero ) {
647	increment = `0`;
648	}
649	else {
650	if ( zSign ) {
651	increment = ( roundingMode == float_round_down ) && zSig1;
652	}
653	else {
654	increment = ( roundingMode == float_round_up ) && zSig1;
655	}
656	}
657	}
658	if ( `0x7FFD` <= (bits32) ( zExp - `1` ) ) {
659	if ( ( `0x7FFE` < zExp )
660	\|\| ( ( zExp == `0x7FFE` )
661	&& ( zSig0 == LIT64( `0xFFFFFFFFFFFFFFFF` ) )
662	&& increment
663	)
664	) {
665	roundMask = `0`;
666	overflow:
667	roundData->exception \|= float_flag_overflow \| float_flag_inexact;
668	if ( ( roundingMode == float_round_to_zero )
669	\|\| ( zSign && ( roundingMode == float_round_up ) )
670	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
671	) {
672	return packFloatx80( zSign, `0x7FFE`, ~ roundMask );
673	}
674	return packFloatx80( zSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
675	}
676	if ( zExp <= `0` ) {
677	isTiny =
678	( float_detect_tininess == float_tininess_before_rounding )
679	\|\| ( zExp < `0` )
680	\|\| ! increment
681	\|\| ( zSig0 < LIT64( `0xFFFFFFFFFFFFFFFF` ) );
682	shift64ExtraRightJamming( zSig0, zSig1, `1` - zExp, &zSig0, &zSig1 );
683	zExp = `0`;
684	if ( isTiny && zSig1 ) roundData->exception \|= float_flag_underflow;
685	if ( zSig1 ) roundData->exception \|= float_flag_inexact;
686	if ( roundNearestEven ) {
687	increment = ( (sbits64) zSig1 < `0` );
688	}
689	else {
690	if ( zSign ) {
691	increment = ( roundingMode == float_round_down ) && zSig1;
692	}
693	else {
694	increment = ( roundingMode == float_round_up ) && zSig1;
695	}
696	}
697	if ( increment ) {
698	++zSig0;
699	zSig0 &= ~ ( ( zSig1 + zSig1 == `0` ) & roundNearestEven );
700	if ( (sbits64) zSig0 < `0` ) zExp = `1`;
701	}
702	return packFloatx80( zSign, zExp, zSig0 );
703	}
704	}
705	if ( zSig1 ) roundData->exception \|= float_flag_inexact;
706	if ( increment ) {
707	++zSig0;
708	if ( zSig0 == `0` ) {
709	++zExp;
710	zSig0 = LIT64( `0x8000000000000000` );
711	}
712	else {
713	zSig0 &= ~ ( ( zSig1 + zSig1 == `0` ) & roundNearestEven );
714	}
715	}
716	else {
717	if ( zSig0 == `0` ) zExp = `0`;
718	}
719
720	return packFloatx80( zSign, zExp, zSig0 );
721	}
722
723	/*
724	-------------------------------------------------------------------------------
725	Takes an abstract floating-point value having sign `zSign', exponent
726	`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
727	and returns the proper extended double-precision floating-point value
728	corresponding to the abstract input. This routine is just like
729	`roundAndPackFloatx80' except that the input significand does not have to be
730	normalized.
731	-------------------------------------------------------------------------------
732	*/
733	static floatx80
734	normalizeRoundAndPackFloatx80(
735	struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
736	)
737	{
738	int8 shiftCount;
739
740	if ( zSig0 == `0` ) {
741	zSig0 = zSig1;
742	zSig1 = `0`;
743	zExp -= `64`;
744	}
745	shiftCount = countLeadingZeros64( zSig0 );
746	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
747	zExp -= shiftCount;
748	return
749	roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
750
751	}
752
753	#endif
754
755	/*
756	-------------------------------------------------------------------------------
757	Returns the result of converting the 32-bit two's complement integer `a' to
758	the single-precision floating-point format. The conversion is performed
759	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
760	-------------------------------------------------------------------------------
761	*/
762	float32 int32_to_float32(struct roundingData *roundData, int32 a)
763	{
764	flag zSign;
765
766	if ( a == `0` ) return `0`;
767	if ( a == `0x80000000` ) return packFloat32( zSign: `1`, zExp: `0x9E`, zSig: `0` );
768	zSign = ( a < `0` );
769	return normalizeRoundAndPackFloat32( roundData, zSign, zExp: `0x9C`, zSig: zSign ? - a : a );
770
771	}
772
773	/*
774	-------------------------------------------------------------------------------
775	Returns the result of converting the 32-bit two's complement integer `a' to
776	the double-precision floating-point format. The conversion is performed
777	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
778	-------------------------------------------------------------------------------
779	*/
780	float64 int32_to_float64( int32 a )
781	{
782	flag aSign;
783	uint32 absA;
784	int8 shiftCount;
785	bits64 zSig;
786
787	if ( a == `0` ) return `0`;
788	aSign = ( a < `0` );
789	absA = aSign ? - a : a;
790	shiftCount = countLeadingZeros32( a: absA ) + `21`;
791	zSig = absA;
792	return packFloat64( zSign: aSign, zExp: `0x432` - shiftCount, zSig: zSig<<shiftCount );
793
794	}
795
796	#ifdef FLOATX80
797
798	/*
799	-------------------------------------------------------------------------------
800	Returns the result of converting the 32-bit two's complement integer `a'
801	to the extended double-precision floating-point format. The conversion
802	is performed according to the IEC/IEEE Standard for Binary Floating-point
803	Arithmetic.
804	-------------------------------------------------------------------------------
805	*/
806	floatx80 int32_to_floatx80( int32 a )
807	{
808	flag zSign;
809	uint32 absA;
810	int8 shiftCount;
811	bits64 zSig;
812
813	if ( a == `0` ) return packFloatx80( `0`, `0`, `0` );
814	zSign = ( a < `0` );
815	absA = zSign ? - a : a;
816	shiftCount = countLeadingZeros32( absA ) + `32`;
817	zSig = absA;
818	return packFloatx80( zSign, `0x403E` - shiftCount, zSig<<shiftCount );
819
820	}
821
822	#endif
823
824	/*
825	-------------------------------------------------------------------------------
826	Returns the result of converting the single-precision floating-point value
827	`a' to the 32-bit two's complement integer format. The conversion is
828	performed according to the IEC/IEEE Standard for Binary Floating-point
829	Arithmetic---which means in particular that the conversion is rounded
830	according to the current rounding mode. If `a' is a NaN, the largest
831	positive integer is returned. Otherwise, if the conversion overflows, the
832	largest integer with the same sign as `a' is returned.
833	-------------------------------------------------------------------------------
834	*/
835	int32 float32_to_int32( struct roundingData *roundData, float32 a )
836	{
837	flag aSign;
838	int16 aExp, shiftCount;
839	bits32 aSig;
840	bits64 zSig;
841
842	aSig = extractFloat32Frac( a );
843	aExp = extractFloat32Exp( a );
844	aSign = extractFloat32Sign( a );
845	if ( ( aExp == `0x7FF` ) && aSig ) aSign = `0`;
846	if ( aExp ) aSig \|= `0x00800000`;
847	shiftCount = `0xAF` - aExp;
848	zSig = aSig;
849	zSig <<= `32`;
850	if ( `0` < shiftCount ) shift64RightJamming( a: zSig, count: shiftCount, zPtr: &zSig );
851	return roundAndPackInt32( roundData, zSign: aSign, absZ: zSig );
852
853	}
854
855	/*
856	-------------------------------------------------------------------------------
857	Returns the result of converting the single-precision floating-point value
858	`a' to the 32-bit two's complement integer format. The conversion is
859	performed according to the IEC/IEEE Standard for Binary Floating-point
860	Arithmetic, except that the conversion is always rounded toward zero. If
861	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
862	conversion overflows, the largest integer with the same sign as `a' is
863	returned.
864	-------------------------------------------------------------------------------
865	*/
866	int32 float32_to_int32_round_to_zero( float32 a )
867	{
868	flag aSign;
869	int16 aExp, shiftCount;
870	bits32 aSig;
871	int32 z;
872
873	aSig = extractFloat32Frac( a );
874	aExp = extractFloat32Exp( a );
875	aSign = extractFloat32Sign( a );
876	shiftCount = aExp - `0x9E`;
877	if ( `0` <= shiftCount ) {
878	if ( a == `0xCF000000` ) return `0x80000000`;
879	float_raise( float_flag_invalid );
880	if ( ! aSign \|\| ( ( aExp == `0xFF` ) && aSig ) ) return `0x7FFFFFFF`;
881	return `0x80000000`;
882	}
883	else if ( aExp <= `0x7E` ) {
884	if ( aExp \| aSig ) float_raise( float_flag_inexact );
885	return `0`;
886	}
887	aSig = ( aSig \| `0x00800000` )<<`8`;
888	z = aSig>>( - shiftCount );
889	if ( (bits32) ( aSig<<( shiftCount & `31` ) ) ) {
890	float_raise( float_flag_inexact );
891	}
892	return aSign ? - z : z;
893
894	}
895
896	/*
897	-------------------------------------------------------------------------------
898	Returns the result of converting the single-precision floating-point value
899	`a' to the double-precision floating-point format. The conversion is
900	performed according to the IEC/IEEE Standard for Binary Floating-point
901	Arithmetic.
902	-------------------------------------------------------------------------------
903	*/
904	float64 float32_to_float64( float32 a )
905	{
906	flag aSign;
907	int16 aExp;
908	bits32 aSig;
909
910	aSig = extractFloat32Frac( a );
911	aExp = extractFloat32Exp( a );
912	aSign = extractFloat32Sign( a );
913	if ( aExp == `0xFF` ) {
914	if ( aSig ) return commonNaNToFloat64( a: float32ToCommonNaN( a ) );
915	return packFloat64( zSign: aSign, zExp: `0x7FF`, zSig: `0` );
916	}
917	if ( aExp == `0` ) {
918	if ( aSig == `0` ) return packFloat64( zSign: aSign, zExp: `0`, zSig: `0` );
919	normalizeFloat32Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
920	--aExp;
921	}
922	return packFloat64( zSign: aSign, zExp: aExp + `0x380`, zSig: ( (bits64) aSig )<<`29` );
923
924	}
925
926	#ifdef FLOATX80
927
928	/*
929	-------------------------------------------------------------------------------
930	Returns the result of converting the single-precision floating-point value
931	`a' to the extended double-precision floating-point format. The conversion
932	is performed according to the IEC/IEEE Standard for Binary Floating-point
933	Arithmetic.
934	-------------------------------------------------------------------------------
935	*/
936	floatx80 float32_to_floatx80( float32 a )
937	{
938	flag aSign;
939	int16 aExp;
940	bits32 aSig;
941
942	aSig = extractFloat32Frac( a );
943	aExp = extractFloat32Exp( a );
944	aSign = extractFloat32Sign( a );
945	if ( aExp == `0xFF` ) {
946	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
947	return packFloatx80( aSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
948	}
949	if ( aExp == `0` ) {
950	if ( aSig == `0` ) return packFloatx80( aSign, `0`, `0` );
951	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
952	}
953	aSig \|= `0x00800000`;
954	return packFloatx80( aSign, aExp + `0x3F80`, ( (bits64) aSig )<<`40` );
955
956	}
957
958	#endif
959
960	/*
961	-------------------------------------------------------------------------------
962	Rounds the single-precision floating-point value `a' to an integer, and
963	returns the result as a single-precision floating-point value. The
964	operation is performed according to the IEC/IEEE Standard for Binary
965	Floating-point Arithmetic.
966	-------------------------------------------------------------------------------
967	*/
968	float32 float32_round_to_int( struct roundingData *roundData, float32 a )
969	{
970	flag aSign;
971	int16 aExp;
972	bits32 lastBitMask, roundBitsMask;
973	int8 roundingMode;
974	float32 z;
975
976	aExp = extractFloat32Exp( a );
977	if ( `0x96` <= aExp ) {
978	if ( ( aExp == `0xFF` ) && extractFloat32Frac( a ) ) {
979	return propagateFloat32NaN( a, b: a );
980	}
981	return a;
982	}
983	roundingMode = roundData->mode;
984	if ( aExp <= `0x7E` ) {
985	if ( (bits32) ( a<<`1` ) == `0` ) return a;
986	roundData->exception \|= float_flag_inexact;
987	aSign = extractFloat32Sign( a );
988	switch ( roundingMode ) {
989	case float_round_nearest_even:
990	if ( ( aExp == `0x7E` ) && extractFloat32Frac( a ) ) {
991	return packFloat32( zSign: aSign, zExp: `0x7F`, zSig: `0` );
992	}
993	break;
994	case float_round_down:
995	return aSign ? `0xBF800000` : `0`;
996	case float_round_up:
997	return aSign ? `0x80000000` : `0x3F800000`;
998	}
999	return packFloat32( zSign: aSign, zExp: `0`, zSig: `0` );
1000	}
1001	lastBitMask = `1`;
1002	lastBitMask <<= `0x96` - aExp;
1003	roundBitsMask = lastBitMask - `1`;
1004	z = a;
1005	if ( roundingMode == float_round_nearest_even ) {
1006	z += lastBitMask>>`1`;
1007	if ( ( z & roundBitsMask ) == `0` ) z &= ~ lastBitMask;
1008	}
1009	else if ( roundingMode != float_round_to_zero ) {
1010	if ( extractFloat32Sign( a: z ) ^ ( roundingMode == float_round_up ) ) {
1011	z += roundBitsMask;
1012	}
1013	}
1014	z &= ~ roundBitsMask;
1015	if ( z != a ) roundData->exception \|= float_flag_inexact;
1016	return z;
1017
1018	}
1019
1020	/*
1021	-------------------------------------------------------------------------------
1022	Returns the result of adding the absolute values of the single-precision
1023	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1024	before being returned. `zSign' is ignored if the result is a NaN. The
1025	addition is performed according to the IEC/IEEE Standard for Binary
1026	Floating-point Arithmetic.
1027	-------------------------------------------------------------------------------
1028	*/
1029	static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
1030	{
1031	int16 aExp, bExp, zExp;
1032	bits32 aSig, bSig, zSig;
1033	int16 expDiff;
1034
1035	aSig = extractFloat32Frac( a );
1036	aExp = extractFloat32Exp( a );
1037	bSig = extractFloat32Frac( a: b );
1038	bExp = extractFloat32Exp( a: b );
1039	expDiff = aExp - bExp;
1040	aSig <<= `6`;
1041	bSig <<= `6`;
1042	if ( `0` < expDiff ) {
1043	if ( aExp == `0xFF` ) {
1044	if ( aSig ) return propagateFloat32NaN( a, b );
1045	return a;
1046	}
1047	if ( bExp == `0` ) {
1048	--expDiff;
1049	}
1050	else {
1051	bSig \|= `0x20000000`;
1052	}
1053	shift32RightJamming( a: bSig, count: expDiff, zPtr: &bSig );
1054	zExp = aExp;
1055	}
1056	else if ( expDiff < `0` ) {
1057	if ( bExp == `0xFF` ) {
1058	if ( bSig ) return propagateFloat32NaN( a, b );
1059	return packFloat32( zSign, zExp: `0xFF`, zSig: `0` );
1060	}
1061	if ( aExp == `0` ) {
1062	++expDiff;
1063	}
1064	else {
1065	aSig \|= `0x20000000`;
1066	}
1067	shift32RightJamming( a: aSig, count: - expDiff, zPtr: &aSig );
1068	zExp = bExp;
1069	}
1070	else {
1071	if ( aExp == `0xFF` ) {
1072	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
1073	return a;
1074	}
1075	if ( aExp == `0` ) return packFloat32( zSign, zExp: `0`, zSig: ( aSig + bSig )>>`6` );
1076	zSig = `0x40000000` + aSig + bSig;
1077	zExp = aExp;
1078	goto roundAndPack;
1079	}
1080	aSig \|= `0x20000000`;
1081	zSig = ( aSig + bSig )<<`1`;
1082	--zExp;
1083	if ( (sbits32) zSig < `0` ) {
1084	zSig = aSig + bSig;
1085	++zExp;
1086	}
1087	roundAndPack:
1088	return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1089
1090	}
1091
1092	/*
1093	-------------------------------------------------------------------------------
1094	Returns the result of subtracting the absolute values of the single-
1095	precision floating-point values `a' and `b'. If `zSign' is true, the
1096	difference is negated before being returned. `zSign' is ignored if the
1097	result is a NaN. The subtraction is performed according to the IEC/IEEE
1098	Standard for Binary Floating-point Arithmetic.
1099	-------------------------------------------------------------------------------
1100	*/
1101	static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
1102	{
1103	int16 aExp, bExp, zExp;
1104	bits32 aSig, bSig, zSig;
1105	int16 expDiff;
1106
1107	aSig = extractFloat32Frac( a );
1108	aExp = extractFloat32Exp( a );
1109	bSig = extractFloat32Frac( a: b );
1110	bExp = extractFloat32Exp( a: b );
1111	expDiff = aExp - bExp;
1112	aSig <<= `7`;
1113	bSig <<= `7`;
1114	if ( `0` < expDiff ) goto aExpBigger;
1115	if ( expDiff < `0` ) goto bExpBigger;
1116	if ( aExp == `0xFF` ) {
1117	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
1118	roundData->exception \|= float_flag_invalid;
1119	return float32_default_nan;
1120	}
1121	if ( aExp == `0` ) {
1122	aExp = `1`;
1123	bExp = `1`;
1124	}
1125	if ( bSig < aSig ) goto aBigger;
1126	if ( aSig < bSig ) goto bBigger;
1127	return packFloat32( zSign: roundData->mode == float_round_down, zExp: `0`, zSig: `0` );
1128	bExpBigger:
1129	if ( bExp == `0xFF` ) {
1130	if ( bSig ) return propagateFloat32NaN( a, b );
1131	return packFloat32( zSign: zSign ^ `1`, zExp: `0xFF`, zSig: `0` );
1132	}
1133	if ( aExp == `0` ) {
1134	++expDiff;
1135	}
1136	else {
1137	aSig \|= `0x40000000`;
1138	}
1139	shift32RightJamming( a: aSig, count: - expDiff, zPtr: &aSig );
1140	bSig \|= `0x40000000`;
1141	bBigger:
1142	zSig = bSig - aSig;
1143	zExp = bExp;
1144	zSign ^= `1`;
1145	goto normalizeRoundAndPack;
1146	aExpBigger:
1147	if ( aExp == `0xFF` ) {
1148	if ( aSig ) return propagateFloat32NaN( a, b );
1149	return a;
1150	}
1151	if ( bExp == `0` ) {
1152	--expDiff;
1153	}
1154	else {
1155	bSig \|= `0x40000000`;
1156	}
1157	shift32RightJamming( a: bSig, count: expDiff, zPtr: &bSig );
1158	aSig \|= `0x40000000`;
1159	aBigger:
1160	zSig = aSig - bSig;
1161	zExp = aExp;
1162	normalizeRoundAndPack:
1163	--zExp;
1164	return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
1165
1166	}
1167
1168	/*
1169	-------------------------------------------------------------------------------
1170	Returns the result of adding the single-precision floating-point values `a'
1171	and `b'. The operation is performed according to the IEC/IEEE Standard for
1172	Binary Floating-point Arithmetic.
1173	-------------------------------------------------------------------------------
1174	*/
1175	float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
1176	{
1177	flag aSign, bSign;
1178
1179	aSign = extractFloat32Sign( a );
1180	bSign = extractFloat32Sign( a: b );
1181	if ( aSign == bSign ) {
1182	return addFloat32Sigs( roundData, a, b, zSign: aSign );
1183	}
1184	else {
1185	return subFloat32Sigs( roundData, a, b, zSign: aSign );
1186	}
1187
1188	}
1189
1190	/*
1191	-------------------------------------------------------------------------------
1192	Returns the result of subtracting the single-precision floating-point values
1193	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1194	for Binary Floating-point Arithmetic.
1195	-------------------------------------------------------------------------------
1196	*/
1197	float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
1198	{
1199	flag aSign, bSign;
1200
1201	aSign = extractFloat32Sign( a );
1202	bSign = extractFloat32Sign( a: b );
1203	if ( aSign == bSign ) {
1204	return subFloat32Sigs( roundData, a, b, zSign: aSign );
1205	}
1206	else {
1207	return addFloat32Sigs( roundData, a, b, zSign: aSign );
1208	}
1209
1210	}
1211
1212	/*
1213	-------------------------------------------------------------------------------
1214	Returns the result of multiplying the single-precision floating-point values
1215	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1216	for Binary Floating-point Arithmetic.
1217	-------------------------------------------------------------------------------
1218	*/
1219	float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
1220	{
1221	flag aSign, bSign, zSign;
1222	int16 aExp, bExp, zExp;
1223	bits32 aSig, bSig;
1224	bits64 zSig64;
1225	bits32 zSig;
1226
1227	aSig = extractFloat32Frac( a );
1228	aExp = extractFloat32Exp( a );
1229	aSign = extractFloat32Sign( a );
1230	bSig = extractFloat32Frac( a: b );
1231	bExp = extractFloat32Exp( a: b );
1232	bSign = extractFloat32Sign( a: b );
1233	zSign = aSign ^ bSign;
1234	if ( aExp == `0xFF` ) {
1235	if ( aSig \|\| ( ( bExp == `0xFF` ) && bSig ) ) {
1236	return propagateFloat32NaN( a, b );
1237	}
1238	if ( ( bExp \| bSig ) == `0` ) {
1239	roundData->exception \|= float_flag_invalid;
1240	return float32_default_nan;
1241	}
1242	return packFloat32( zSign, zExp: `0xFF`, zSig: `0` );
1243	}
1244	if ( bExp == `0xFF` ) {
1245	if ( bSig ) return propagateFloat32NaN( a, b );
1246	if ( ( aExp \| aSig ) == `0` ) {
1247	roundData->exception \|= float_flag_invalid;
1248	return float32_default_nan;
1249	}
1250	return packFloat32( zSign, zExp: `0xFF`, zSig: `0` );
1251	}
1252	if ( aExp == `0` ) {
1253	if ( aSig == `0` ) return packFloat32( zSign, zExp: `0`, zSig: `0` );
1254	normalizeFloat32Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
1255	}
1256	if ( bExp == `0` ) {
1257	if ( bSig == `0` ) return packFloat32( zSign, zExp: `0`, zSig: `0` );
1258	normalizeFloat32Subnormal( aSig: bSig, zExpPtr: &bExp, zSigPtr: &bSig );
1259	}
1260	zExp = aExp + bExp - `0x7F`;
1261	aSig = ( aSig \| `0x00800000` )<<`7`;
1262	bSig = ( bSig \| `0x00800000` )<<`8`;
1263	shift64RightJamming( a: ( (bits64) aSig ) * bSig, count: `32`, zPtr: &zSig64 );
1264	zSig = zSig64;
1265	if ( `0` <= (sbits32) ( zSig<<`1` ) ) {
1266	zSig <<= `1`;
1267	--zExp;
1268	}
1269	return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1270
1271	}
1272
1273	/*
1274	-------------------------------------------------------------------------------
1275	Returns the result of dividing the single-precision floating-point value `a'
1276	by the corresponding value `b'. The operation is performed according to the
1277	IEC/IEEE Standard for Binary Floating-point Arithmetic.
1278	-------------------------------------------------------------------------------
1279	*/
1280	float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
1281	{
1282	flag aSign, bSign, zSign;
1283	int16 aExp, bExp, zExp;
1284	bits32 aSig, bSig, zSig;
1285
1286	aSig = extractFloat32Frac( a );
1287	aExp = extractFloat32Exp( a );
1288	aSign = extractFloat32Sign( a );
1289	bSig = extractFloat32Frac( a: b );
1290	bExp = extractFloat32Exp( a: b );
1291	bSign = extractFloat32Sign( a: b );
1292	zSign = aSign ^ bSign;
1293	if ( aExp == `0xFF` ) {
1294	if ( aSig ) return propagateFloat32NaN( a, b );
1295	if ( bExp == `0xFF` ) {
1296	if ( bSig ) return propagateFloat32NaN( a, b );
1297	roundData->exception \|= float_flag_invalid;
1298	return float32_default_nan;
1299	}
1300	return packFloat32( zSign, zExp: `0xFF`, zSig: `0` );
1301	}
1302	if ( bExp == `0xFF` ) {
1303	if ( bSig ) return propagateFloat32NaN( a, b );
1304	return packFloat32( zSign, zExp: `0`, zSig: `0` );
1305	}
1306	if ( bExp == `0` ) {
1307	if ( bSig == `0` ) {
1308	if ( ( aExp \| aSig ) == `0` ) {
1309	roundData->exception \|= float_flag_invalid;
1310	return float32_default_nan;
1311	}
1312	roundData->exception \|= float_flag_divbyzero;
1313	return packFloat32( zSign, zExp: `0xFF`, zSig: `0` );
1314	}
1315	normalizeFloat32Subnormal( aSig: bSig, zExpPtr: &bExp, zSigPtr: &bSig );
1316	}
1317	if ( aExp == `0` ) {
1318	if ( aSig == `0` ) return packFloat32( zSign, zExp: `0`, zSig: `0` );
1319	normalizeFloat32Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
1320	}
1321	zExp = aExp - bExp + `0x7D`;
1322	aSig = ( aSig \| `0x00800000` )<<`7`;
1323	bSig = ( bSig \| `0x00800000` )<<`8`;
1324	if ( bSig <= ( aSig + aSig ) ) {
1325	aSig >>= `1`;
1326	++zExp;
1327	}
1328	{
1329	bits64 tmp = ( (bits64) aSig )<<`32`;
1330	do_div( tmp, bSig );
1331	zSig = tmp;
1332	}
1333	if ( ( zSig & `0x3F` ) == `0` ) {
1334	zSig \|= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<`32` );
1335	}
1336	return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1337
1338	}
1339
1340	/*
1341	-------------------------------------------------------------------------------
1342	Returns the remainder of the single-precision floating-point value `a'
1343	with respect to the corresponding value `b'. The operation is performed
1344	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1345	-------------------------------------------------------------------------------
1346	*/
1347	float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
1348	{
1349	flag aSign, bSign, zSign;
1350	int16 aExp, bExp, expDiff;
1351	bits32 aSig, bSig;
1352	bits32 q;
1353	bits64 aSig64, bSig64, q64;
1354	bits32 alternateASig;
1355	sbits32 sigMean;
1356
1357	aSig = extractFloat32Frac( a );
1358	aExp = extractFloat32Exp( a );
1359	aSign = extractFloat32Sign( a );
1360	bSig = extractFloat32Frac( a: b );
1361	bExp = extractFloat32Exp( a: b );
1362	bSign = extractFloat32Sign( a: b );
1363	if ( aExp == `0xFF` ) {
1364	if ( aSig \|\| ( ( bExp == `0xFF` ) && bSig ) ) {
1365	return propagateFloat32NaN( a, b );
1366	}
1367	roundData->exception \|= float_flag_invalid;
1368	return float32_default_nan;
1369	}
1370	if ( bExp == `0xFF` ) {
1371	if ( bSig ) return propagateFloat32NaN( a, b );
1372	return a;
1373	}
1374	if ( bExp == `0` ) {
1375	if ( bSig == `0` ) {
1376	roundData->exception \|= float_flag_invalid;
1377	return float32_default_nan;
1378	}
1379	normalizeFloat32Subnormal( aSig: bSig, zExpPtr: &bExp, zSigPtr: &bSig );
1380	}
1381	if ( aExp == `0` ) {
1382	if ( aSig == `0` ) return a;
1383	normalizeFloat32Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
1384	}
1385	expDiff = aExp - bExp;
1386	aSig \|= `0x00800000`;
1387	bSig \|= `0x00800000`;
1388	if ( expDiff < `32` ) {
1389	aSig <<= `8`;
1390	bSig <<= `8`;
1391	if ( expDiff < `0` ) {
1392	if ( expDiff < -`1` ) return a;
1393	aSig >>= `1`;
1394	}
1395	q = ( bSig <= aSig );
1396	if ( q ) aSig -= bSig;
1397	if ( `0` < expDiff ) {
1398	bits64 tmp = ( (bits64) aSig )<<`32`;
1399	do_div( tmp, bSig );
1400	q = tmp;
1401	q >>= `32` - expDiff;
1402	bSig >>= `2`;
1403	aSig = ( ( aSig>>`1` )<<( expDiff - `1` ) ) - bSig * q;
1404	}
1405	else {
1406	aSig >>= `2`;
1407	bSig >>= `2`;
1408	}
1409	}
1410	else {
1411	if ( bSig <= aSig ) aSig -= bSig;
1412	aSig64 = ( (bits64) aSig )<<`40`;
1413	bSig64 = ( (bits64) bSig )<<`40`;
1414	expDiff -= `64`;
1415	while ( `0` < expDiff ) {
1416	q64 = estimateDiv128To64( a0: aSig64, a1: `0`, b: bSig64 );
1417	q64 = ( `2` < q64 ) ? q64 - `2` : `0`;
1418	aSig64 = - ( ( bSig * q64 )<<`38` );
1419	expDiff -= `62`;
1420	}
1421	expDiff += `64`;
1422	q64 = estimateDiv128To64( a0: aSig64, a1: `0`, b: bSig64 );
1423	q64 = ( `2` < q64 ) ? q64 - `2` : `0`;
1424	q = q64>>( `64` - expDiff );
1425	bSig <<= `6`;
1426	aSig = ( ( aSig64>>`33` )<<( expDiff - `1` ) ) - bSig * q;
1427	}
1428	do {
1429	alternateASig = aSig;
1430	++q;
1431	aSig -= bSig;
1432	} while ( `0` <= (sbits32) aSig );
1433	sigMean = aSig + alternateASig;
1434	if ( ( sigMean < `0` ) \|\| ( ( sigMean == `0` ) && ( q & `1` ) ) ) {
1435	aSig = alternateASig;
1436	}
1437	zSign = ( (sbits32) aSig < `0` );
1438	if ( zSign ) aSig = - aSig;
1439	return normalizeRoundAndPackFloat32( roundData, zSign: aSign ^ zSign, zExp: bExp, zSig: aSig );
1440
1441	}
1442
1443	/*
1444	-------------------------------------------------------------------------------
1445	Returns the square root of the single-precision floating-point value `a'.
1446	The operation is performed according to the IEC/IEEE Standard for Binary
1447	Floating-point Arithmetic.
1448	-------------------------------------------------------------------------------
1449	*/
1450	float32 float32_sqrt( struct roundingData *roundData, float32 a )
1451	{
1452	flag aSign;
1453	int16 aExp, zExp;
1454	bits32 aSig, zSig;
1455	bits64 rem, term;
1456
1457	aSig = extractFloat32Frac( a );
1458	aExp = extractFloat32Exp( a );
1459	aSign = extractFloat32Sign( a );
1460	if ( aExp == `0xFF` ) {
1461	if ( aSig ) return propagateFloat32NaN( a, b: `0` );
1462	if ( ! aSign ) return a;
1463	roundData->exception \|= float_flag_invalid;
1464	return float32_default_nan;
1465	}
1466	if ( aSign ) {
1467	if ( ( aExp \| aSig ) == `0` ) return a;
1468	roundData->exception \|= float_flag_invalid;
1469	return float32_default_nan;
1470	}
1471	if ( aExp == `0` ) {
1472	if ( aSig == `0` ) return `0`;
1473	normalizeFloat32Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
1474	}
1475	zExp = ( ( aExp - `0x7F` )>>`1` ) + `0x7E`;
1476	aSig = ( aSig \| `0x00800000` )<<`8`;
1477	zSig = estimateSqrt32( aExp, a: aSig ) + `2`;
1478	if ( ( zSig & `0x7F` ) <= `5` ) {
1479	if ( zSig < `2` ) {
1480	zSig = `0xFFFFFFFF`;
1481	}
1482	else {
1483	aSig >>= aExp & `1`;
1484	term = ( (bits64) zSig ) * zSig;
1485	rem = ( ( (bits64) aSig )<<`32` ) - term;
1486	while ( (sbits64) rem < `0` ) {
1487	--zSig;
1488	rem += ( ( (bits64) zSig )<<`1` ) \| `1`;
1489	}
1490	zSig \|= ( rem != `0` );
1491	}
1492	}
1493	shift32RightJamming( a: zSig, count: `1`, zPtr: &zSig );
1494	return roundAndPackFloat32( roundData, zSign: `0`, zExp, zSig );
1495
1496	}
1497
1498	/*
1499	-------------------------------------------------------------------------------
1500	Returns 1 if the single-precision floating-point value `a' is equal to the
1501	corresponding value `b', and 0 otherwise. The comparison is performed
1502	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1503	-------------------------------------------------------------------------------
1504	*/
1505	flag float32_eq( float32 a, float32 b )
1506	{
1507
1508	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
1509	\|\| ( ( extractFloat32Exp( a: b ) == `0xFF` ) && extractFloat32Frac( a: b ) )
1510	) {
1511	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( a: b ) ) {
1512	float_raise( float_flag_invalid );
1513	}
1514	return `0`;
1515	}
1516	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<`1` ) == `0` );
1517
1518	}
1519
1520	/*
1521	-------------------------------------------------------------------------------
1522	Returns 1 if the single-precision floating-point value `a' is less than or
1523	equal to the corresponding value `b', and 0 otherwise. The comparison is
1524	performed according to the IEC/IEEE Standard for Binary Floating-point
1525	Arithmetic.
1526	-------------------------------------------------------------------------------
1527	*/
1528	flag float32_le( float32 a, float32 b )
1529	{
1530	flag aSign, bSign;
1531
1532	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
1533	\|\| ( ( extractFloat32Exp( a: b ) == `0xFF` ) && extractFloat32Frac( a: b ) )
1534	) {
1535	float_raise( float_flag_invalid );
1536	return `0`;
1537	}
1538	aSign = extractFloat32Sign( a );
1539	bSign = extractFloat32Sign( a: b );
1540	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<`1` ) == `0` );
1541	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
1542
1543	}
1544
1545	/*
1546	-------------------------------------------------------------------------------
1547	Returns 1 if the single-precision floating-point value `a' is less than
1548	the corresponding value `b', and 0 otherwise. The comparison is performed
1549	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1550	-------------------------------------------------------------------------------
1551	*/
1552	flag float32_lt( float32 a, float32 b )
1553	{
1554	flag aSign, bSign;
1555
1556	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
1557	\|\| ( ( extractFloat32Exp( a: b ) == `0xFF` ) && extractFloat32Frac( a: b ) )
1558	) {
1559	float_raise( float_flag_invalid );
1560	return `0`;
1561	}
1562	aSign = extractFloat32Sign( a );
1563	bSign = extractFloat32Sign( a: b );
1564	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<`1` ) != `0` );
1565	return ( a != b ) && ( aSign ^ ( a < b ) );
1566
1567	}
1568
1569	/*
1570	-------------------------------------------------------------------------------
1571	Returns 1 if the single-precision floating-point value `a' is equal to the
1572	corresponding value `b', and 0 otherwise. The invalid exception is raised
1573	if either operand is a NaN. Otherwise, the comparison is performed
1574	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1575	-------------------------------------------------------------------------------
1576	*/
1577	flag float32_eq_signaling( float32 a, float32 b )
1578	{
1579
1580	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
1581	\|\| ( ( extractFloat32Exp( a: b ) == `0xFF` ) && extractFloat32Frac( a: b ) )
1582	) {
1583	float_raise( float_flag_invalid );
1584	return `0`;
1585	}
1586	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<`1` ) == `0` );
1587
1588	}
1589
1590	/*
1591	-------------------------------------------------------------------------------
1592	Returns 1 if the single-precision floating-point value `a' is less than or
1593	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
1594	cause an exception. Otherwise, the comparison is performed according to the
1595	IEC/IEEE Standard for Binary Floating-point Arithmetic.
1596	-------------------------------------------------------------------------------
1597	*/
1598	flag float32_le_quiet( float32 a, float32 b )
1599	{
1600	flag aSign, bSign;
1601	//int16 aExp, bExp;
1602
1603	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
1604	\|\| ( ( extractFloat32Exp( a: b ) == `0xFF` ) && extractFloat32Frac( a: b ) )
1605	) {
1606	/ Do nothing, even if NaN as we're quiet /
1607	return `0`;
1608	}
1609	aSign = extractFloat32Sign( a );
1610	bSign = extractFloat32Sign( a: b );
1611	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<`1` ) == `0` );
1612	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
1613
1614	}
1615
1616	/*
1617	-------------------------------------------------------------------------------
1618	Returns 1 if the single-precision floating-point value `a' is less than
1619	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1620	exception. Otherwise, the comparison is performed according to the IEC/IEEE
1621	Standard for Binary Floating-point Arithmetic.
1622	-------------------------------------------------------------------------------
1623	*/
1624	flag float32_lt_quiet( float32 a, float32 b )
1625	{
1626	flag aSign, bSign;
1627
1628	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
1629	\|\| ( ( extractFloat32Exp( a: b ) == `0xFF` ) && extractFloat32Frac( a: b ) )
1630	) {
1631	/ Do nothing, even if NaN as we're quiet /
1632	return `0`;
1633	}
1634	aSign = extractFloat32Sign( a );
1635	bSign = extractFloat32Sign( a: b );
1636	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<`1` ) != `0` );
1637	return ( a != b ) && ( aSign ^ ( a < b ) );
1638
1639	}
1640
1641	/*
1642	-------------------------------------------------------------------------------
1643	Returns the result of converting the double-precision floating-point value
1644	`a' to the 32-bit two's complement integer format. The conversion is
1645	performed according to the IEC/IEEE Standard for Binary Floating-point
1646	Arithmetic---which means in particular that the conversion is rounded
1647	according to the current rounding mode. If `a' is a NaN, the largest
1648	positive integer is returned. Otherwise, if the conversion overflows, the
1649	largest integer with the same sign as `a' is returned.
1650	-------------------------------------------------------------------------------
1651	*/
1652	int32 float64_to_int32( struct roundingData *roundData, float64 a )
1653	{
1654	flag aSign;
1655	int16 aExp, shiftCount;
1656	bits64 aSig;
1657
1658	aSig = extractFloat64Frac( a );
1659	aExp = extractFloat64Exp( a );
1660	aSign = extractFloat64Sign( a );
1661	if ( ( aExp == `0x7FF` ) && aSig ) aSign = `0`;
1662	if ( aExp ) aSig \|= LIT64( `0x0010000000000000` );
1663	shiftCount = `0x42C` - aExp;
1664	if ( `0` < shiftCount ) shift64RightJamming( a: aSig, count: shiftCount, zPtr: &aSig );
1665	return roundAndPackInt32( roundData, zSign: aSign, absZ: aSig );
1666
1667	}
1668
1669	/*
1670	-------------------------------------------------------------------------------
1671	Returns the result of converting the double-precision floating-point value
1672	`a' to the 32-bit two's complement integer format. The conversion is
1673	performed according to the IEC/IEEE Standard for Binary Floating-point
1674	Arithmetic, except that the conversion is always rounded toward zero. If
1675	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1676	conversion overflows, the largest integer with the same sign as `a' is
1677	returned.
1678	-------------------------------------------------------------------------------
1679	*/
1680	int32 float64_to_int32_round_to_zero( float64 a )
1681	{
1682	flag aSign;
1683	int16 aExp, shiftCount;
1684	bits64 aSig, savedASig;
1685	int32 z;
1686
1687	aSig = extractFloat64Frac( a );
1688	aExp = extractFloat64Exp( a );
1689	aSign = extractFloat64Sign( a );
1690	shiftCount = `0x433` - aExp;
1691	if ( shiftCount < `21` ) {
1692	if ( ( aExp == `0x7FF` ) && aSig ) aSign = `0`;
1693	goto invalid;
1694	}
1695	else if ( `52` < shiftCount ) {
1696	if ( aExp \|\| aSig ) float_raise( float_flag_inexact );
1697	return `0`;
1698	}
1699	aSig \|= LIT64( `0x0010000000000000` );
1700	savedASig = aSig;
1701	aSig >>= shiftCount;
1702	z = aSig;
1703	if ( aSign ) z = - z;
1704	if ( ( z < `0` ) ^ aSign ) {
1705	invalid:
1706	float_raise( float_flag_invalid );
1707	return aSign ? `0x80000000` : `0x7FFFFFFF`;
1708	}
1709	if ( ( aSig<<shiftCount ) != savedASig ) {
1710	float_raise( float_flag_inexact );
1711	}
1712	return z;
1713
1714	}
1715
1716	/*
1717	-------------------------------------------------------------------------------
1718	Returns the result of converting the double-precision floating-point value
1719	`a' to the 32-bit two's complement unsigned integer format. The conversion
1720	is performed according to the IEC/IEEE Standard for Binary Floating-point
1721	Arithmetic---which means in particular that the conversion is rounded
1722	according to the current rounding mode. If `a' is a NaN, the largest
1723	positive integer is returned. Otherwise, if the conversion overflows, the
1724	largest positive integer is returned.
1725	-------------------------------------------------------------------------------
1726	*/
1727	int32 float64_to_uint32( struct roundingData *roundData, float64 a )
1728	{
1729	flag aSign;
1730	int16 aExp, shiftCount;
1731	bits64 aSig;
1732
1733	aSig = extractFloat64Frac( a );
1734	aExp = extractFloat64Exp( a );
1735	aSign = `0`; //extractFloat64Sign( a );
1736	//if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1737	if ( aExp ) aSig \|= LIT64( `0x0010000000000000` );
1738	shiftCount = `0x42C` - aExp;
1739	if ( `0` < shiftCount ) shift64RightJamming( a: aSig, count: shiftCount, zPtr: &aSig );
1740	return roundAndPackInt32( roundData, zSign: aSign, absZ: aSig );
1741	}
1742
1743	/*
1744	-------------------------------------------------------------------------------
1745	Returns the result of converting the double-precision floating-point value
1746	`a' to the 32-bit two's complement integer format. The conversion is
1747	performed according to the IEC/IEEE Standard for Binary Floating-point
1748	Arithmetic, except that the conversion is always rounded toward zero. If
1749	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1750	conversion overflows, the largest positive integer is returned.
1751	-------------------------------------------------------------------------------
1752	*/
1753	int32 float64_to_uint32_round_to_zero( float64 a )
1754	{
1755	flag aSign;
1756	int16 aExp, shiftCount;
1757	bits64 aSig, savedASig;
1758	int32 z;
1759
1760	aSig = extractFloat64Frac( a );
1761	aExp = extractFloat64Exp( a );
1762	aSign = extractFloat64Sign( a );
1763	shiftCount = `0x433` - aExp;
1764	if ( shiftCount < `21` ) {
1765	if ( ( aExp == `0x7FF` ) && aSig ) aSign = `0`;
1766	goto invalid;
1767	}
1768	else if ( `52` < shiftCount ) {
1769	if ( aExp \|\| aSig ) float_raise( float_flag_inexact );
1770	return `0`;
1771	}
1772	aSig \|= LIT64( `0x0010000000000000` );
1773	savedASig = aSig;
1774	aSig >>= shiftCount;
1775	z = aSig;
1776	if ( aSign ) z = - z;
1777	if ( ( z < `0` ) ^ aSign ) {
1778	invalid:
1779	float_raise( float_flag_invalid );
1780	return aSign ? `0x80000000` : `0x7FFFFFFF`;
1781	}
1782	if ( ( aSig<<shiftCount ) != savedASig ) {
1783	float_raise( float_flag_inexact );
1784	}
1785	return z;
1786	}
1787
1788	/*
1789	-------------------------------------------------------------------------------
1790	Returns the result of converting the double-precision floating-point value
1791	`a' to the single-precision floating-point format. The conversion is
1792	performed according to the IEC/IEEE Standard for Binary Floating-point
1793	Arithmetic.
1794	-------------------------------------------------------------------------------
1795	*/
1796	float32 float64_to_float32( struct roundingData *roundData, float64 a )
1797	{
1798	flag aSign;
1799	int16 aExp;
1800	bits64 aSig;
1801	bits32 zSig;
1802
1803	aSig = extractFloat64Frac( a );
1804	aExp = extractFloat64Exp( a );
1805	aSign = extractFloat64Sign( a );
1806	if ( aExp == `0x7FF` ) {
1807	if ( aSig ) return commonNaNToFloat32( a: float64ToCommonNaN( a ) );
1808	return packFloat32( zSign: aSign, zExp: `0xFF`, zSig: `0` );
1809	}
1810	shift64RightJamming( a: aSig, count: `22`, zPtr: &aSig );
1811	zSig = aSig;
1812	if ( aExp \|\| zSig ) {
1813	zSig \|= `0x40000000`;
1814	aExp -= `0x381`;
1815	}
1816	return roundAndPackFloat32( roundData, zSign: aSign, zExp: aExp, zSig );
1817
1818	}
1819
1820	#ifdef FLOATX80
1821
1822	/*
1823	-------------------------------------------------------------------------------
1824	Returns the result of converting the double-precision floating-point value
1825	`a' to the extended double-precision floating-point format. The conversion
1826	is performed according to the IEC/IEEE Standard for Binary Floating-point
1827	Arithmetic.
1828	-------------------------------------------------------------------------------
1829	*/
1830	floatx80 float64_to_floatx80( float64 a )
1831	{
1832	flag aSign;
1833	int16 aExp;
1834	bits64 aSig;
1835
1836	aSig = extractFloat64Frac( a );
1837	aExp = extractFloat64Exp( a );
1838	aSign = extractFloat64Sign( a );
1839	if ( aExp == `0x7FF` ) {
1840	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
1841	return packFloatx80( aSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
1842	}
1843	if ( aExp == `0` ) {
1844	if ( aSig == `0` ) return packFloatx80( aSign, `0`, `0` );
1845	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1846	}
1847	return
1848	packFloatx80(
1849	aSign, aExp + `0x3C00`, ( aSig \| LIT64( `0x0010000000000000` ) )<<`11` );
1850
1851	}
1852
1853	#endif
1854
1855	/*
1856	-------------------------------------------------------------------------------
1857	Rounds the double-precision floating-point value `a' to an integer, and
1858	returns the result as a double-precision floating-point value. The
1859	operation is performed according to the IEC/IEEE Standard for Binary
1860	Floating-point Arithmetic.
1861	-------------------------------------------------------------------------------
1862	*/
1863	float64 float64_round_to_int( struct roundingData *roundData, float64 a )
1864	{
1865	flag aSign;
1866	int16 aExp;
1867	bits64 lastBitMask, roundBitsMask;
1868	int8 roundingMode;
1869	float64 z;
1870
1871	aExp = extractFloat64Exp( a );
1872	if ( `0x433` <= aExp ) {
1873	if ( ( aExp == `0x7FF` ) && extractFloat64Frac( a ) ) {
1874	return propagateFloat64NaN( a, b: a );
1875	}
1876	return a;
1877	}
1878	if ( aExp <= `0x3FE` ) {
1879	if ( (bits64) ( a<<`1` ) == `0` ) return a;
1880	roundData->exception \|= float_flag_inexact;
1881	aSign = extractFloat64Sign( a );
1882	switch ( roundData->mode ) {
1883	case float_round_nearest_even:
1884	if ( ( aExp == `0x3FE` ) && extractFloat64Frac( a ) ) {
1885	return packFloat64( zSign: aSign, zExp: `0x3FF`, zSig: `0` );
1886	}
1887	break;
1888	case float_round_down:
1889	return aSign ? LIT64( `0xBFF0000000000000` ) : `0`;
1890	case float_round_up:
1891	return
1892	aSign ? LIT64( `0x8000000000000000` ) : LIT64( `0x3FF0000000000000` );
1893	}
1894	return packFloat64( zSign: aSign, zExp: `0`, zSig: `0` );
1895	}
1896	lastBitMask = `1`;
1897	lastBitMask <<= `0x433` - aExp;
1898	roundBitsMask = lastBitMask - `1`;
1899	z = a;
1900	roundingMode = roundData->mode;
1901	if ( roundingMode == float_round_nearest_even ) {
1902	z += lastBitMask>>`1`;
1903	if ( ( z & roundBitsMask ) == `0` ) z &= ~ lastBitMask;
1904	}
1905	else if ( roundingMode != float_round_to_zero ) {
1906	if ( extractFloat64Sign( a: z ) ^ ( roundingMode == float_round_up ) ) {
1907	z += roundBitsMask;
1908	}
1909	}
1910	z &= ~ roundBitsMask;
1911	if ( z != a ) roundData->exception \|= float_flag_inexact;
1912	return z;
1913
1914	}
1915
1916	/*
1917	-------------------------------------------------------------------------------
1918	Returns the result of adding the absolute values of the double-precision
1919	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1920	before being returned. `zSign' is ignored if the result is a NaN. The
1921	addition is performed according to the IEC/IEEE Standard for Binary
1922	Floating-point Arithmetic.
1923	-------------------------------------------------------------------------------
1924	*/
1925	static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
1926	{
1927	int16 aExp, bExp, zExp;
1928	bits64 aSig, bSig, zSig;
1929	int16 expDiff;
1930
1931	aSig = extractFloat64Frac( a );
1932	aExp = extractFloat64Exp( a );
1933	bSig = extractFloat64Frac( a: b );
1934	bExp = extractFloat64Exp( a: b );
1935	expDiff = aExp - bExp;
1936	aSig <<= `9`;
1937	bSig <<= `9`;
1938	if ( `0` < expDiff ) {
1939	if ( aExp == `0x7FF` ) {
1940	if ( aSig ) return propagateFloat64NaN( a, b );
1941	return a;
1942	}
1943	if ( bExp == `0` ) {
1944	--expDiff;
1945	}
1946	else {
1947	bSig \|= LIT64( `0x2000000000000000` );
1948	}
1949	shift64RightJamming( a: bSig, count: expDiff, zPtr: &bSig );
1950	zExp = aExp;
1951	}
1952	else if ( expDiff < `0` ) {
1953	if ( bExp == `0x7FF` ) {
1954	if ( bSig ) return propagateFloat64NaN( a, b );
1955	return packFloat64( zSign, zExp: `0x7FF`, zSig: `0` );
1956	}
1957	if ( aExp == `0` ) {
1958	++expDiff;
1959	}
1960	else {
1961	aSig \|= LIT64( `0x2000000000000000` );
1962	}
1963	shift64RightJamming( a: aSig, count: - expDiff, zPtr: &aSig );
1964	zExp = bExp;
1965	}
1966	else {
1967	if ( aExp == `0x7FF` ) {
1968	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
1969	return a;
1970	}
1971	if ( aExp == `0` ) return packFloat64( zSign, zExp: `0`, zSig: ( aSig + bSig )>>`9` );
1972	zSig = LIT64( `0x4000000000000000` ) + aSig + bSig;
1973	zExp = aExp;
1974	goto roundAndPack;
1975	}
1976	aSig \|= LIT64( `0x2000000000000000` );
1977	zSig = ( aSig + bSig )<<`1`;
1978	--zExp;
1979	if ( (sbits64) zSig < `0` ) {
1980	zSig = aSig + bSig;
1981	++zExp;
1982	}
1983	roundAndPack:
1984	return roundAndPackFloat64( roundData, zSign, zExp, zSig );
1985
1986	}
1987
1988	/*
1989	-------------------------------------------------------------------------------
1990	Returns the result of subtracting the absolute values of the double-
1991	precision floating-point values `a' and `b'. If `zSign' is true, the
1992	difference is negated before being returned. `zSign' is ignored if the
1993	result is a NaN. The subtraction is performed according to the IEC/IEEE
1994	Standard for Binary Floating-point Arithmetic.
1995	-------------------------------------------------------------------------------
1996	*/
1997	static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
1998	{
1999	int16 aExp, bExp, zExp;
2000	bits64 aSig, bSig, zSig;
2001	int16 expDiff;
2002
2003	aSig = extractFloat64Frac( a );
2004	aExp = extractFloat64Exp( a );
2005	bSig = extractFloat64Frac( a: b );
2006	bExp = extractFloat64Exp( a: b );
2007	expDiff = aExp - bExp;
2008	aSig <<= `10`;
2009	bSig <<= `10`;
2010	if ( `0` < expDiff ) goto aExpBigger;
2011	if ( expDiff < `0` ) goto bExpBigger;
2012	if ( aExp == `0x7FF` ) {
2013	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
2014	roundData->exception \|= float_flag_invalid;
2015	return float64_default_nan;
2016	}
2017	if ( aExp == `0` ) {
2018	aExp = `1`;
2019	bExp = `1`;
2020	}
2021	if ( bSig < aSig ) goto aBigger;
2022	if ( aSig < bSig ) goto bBigger;
2023	return packFloat64( zSign: roundData->mode == float_round_down, zExp: `0`, zSig: `0` );
2024	bExpBigger:
2025	if ( bExp == `0x7FF` ) {
2026	if ( bSig ) return propagateFloat64NaN( a, b );
2027	return packFloat64( zSign: zSign ^ `1`, zExp: `0x7FF`, zSig: `0` );
2028	}
2029	if ( aExp == `0` ) {
2030	++expDiff;
2031	}
2032	else {
2033	aSig \|= LIT64( `0x4000000000000000` );
2034	}
2035	shift64RightJamming( a: aSig, count: - expDiff, zPtr: &aSig );
2036	bSig \|= LIT64( `0x4000000000000000` );
2037	bBigger:
2038	zSig = bSig - aSig;
2039	zExp = bExp;
2040	zSign ^= `1`;
2041	goto normalizeRoundAndPack;
2042	aExpBigger:
2043	if ( aExp == `0x7FF` ) {
2044	if ( aSig ) return propagateFloat64NaN( a, b );
2045	return a;
2046	}
2047	if ( bExp == `0` ) {
2048	--expDiff;
2049	}
2050	else {
2051	bSig \|= LIT64( `0x4000000000000000` );
2052	}
2053	shift64RightJamming( a: bSig, count: expDiff, zPtr: &bSig );
2054	aSig \|= LIT64( `0x4000000000000000` );
2055	aBigger:
2056	zSig = aSig - bSig;
2057	zExp = aExp;
2058	normalizeRoundAndPack:
2059	--zExp;
2060	return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
2061
2062	}
2063
2064	/*
2065	-------------------------------------------------------------------------------
2066	Returns the result of adding the double-precision floating-point values `a'
2067	and `b'. The operation is performed according to the IEC/IEEE Standard for
2068	Binary Floating-point Arithmetic.
2069	-------------------------------------------------------------------------------
2070	*/
2071	float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
2072	{
2073	flag aSign, bSign;
2074
2075	aSign = extractFloat64Sign( a );
2076	bSign = extractFloat64Sign( a: b );
2077	if ( aSign == bSign ) {
2078	return addFloat64Sigs( roundData, a, b, zSign: aSign );
2079	}
2080	else {
2081	return subFloat64Sigs( roundData, a, b, zSign: aSign );
2082	}
2083
2084	}
2085
2086	/*
2087	-------------------------------------------------------------------------------
2088	Returns the result of subtracting the double-precision floating-point values
2089	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2090	for Binary Floating-point Arithmetic.
2091	-------------------------------------------------------------------------------
2092	*/
2093	float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
2094	{
2095	flag aSign, bSign;
2096
2097	aSign = extractFloat64Sign( a );
2098	bSign = extractFloat64Sign( a: b );
2099	if ( aSign == bSign ) {
2100	return subFloat64Sigs( roundData, a, b, zSign: aSign );
2101	}
2102	else {
2103	return addFloat64Sigs( roundData, a, b, zSign: aSign );
2104	}
2105
2106	}
2107
2108	/*
2109	-------------------------------------------------------------------------------
2110	Returns the result of multiplying the double-precision floating-point values
2111	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2112	for Binary Floating-point Arithmetic.
2113	-------------------------------------------------------------------------------
2114	*/
2115	float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
2116	{
2117	flag aSign, bSign, zSign;
2118	int16 aExp, bExp, zExp;
2119	bits64 aSig, bSig, zSig0, zSig1;
2120
2121	aSig = extractFloat64Frac( a );
2122	aExp = extractFloat64Exp( a );
2123	aSign = extractFloat64Sign( a );
2124	bSig = extractFloat64Frac( a: b );
2125	bExp = extractFloat64Exp( a: b );
2126	bSign = extractFloat64Sign( a: b );
2127	zSign = aSign ^ bSign;
2128	if ( aExp == `0x7FF` ) {
2129	if ( aSig \|\| ( ( bExp == `0x7FF` ) && bSig ) ) {
2130	return propagateFloat64NaN( a, b );
2131	}
2132	if ( ( bExp \| bSig ) == `0` ) {
2133	roundData->exception \|= float_flag_invalid;
2134	return float64_default_nan;
2135	}
2136	return packFloat64( zSign, zExp: `0x7FF`, zSig: `0` );
2137	}
2138	if ( bExp == `0x7FF` ) {
2139	if ( bSig ) return propagateFloat64NaN( a, b );
2140	if ( ( aExp \| aSig ) == `0` ) {
2141	roundData->exception \|= float_flag_invalid;
2142	return float64_default_nan;
2143	}
2144	return packFloat64( zSign, zExp: `0x7FF`, zSig: `0` );
2145	}
2146	if ( aExp == `0` ) {
2147	if ( aSig == `0` ) return packFloat64( zSign, zExp: `0`, zSig: `0` );
2148	normalizeFloat64Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
2149	}
2150	if ( bExp == `0` ) {
2151	if ( bSig == `0` ) return packFloat64( zSign, zExp: `0`, zSig: `0` );
2152	normalizeFloat64Subnormal( aSig: bSig, zExpPtr: &bExp, zSigPtr: &bSig );
2153	}
2154	zExp = aExp + bExp - `0x3FF`;
2155	aSig = ( aSig \| LIT64( `0x0010000000000000` ) )<<`10`;
2156	bSig = ( bSig \| LIT64( `0x0010000000000000` ) )<<`11`;
2157	mul64To128( a: aSig, b: bSig, z0Ptr: &zSig0, z1Ptr: &zSig1 );
2158	zSig0 \|= ( zSig1 != `0` );
2159	if ( `0` <= (sbits64) ( zSig0<<`1` ) ) {
2160	zSig0 <<= `1`;
2161	--zExp;
2162	}
2163	return roundAndPackFloat64( roundData, zSign, zExp, zSig: zSig0 );
2164
2165	}
2166
2167	/*
2168	-------------------------------------------------------------------------------
2169	Returns the result of dividing the double-precision floating-point value `a'
2170	by the corresponding value `b'. The operation is performed according to
2171	the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2172	-------------------------------------------------------------------------------
2173	*/
2174	float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
2175	{
2176	flag aSign, bSign, zSign;
2177	int16 aExp, bExp, zExp;
2178	bits64 aSig, bSig, zSig;
2179	bits64 rem0, rem1;
2180	bits64 term0, term1;
2181
2182	aSig = extractFloat64Frac( a );
2183	aExp = extractFloat64Exp( a );
2184	aSign = extractFloat64Sign( a );
2185	bSig = extractFloat64Frac( a: b );
2186	bExp = extractFloat64Exp( a: b );
2187	bSign = extractFloat64Sign( a: b );
2188	zSign = aSign ^ bSign;
2189	if ( aExp == `0x7FF` ) {
2190	if ( aSig ) return propagateFloat64NaN( a, b );
2191	if ( bExp == `0x7FF` ) {
2192	if ( bSig ) return propagateFloat64NaN( a, b );
2193	roundData->exception \|= float_flag_invalid;
2194	return float64_default_nan;
2195	}
2196	return packFloat64( zSign, zExp: `0x7FF`, zSig: `0` );
2197	}
2198	if ( bExp == `0x7FF` ) {
2199	if ( bSig ) return propagateFloat64NaN( a, b );
2200	return packFloat64( zSign, zExp: `0`, zSig: `0` );
2201	}
2202	if ( bExp == `0` ) {
2203	if ( bSig == `0` ) {
2204	if ( ( aExp \| aSig ) == `0` ) {
2205	roundData->exception \|= float_flag_invalid;
2206	return float64_default_nan;
2207	}
2208	roundData->exception \|= float_flag_divbyzero;
2209	return packFloat64( zSign, zExp: `0x7FF`, zSig: `0` );
2210	}
2211	normalizeFloat64Subnormal( aSig: bSig, zExpPtr: &bExp, zSigPtr: &bSig );
2212	}
2213	if ( aExp == `0` ) {
2214	if ( aSig == `0` ) return packFloat64( zSign, zExp: `0`, zSig: `0` );
2215	normalizeFloat64Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
2216	}
2217	zExp = aExp - bExp + `0x3FD`;
2218	aSig = ( aSig \| LIT64( `0x0010000000000000` ) )<<`10`;
2219	bSig = ( bSig \| LIT64( `0x0010000000000000` ) )<<`11`;
2220	if ( bSig <= ( aSig + aSig ) ) {
2221	aSig >>= `1`;
2222	++zExp;
2223	}
2224	zSig = estimateDiv128To64( a0: aSig, a1: `0`, b: bSig );
2225	if ( ( zSig & `0x1FF` ) <= `2` ) {
2226	mul64To128( a: bSig, b: zSig, z0Ptr: &term0, z1Ptr: &term1 );
2227	sub128( a0: aSig, a1: `0`, b0: term0, b1: term1, z0Ptr: &rem0, z1Ptr: &rem1 );
2228	while ( (sbits64) rem0 < `0` ) {
2229	--zSig;
2230	add128( a0: rem0, a1: rem1, b0: `0`, b1: bSig, z0Ptr: &rem0, z1Ptr: &rem1 );
2231	}
2232	zSig \|= ( rem1 != `0` );
2233	}
2234	return roundAndPackFloat64( roundData, zSign, zExp, zSig );
2235
2236	}
2237
2238	/*
2239	-------------------------------------------------------------------------------
2240	Returns the remainder of the double-precision floating-point value `a'
2241	with respect to the corresponding value `b'. The operation is performed
2242	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2243	-------------------------------------------------------------------------------
2244	*/
2245	float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
2246	{
2247	flag aSign, bSign, zSign;
2248	int16 aExp, bExp, expDiff;
2249	bits64 aSig, bSig;
2250	bits64 q, alternateASig;
2251	sbits64 sigMean;
2252
2253	aSig = extractFloat64Frac( a );
2254	aExp = extractFloat64Exp( a );
2255	aSign = extractFloat64Sign( a );
2256	bSig = extractFloat64Frac( a: b );
2257	bExp = extractFloat64Exp( a: b );
2258	bSign = extractFloat64Sign( a: b );
2259	if ( aExp == `0x7FF` ) {
2260	if ( aSig \|\| ( ( bExp == `0x7FF` ) && bSig ) ) {
2261	return propagateFloat64NaN( a, b );
2262	}
2263	roundData->exception \|= float_flag_invalid;
2264	return float64_default_nan;
2265	}
2266	if ( bExp == `0x7FF` ) {
2267	if ( bSig ) return propagateFloat64NaN( a, b );
2268	return a;
2269	}
2270	if ( bExp == `0` ) {
2271	if ( bSig == `0` ) {
2272	roundData->exception \|= float_flag_invalid;
2273	return float64_default_nan;
2274	}
2275	normalizeFloat64Subnormal( aSig: bSig, zExpPtr: &bExp, zSigPtr: &bSig );
2276	}
2277	if ( aExp == `0` ) {
2278	if ( aSig == `0` ) return a;
2279	normalizeFloat64Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
2280	}
2281	expDiff = aExp - bExp;
2282	aSig = ( aSig \| LIT64( `0x0010000000000000` ) )<<`11`;
2283	bSig = ( bSig \| LIT64( `0x0010000000000000` ) )<<`11`;
2284	if ( expDiff < `0` ) {
2285	if ( expDiff < -`1` ) return a;
2286	aSig >>= `1`;
2287	}
2288	q = ( bSig <= aSig );
2289	if ( q ) aSig -= bSig;
2290	expDiff -= `64`;
2291	while ( `0` < expDiff ) {
2292	q = estimateDiv128To64( a0: aSig, a1: `0`, b: bSig );
2293	q = ( `2` < q ) ? q - `2` : `0`;
2294	aSig = - ( ( bSig>>`2` ) * q );
2295	expDiff -= `62`;
2296	}
2297	expDiff += `64`;
2298	if ( `0` < expDiff ) {
2299	q = estimateDiv128To64( a0: aSig, a1: `0`, b: bSig );
2300	q = ( `2` < q ) ? q - `2` : `0`;
2301	q >>= `64` - expDiff;
2302	bSig >>= `2`;
2303	aSig = ( ( aSig>>`1` )<<( expDiff - `1` ) ) - bSig * q;
2304	}
2305	else {
2306	aSig >>= `2`;
2307	bSig >>= `2`;
2308	}
2309	do {
2310	alternateASig = aSig;
2311	++q;
2312	aSig -= bSig;
2313	} while ( `0` <= (sbits64) aSig );
2314	sigMean = aSig + alternateASig;
2315	if ( ( sigMean < `0` ) \|\| ( ( sigMean == `0` ) && ( q & `1` ) ) ) {
2316	aSig = alternateASig;
2317	}
2318	zSign = ( (sbits64) aSig < `0` );
2319	if ( zSign ) aSig = - aSig;
2320	return normalizeRoundAndPackFloat64( roundData, zSign: aSign ^ zSign, zExp: bExp, zSig: aSig );
2321
2322	}
2323
2324	/*
2325	-------------------------------------------------------------------------------
2326	Returns the square root of the double-precision floating-point value `a'.
2327	The operation is performed according to the IEC/IEEE Standard for Binary
2328	Floating-point Arithmetic.
2329	-------------------------------------------------------------------------------
2330	*/
2331	float64 float64_sqrt( struct roundingData *roundData, float64 a )
2332	{
2333	flag aSign;
2334	int16 aExp, zExp;
2335	bits64 aSig, zSig;
2336	bits64 rem0, rem1, term0, term1; //, shiftedRem;
2337	//float64 z;
2338
2339	aSig = extractFloat64Frac( a );
2340	aExp = extractFloat64Exp( a );
2341	aSign = extractFloat64Sign( a );
2342	if ( aExp == `0x7FF` ) {
2343	if ( aSig ) return propagateFloat64NaN( a, b: a );
2344	if ( ! aSign ) return a;
2345	roundData->exception \|= float_flag_invalid;
2346	return float64_default_nan;
2347	}
2348	if ( aSign ) {
2349	if ( ( aExp \| aSig ) == `0` ) return a;
2350	roundData->exception \|= float_flag_invalid;
2351	return float64_default_nan;
2352	}
2353	if ( aExp == `0` ) {
2354	if ( aSig == `0` ) return `0`;
2355	normalizeFloat64Subnormal( aSig, zExpPtr: &aExp, zSigPtr: &aSig );
2356	}
2357	zExp = ( ( aExp - `0x3FF` )>>`1` ) + `0x3FE`;
2358	aSig \|= LIT64( `0x0010000000000000` );
2359	zSig = estimateSqrt32( aExp, a: aSig>>`21` );
2360	zSig <<= `31`;
2361	aSig <<= `9` - ( aExp & `1` );
2362	zSig = estimateDiv128To64( a0: aSig, a1: `0`, b: zSig ) + zSig + `2`;
2363	if ( ( zSig & `0x3FF` ) <= `5` ) {
2364	if ( zSig < `2` ) {
2365	zSig = LIT64( `0xFFFFFFFFFFFFFFFF` );
2366	}
2367	else {
2368	aSig <<= `2`;
2369	mul64To128( a: zSig, b: zSig, z0Ptr: &term0, z1Ptr: &term1 );
2370	sub128( a0: aSig, a1: `0`, b0: term0, b1: term1, z0Ptr: &rem0, z1Ptr: &rem1 );
2371	while ( (sbits64) rem0 < `0` ) {
2372	--zSig;
2373	shortShift128Left( a0: `0`, a1: zSig, count: `1`, z0Ptr: &term0, z1Ptr: &term1 );
2374	term1 \|= `1`;
2375	add128( a0: rem0, a1: rem1, b0: term0, b1: term1, z0Ptr: &rem0, z1Ptr: &rem1 );
2376	}
2377	zSig \|= ( ( rem0 \| rem1 ) != `0` );
2378	}
2379	}
2380	shift64RightJamming( a: zSig, count: `1`, zPtr: &zSig );
2381	return roundAndPackFloat64( roundData, zSign: `0`, zExp, zSig );
2382
2383	}
2384
2385	/*
2386	-------------------------------------------------------------------------------
2387	Returns 1 if the double-precision floating-point value `a' is equal to the
2388	corresponding value `b', and 0 otherwise. The comparison is performed
2389	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2390	-------------------------------------------------------------------------------
2391	*/
2392	flag float64_eq( float64 a, float64 b )
2393	{
2394
2395	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
2396	\|\| ( ( extractFloat64Exp( a: b ) == `0x7FF` ) && extractFloat64Frac( a: b ) )
2397	) {
2398	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( a: b ) ) {
2399	float_raise( float_flag_invalid );
2400	}
2401	return `0`;
2402	}
2403	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<`1` ) == `0` );
2404
2405	}
2406
2407	/*
2408	-------------------------------------------------------------------------------
2409	Returns 1 if the double-precision floating-point value `a' is less than or
2410	equal to the corresponding value `b', and 0 otherwise. The comparison is
2411	performed according to the IEC/IEEE Standard for Binary Floating-point
2412	Arithmetic.
2413	-------------------------------------------------------------------------------
2414	*/
2415	flag float64_le( float64 a, float64 b )
2416	{
2417	flag aSign, bSign;
2418
2419	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
2420	\|\| ( ( extractFloat64Exp( a: b ) == `0x7FF` ) && extractFloat64Frac( a: b ) )
2421	) {
2422	float_raise( float_flag_invalid );
2423	return `0`;
2424	}
2425	aSign = extractFloat64Sign( a );
2426	bSign = extractFloat64Sign( a: b );
2427	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<`1` ) == `0` );
2428	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
2429
2430	}
2431
2432	/*
2433	-------------------------------------------------------------------------------
2434	Returns 1 if the double-precision floating-point value `a' is less than
2435	the corresponding value `b', and 0 otherwise. The comparison is performed
2436	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2437	-------------------------------------------------------------------------------
2438	*/
2439	flag float64_lt( float64 a, float64 b )
2440	{
2441	flag aSign, bSign;
2442
2443	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
2444	\|\| ( ( extractFloat64Exp( a: b ) == `0x7FF` ) && extractFloat64Frac( a: b ) )
2445	) {
2446	float_raise( float_flag_invalid );
2447	return `0`;
2448	}
2449	aSign = extractFloat64Sign( a );
2450	bSign = extractFloat64Sign( a: b );
2451	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<`1` ) != `0` );
2452	return ( a != b ) && ( aSign ^ ( a < b ) );
2453
2454	}
2455
2456	/*
2457	-------------------------------------------------------------------------------
2458	Returns 1 if the double-precision floating-point value `a' is equal to the
2459	corresponding value `b', and 0 otherwise. The invalid exception is raised
2460	if either operand is a NaN. Otherwise, the comparison is performed
2461	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2462	-------------------------------------------------------------------------------
2463	*/
2464	flag float64_eq_signaling( float64 a, float64 b )
2465	{
2466
2467	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
2468	\|\| ( ( extractFloat64Exp( a: b ) == `0x7FF` ) && extractFloat64Frac( a: b ) )
2469	) {
2470	float_raise( float_flag_invalid );
2471	return `0`;
2472	}
2473	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<`1` ) == `0` );
2474
2475	}
2476
2477	/*
2478	-------------------------------------------------------------------------------
2479	Returns 1 if the double-precision floating-point value `a' is less than or
2480	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2481	cause an exception. Otherwise, the comparison is performed according to the
2482	IEC/IEEE Standard for Binary Floating-point Arithmetic.
2483	-------------------------------------------------------------------------------
2484	*/
2485	flag float64_le_quiet( float64 a, float64 b )
2486	{
2487	flag aSign, bSign;
2488	//int16 aExp, bExp;
2489
2490	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
2491	\|\| ( ( extractFloat64Exp( a: b ) == `0x7FF` ) && extractFloat64Frac( a: b ) )
2492	) {
2493	/ Do nothing, even if NaN as we're quiet /
2494	return `0`;
2495	}
2496	aSign = extractFloat64Sign( a );
2497	bSign = extractFloat64Sign( a: b );
2498	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<`1` ) == `0` );
2499	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
2500
2501	}
2502
2503	/*
2504	-------------------------------------------------------------------------------
2505	Returns 1 if the double-precision floating-point value `a' is less than
2506	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2507	exception. Otherwise, the comparison is performed according to the IEC/IEEE
2508	Standard for Binary Floating-point Arithmetic.
2509	-------------------------------------------------------------------------------
2510	*/
2511	flag float64_lt_quiet( float64 a, float64 b )
2512	{
2513	flag aSign, bSign;
2514
2515	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
2516	\|\| ( ( extractFloat64Exp( a: b ) == `0x7FF` ) && extractFloat64Frac( a: b ) )
2517	) {
2518	/ Do nothing, even if NaN as we're quiet /
2519	return `0`;
2520	}
2521	aSign = extractFloat64Sign( a );
2522	bSign = extractFloat64Sign( a: b );
2523	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<`1` ) != `0` );
2524	return ( a != b ) && ( aSign ^ ( a < b ) );
2525
2526	}
2527
2528	#ifdef FLOATX80
2529
2530	/*
2531	-------------------------------------------------------------------------------
2532	Returns the result of converting the extended double-precision floating-
2533	point value `a' to the 32-bit two's complement integer format. The
2534	conversion is performed according to the IEC/IEEE Standard for Binary
2535	Floating-point Arithmetic---which means in particular that the conversion
2536	is rounded according to the current rounding mode. If `a' is a NaN, the
2537	largest positive integer is returned. Otherwise, if the conversion
2538	overflows, the largest integer with the same sign as `a' is returned.
2539	-------------------------------------------------------------------------------
2540	*/
2541	int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
2542	{
2543	flag aSign;
2544	int32 aExp, shiftCount;
2545	bits64 aSig;
2546
2547	aSig = extractFloatx80Frac( a );
2548	aExp = extractFloatx80Exp( a );
2549	aSign = extractFloatx80Sign( a );
2550	if ( ( aExp == `0x7FFF` ) && (bits64) ( aSig<<`1` ) ) aSign = `0`;
2551	shiftCount = `0x4037` - aExp;
2552	if ( shiftCount <= `0` ) shiftCount = `1`;
2553	shift64RightJamming( aSig, shiftCount, &aSig );
2554	return roundAndPackInt32( roundData, aSign, aSig );
2555
2556	}
2557
2558	/*
2559	-------------------------------------------------------------------------------
2560	Returns the result of converting the extended double-precision floating-
2561	point value `a' to the 32-bit two's complement integer format. The
2562	conversion is performed according to the IEC/IEEE Standard for Binary
2563	Floating-point Arithmetic, except that the conversion is always rounded
2564	toward zero. If `a' is a NaN, the largest positive integer is returned.
2565	Otherwise, if the conversion overflows, the largest integer with the same
2566	sign as `a' is returned.
2567	-------------------------------------------------------------------------------
2568	*/
2569	int32 floatx80_to_int32_round_to_zero( floatx80 a )
2570	{
2571	flag aSign;
2572	int32 aExp, shiftCount;
2573	bits64 aSig, savedASig;
2574	int32 z;
2575
2576	aSig = extractFloatx80Frac( a );
2577	aExp = extractFloatx80Exp( a );
2578	aSign = extractFloatx80Sign( a );
2579	shiftCount = `0x403E` - aExp;
2580	if ( shiftCount < `32` ) {
2581	if ( ( aExp == `0x7FFF` ) && (bits64) ( aSig<<`1` ) ) aSign = `0`;
2582	goto invalid;
2583	}
2584	else if ( `63` < shiftCount ) {
2585	if ( aExp \|\| aSig ) float_raise( float_flag_inexact );
2586	return `0`;
2587	}
2588	savedASig = aSig;
2589	aSig >>= shiftCount;
2590	z = aSig;
2591	if ( aSign ) z = - z;
2592	if ( ( z < `0` ) ^ aSign ) {
2593	invalid:
2594	float_raise( float_flag_invalid );
2595	return aSign ? `0x80000000` : `0x7FFFFFFF`;
2596	}
2597	if ( ( aSig<<shiftCount ) != savedASig ) {
2598	float_raise( float_flag_inexact );
2599	}
2600	return z;
2601
2602	}
2603
2604	/*
2605	-------------------------------------------------------------------------------
2606	Returns the result of converting the extended double-precision floating-
2607	point value `a' to the single-precision floating-point format. The
2608	conversion is performed according to the IEC/IEEE Standard for Binary
2609	Floating-point Arithmetic.
2610	-------------------------------------------------------------------------------
2611	*/
2612	float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
2613	{
2614	flag aSign;
2615	int32 aExp;
2616	bits64 aSig;
2617
2618	aSig = extractFloatx80Frac( a );
2619	aExp = extractFloatx80Exp( a );
2620	aSign = extractFloatx80Sign( a );
2621	if ( aExp == `0x7FFF` ) {
2622	if ( (bits64) ( aSig<<`1` ) ) {
2623	return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
2624	}
2625	return packFloat32( aSign, `0xFF`, `0` );
2626	}
2627	shift64RightJamming( aSig, `33`, &aSig );
2628	if ( aExp \|\| aSig ) aExp -= `0x3F81`;
2629	return roundAndPackFloat32( roundData, aSign, aExp, aSig );
2630
2631	}
2632
2633	/*
2634	-------------------------------------------------------------------------------
2635	Returns the result of converting the extended double-precision floating-
2636	point value `a' to the double-precision floating-point format. The
2637	conversion is performed according to the IEC/IEEE Standard for Binary
2638	Floating-point Arithmetic.
2639	-------------------------------------------------------------------------------
2640	*/
2641	float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
2642	{
2643	flag aSign;
2644	int32 aExp;
2645	bits64 aSig, zSig;
2646
2647	aSig = extractFloatx80Frac( a );
2648	aExp = extractFloatx80Exp( a );
2649	aSign = extractFloatx80Sign( a );
2650	if ( aExp == `0x7FFF` ) {
2651	if ( (bits64) ( aSig<<`1` ) ) {
2652	return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
2653	}
2654	return packFloat64( aSign, `0x7FF`, `0` );
2655	}
2656	shift64RightJamming( aSig, `1`, &zSig );
2657	if ( aExp \|\| aSig ) aExp -= `0x3C01`;
2658	return roundAndPackFloat64( roundData, aSign, aExp, zSig );
2659
2660	}
2661
2662	/*
2663	-------------------------------------------------------------------------------
2664	Rounds the extended double-precision floating-point value `a' to an integer,
2665	and returns the result as an extended quadruple-precision floating-point
2666	value. The operation is performed according to the IEC/IEEE Standard for
2667	Binary Floating-point Arithmetic.
2668	-------------------------------------------------------------------------------
2669	*/
2670	floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
2671	{
2672	flag aSign;
2673	int32 aExp;
2674	bits64 lastBitMask, roundBitsMask;
2675	int8 roundingMode;
2676	floatx80 z;
2677
2678	aExp = extractFloatx80Exp( a );
2679	if ( `0x403E` <= aExp ) {
2680	if ( ( aExp == `0x7FFF` ) && (bits64) ( extractFloatx80Frac( a )<<`1` ) ) {
2681	return propagateFloatx80NaN( a, a );
2682	}
2683	return a;
2684	}
2685	if ( aExp <= `0x3FFE` ) {
2686	if ( ( aExp == `0` )
2687	&& ( (bits64) ( extractFloatx80Frac( a )<<`1` ) == `0` ) ) {
2688	return a;
2689	}
2690	roundData->exception \|= float_flag_inexact;
2691	aSign = extractFloatx80Sign( a );
2692	switch ( roundData->mode ) {
2693	case float_round_nearest_even:
2694	if ( ( aExp == `0x3FFE` ) && (bits64) ( extractFloatx80Frac( a )<<`1` )
2695	) {
2696	return
2697	packFloatx80( aSign, `0x3FFF`, LIT64( `0x8000000000000000` ) );
2698	}
2699	break;
2700	case float_round_down:
2701	return
2702	aSign ?
2703	packFloatx80( `1`, `0x3FFF`, LIT64( `0x8000000000000000` ) )
2704	: packFloatx80( `0`, `0`, `0` );
2705	case float_round_up:
2706	return
2707	aSign ? packFloatx80( `1`, `0`, `0` )
2708	: packFloatx80( `0`, `0x3FFF`, LIT64( `0x8000000000000000` ) );
2709	}
2710	return packFloatx80( aSign, `0`, `0` );
2711	}
2712	lastBitMask = `1`;
2713	lastBitMask <<= `0x403E` - aExp;
2714	roundBitsMask = lastBitMask - `1`;
2715	z = a;
2716	roundingMode = roundData->mode;
2717	if ( roundingMode == float_round_nearest_even ) {
2718	z.low += lastBitMask>>`1`;
2719	if ( ( z.low & roundBitsMask ) == `0` ) z.low &= ~ lastBitMask;
2720	}
2721	else if ( roundingMode != float_round_to_zero ) {
2722	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2723	z.low += roundBitsMask;
2724	}
2725	}
2726	z.low &= ~ roundBitsMask;
2727	if ( z.low == `0` ) {
2728	++z.high;
2729	z.low = LIT64( `0x8000000000000000` );
2730	}
2731	if ( z.low != a.low ) roundData->exception \|= float_flag_inexact;
2732	return z;
2733
2734	}
2735
2736	/*
2737	-------------------------------------------------------------------------------
2738	Returns the result of adding the absolute values of the extended double-
2739	precision floating-point values `a' and `b'. If `zSign' is true, the sum is
2740	negated before being returned. `zSign' is ignored if the result is a NaN.
2741	The addition is performed according to the IEC/IEEE Standard for Binary
2742	Floating-point Arithmetic.
2743	-------------------------------------------------------------------------------
2744	*/
2745	static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
2746	{
2747	int32 aExp, bExp, zExp;
2748	bits64 aSig, bSig, zSig0, zSig1;
2749	int32 expDiff;
2750
2751	aSig = extractFloatx80Frac( a );
2752	aExp = extractFloatx80Exp( a );
2753	bSig = extractFloatx80Frac( b );
2754	bExp = extractFloatx80Exp( b );
2755	expDiff = aExp - bExp;
2756	if ( `0` < expDiff ) {
2757	if ( aExp == `0x7FFF` ) {
2758	if ( (bits64) ( aSig<<`1` ) ) return propagateFloatx80NaN( a, b );
2759	return a;
2760	}
2761	if ( bExp == `0` ) --expDiff;
2762	shift64ExtraRightJamming( bSig, `0`, expDiff, &bSig, &zSig1 );
2763	zExp = aExp;
2764	}
2765	else if ( expDiff < `0` ) {
2766	if ( bExp == `0x7FFF` ) {
2767	if ( (bits64) ( bSig<<`1` ) ) return propagateFloatx80NaN( a, b );
2768	return packFloatx80( zSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
2769	}
2770	if ( aExp == `0` ) ++expDiff;
2771	shift64ExtraRightJamming( aSig, `0`, - expDiff, &aSig, &zSig1 );
2772	zExp = bExp;
2773	}
2774	else {
2775	if ( aExp == `0x7FFF` ) {
2776	if ( (bits64) ( ( aSig \| bSig )<<`1` ) ) {
2777	return propagateFloatx80NaN( a, b );
2778	}
2779	return a;
2780	}
2781	zSig1 = `0`;
2782	zSig0 = aSig + bSig;
2783	if ( aExp == `0` ) {
2784	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2785	goto roundAndPack;
2786	}
2787	zExp = aExp;
2788	goto shiftRight1;
2789	}
2790
2791	zSig0 = aSig + bSig;
2792
2793	if ( (sbits64) zSig0 < `0` ) goto roundAndPack;
2794	shiftRight1:
2795	shift64ExtraRightJamming( zSig0, zSig1, `1`, &zSig0, &zSig1 );
2796	zSig0 \|= LIT64( `0x8000000000000000` );
2797	++zExp;
2798	roundAndPack:
2799	return
2800	roundAndPackFloatx80(
2801	roundData, zSign, zExp, zSig0, zSig1 );
2802
2803	}
2804
2805	/*
2806	-------------------------------------------------------------------------------
2807	Returns the result of subtracting the absolute values of the extended
2808	double-precision floating-point values `a' and `b'. If `zSign' is true,
2809	the difference is negated before being returned. `zSign' is ignored if the
2810	result is a NaN. The subtraction is performed according to the IEC/IEEE
2811	Standard for Binary Floating-point Arithmetic.
2812	-------------------------------------------------------------------------------
2813	*/
2814	static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
2815	{
2816	int32 aExp, bExp, zExp;
2817	bits64 aSig, bSig, zSig0, zSig1;
2818	int32 expDiff;
2819	floatx80 z;
2820
2821	aSig = extractFloatx80Frac( a );
2822	aExp = extractFloatx80Exp( a );
2823	bSig = extractFloatx80Frac( b );
2824	bExp = extractFloatx80Exp( b );
2825	expDiff = aExp - bExp;
2826	if ( `0` < expDiff ) goto aExpBigger;
2827	if ( expDiff < `0` ) goto bExpBigger;
2828	if ( aExp == `0x7FFF` ) {
2829	if ( (bits64) ( ( aSig \| bSig )<<`1` ) ) {
2830	return propagateFloatx80NaN( a, b );
2831	}
2832	roundData->exception \|= float_flag_invalid;
2833	z.low = floatx80_default_nan_low;
2834	z.high = floatx80_default_nan_high;
2835	z.__padding = `0`;
2836	return z;
2837	}
2838	if ( aExp == `0` ) {
2839	aExp = `1`;
2840	bExp = `1`;
2841	}
2842	zSig1 = `0`;
2843	if ( bSig < aSig ) goto aBigger;
2844	if ( aSig < bSig ) goto bBigger;
2845	return packFloatx80( roundData->mode == float_round_down, `0`, `0` );
2846	bExpBigger:
2847	if ( bExp == `0x7FFF` ) {
2848	if ( (bits64) ( bSig<<`1` ) ) return propagateFloatx80NaN( a, b );
2849	return packFloatx80( zSign ^ `1`, `0x7FFF`, LIT64( `0x8000000000000000` ) );
2850	}
2851	if ( aExp == `0` ) ++expDiff;
2852	shift128RightJamming( aSig, `0`, - expDiff, &aSig, &zSig1 );
2853	bBigger:
2854	sub128( bSig, `0`, aSig, zSig1, &zSig0, &zSig1 );
2855	zExp = bExp;
2856	zSign ^= `1`;
2857	goto normalizeRoundAndPack;
2858	aExpBigger:
2859	if ( aExp == `0x7FFF` ) {
2860	if ( (bits64) ( aSig<<`1` ) ) return propagateFloatx80NaN( a, b );
2861	return a;
2862	}
2863	if ( bExp == `0` ) --expDiff;
2864	shift128RightJamming( bSig, `0`, expDiff, &bSig, &zSig1 );
2865	aBigger:
2866	sub128( aSig, `0`, bSig, zSig1, &zSig0, &zSig1 );
2867	zExp = aExp;
2868	normalizeRoundAndPack:
2869	return
2870	normalizeRoundAndPackFloatx80(
2871	roundData, zSign, zExp, zSig0, zSig1 );
2872
2873	}
2874
2875	/*
2876	-------------------------------------------------------------------------------
2877	Returns the result of adding the extended double-precision floating-point
2878	values `a' and `b'. The operation is performed according to the IEC/IEEE
2879	Standard for Binary Floating-point Arithmetic.
2880	-------------------------------------------------------------------------------
2881	*/
2882	floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
2883	{
2884	flag aSign, bSign;
2885
2886	aSign = extractFloatx80Sign( a );
2887	bSign = extractFloatx80Sign( b );
2888	if ( aSign == bSign ) {
2889	return addFloatx80Sigs( roundData, a, b, aSign );
2890	}
2891	else {
2892	return subFloatx80Sigs( roundData, a, b, aSign );
2893	}
2894
2895	}
2896
2897	/*
2898	-------------------------------------------------------------------------------
2899	Returns the result of subtracting the extended double-precision floating-
2900	point values `a' and `b'. The operation is performed according to the
2901	IEC/IEEE Standard for Binary Floating-point Arithmetic.
2902	-------------------------------------------------------------------------------
2903	*/
2904	floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
2905	{
2906	flag aSign, bSign;
2907
2908	aSign = extractFloatx80Sign( a );
2909	bSign = extractFloatx80Sign( b );
2910	if ( aSign == bSign ) {
2911	return subFloatx80Sigs( roundData, a, b, aSign );
2912	}
2913	else {
2914	return addFloatx80Sigs( roundData, a, b, aSign );
2915	}
2916
2917	}
2918
2919	/*
2920	-------------------------------------------------------------------------------
2921	Returns the result of multiplying the extended double-precision floating-
2922	point values `a' and `b'. The operation is performed according to the
2923	IEC/IEEE Standard for Binary Floating-point Arithmetic.
2924	-------------------------------------------------------------------------------
2925	*/
2926	floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
2927	{
2928	flag aSign, bSign, zSign;
2929	int32 aExp, bExp, zExp;
2930	bits64 aSig, bSig, zSig0, zSig1;
2931	floatx80 z;
2932
2933	aSig = extractFloatx80Frac( a );
2934	aExp = extractFloatx80Exp( a );
2935	aSign = extractFloatx80Sign( a );
2936	bSig = extractFloatx80Frac( b );
2937	bExp = extractFloatx80Exp( b );
2938	bSign = extractFloatx80Sign( b );
2939	zSign = aSign ^ bSign;
2940	if ( aExp == `0x7FFF` ) {
2941	if ( (bits64) ( aSig<<`1` )
2942	\|\| ( ( bExp == `0x7FFF` ) && (bits64) ( bSig<<`1` ) ) ) {
2943	return propagateFloatx80NaN( a, b );
2944	}
2945	if ( ( bExp \| bSig ) == `0` ) goto invalid;
2946	return packFloatx80( zSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
2947	}
2948	if ( bExp == `0x7FFF` ) {
2949	if ( (bits64) ( bSig<<`1` ) ) return propagateFloatx80NaN( a, b );
2950	if ( ( aExp \| aSig ) == `0` ) {
2951	invalid:
2952	roundData->exception \|= float_flag_invalid;
2953	z.low = floatx80_default_nan_low;
2954	z.high = floatx80_default_nan_high;
2955	z.__padding = `0`;
2956	return z;
2957	}
2958	return packFloatx80( zSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
2959	}
2960	if ( aExp == `0` ) {
2961	if ( aSig == `0` ) return packFloatx80( zSign, `0`, `0` );
2962	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2963	}
2964	if ( bExp == `0` ) {
2965	if ( bSig == `0` ) return packFloatx80( zSign, `0`, `0` );
2966	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2967	}
2968	zExp = aExp + bExp - `0x3FFE`;
2969	mul64To128( aSig, bSig, &zSig0, &zSig1 );
2970	if ( `0` < (sbits64) zSig0 ) {
2971	shortShift128Left( zSig0, zSig1, `1`, &zSig0, &zSig1 );
2972	--zExp;
2973	}
2974	return
2975	roundAndPackFloatx80(
2976	roundData, zSign, zExp, zSig0, zSig1 );
2977
2978	}
2979
2980	/*
2981	-------------------------------------------------------------------------------
2982	Returns the result of dividing the extended double-precision floating-point
2983	value `a' by the corresponding value `b'. The operation is performed
2984	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2985	-------------------------------------------------------------------------------
2986	*/
2987	floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
2988	{
2989	flag aSign, bSign, zSign;
2990	int32 aExp, bExp, zExp;
2991	bits64 aSig, bSig, zSig0, zSig1;
2992	bits64 rem0, rem1, rem2, term0, term1, term2;
2993	floatx80 z;
2994
2995	aSig = extractFloatx80Frac( a );
2996	aExp = extractFloatx80Exp( a );
2997	aSign = extractFloatx80Sign( a );
2998	bSig = extractFloatx80Frac( b );
2999	bExp = extractFloatx80Exp( b );
3000	bSign = extractFloatx80Sign( b );
3001	zSign = aSign ^ bSign;
3002	if ( aExp == `0x7FFF` ) {
3003	if ( (bits64) ( aSig<<`1` ) ) return propagateFloatx80NaN( a, b );
3004	if ( bExp == `0x7FFF` ) {
3005	if ( (bits64) ( bSig<<`1` ) ) return propagateFloatx80NaN( a, b );
3006	goto invalid;
3007	}
3008	return packFloatx80( zSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
3009	}
3010	if ( bExp == `0x7FFF` ) {
3011	if ( (bits64) ( bSig<<`1` ) ) return propagateFloatx80NaN( a, b );
3012	return packFloatx80( zSign, `0`, `0` );
3013	}
3014	if ( bExp == `0` ) {
3015	if ( bSig == `0` ) {
3016	if ( ( aExp \| aSig ) == `0` ) {
3017	invalid:
3018	roundData->exception \|= float_flag_invalid;
3019	z.low = floatx80_default_nan_low;
3020	z.high = floatx80_default_nan_high;
3021	z.__padding = `0`;
3022	return z;
3023	}
3024	roundData->exception \|= float_flag_divbyzero;
3025	return packFloatx80( zSign, `0x7FFF`, LIT64( `0x8000000000000000` ) );
3026	}
3027	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3028	}
3029	if ( aExp == `0` ) {
3030	if ( aSig == `0` ) return packFloatx80( zSign, `0`, `0` );
3031	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3032	}
3033	zExp = aExp - bExp + `0x3FFE`;
3034	rem1 = `0`;
3035	if ( bSig <= aSig ) {
3036	shift128Right( aSig, `0`, `1`, &aSig, &rem1 );
3037	++zExp;
3038	}
3039	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3040	mul64To128( bSig, zSig0, &term0, &term1 );
3041	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3042	while ( (sbits64) rem0 < `0` ) {
3043	--zSig0;
3044	add128( rem0, rem1, `0`, bSig, &rem0, &rem1 );
3045	}
3046	zSig1 = estimateDiv128To64( rem1, `0`, bSig );
3047	if ( (bits64) ( zSig1<<`1` ) <= `8` ) {
3048	mul64To128( bSig, zSig1, &term1, &term2 );
3049	sub128( rem1, `0`, term1, term2, &rem1, &rem2 );
3050	while ( (sbits64) rem1 < `0` ) {
3051	--zSig1;
3052	add128( rem1, rem2, `0`, bSig, &rem1, &rem2 );
3053	}
3054	zSig1 \|= ( ( rem1 \| rem2 ) != `0` );
3055	}
3056	return
3057	roundAndPackFloatx80(
3058	roundData, zSign, zExp, zSig0, zSig1 );
3059
3060	}
3061
3062	/*
3063	-------------------------------------------------------------------------------
3064	Returns the remainder of the extended double-precision floating-point value
3065	`a' with respect to the corresponding value `b'. The operation is performed
3066	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3067	-------------------------------------------------------------------------------
3068	*/
3069	floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
3070	{
3071	flag aSign, bSign, zSign;
3072	int32 aExp, bExp, expDiff;
3073	bits64 aSig0, aSig1, bSig;
3074	bits64 q, term0, term1, alternateASig0, alternateASig1;
3075	floatx80 z;
3076
3077	aSig0 = extractFloatx80Frac( a );
3078	aExp = extractFloatx80Exp( a );
3079	aSign = extractFloatx80Sign( a );
3080	bSig = extractFloatx80Frac( b );
3081	bExp = extractFloatx80Exp( b );
3082	bSign = extractFloatx80Sign( b );
3083	if ( aExp == `0x7FFF` ) {
3084	if ( (bits64) ( aSig0<<`1` )
3085	\|\| ( ( bExp == `0x7FFF` ) && (bits64) ( bSig<<`1` ) ) ) {
3086	return propagateFloatx80NaN( a, b );
3087	}
3088	goto invalid;
3089	}
3090	if ( bExp == `0x7FFF` ) {
3091	if ( (bits64) ( bSig<<`1` ) ) return propagateFloatx80NaN( a, b );
3092	return a;
3093	}
3094	if ( bExp == `0` ) {
3095	if ( bSig == `0` ) {
3096	invalid:
3097	roundData->exception \|= float_flag_invalid;
3098	z.low = floatx80_default_nan_low;
3099	z.high = floatx80_default_nan_high;
3100	z.__padding = `0`;
3101	return z;
3102	}
3103	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3104	}
3105	if ( aExp == `0` ) {
3106	if ( (bits64) ( aSig0<<`1` ) == `0` ) return a;
3107	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3108	}
3109	bSig \|= LIT64( `0x8000000000000000` );
3110	zSign = aSign;
3111	expDiff = aExp - bExp;
3112	aSig1 = `0`;
3113	if ( expDiff < `0` ) {
3114	if ( expDiff < -`1` ) return a;
3115	shift128Right( aSig0, `0`, `1`, &aSig0, &aSig1 );
3116	expDiff = `0`;
3117	}
3118	q = ( bSig <= aSig0 );
3119	if ( q ) aSig0 -= bSig;
3120	expDiff -= `64`;
3121	while ( `0` < expDiff ) {
3122	q = estimateDiv128To64( aSig0, aSig1, bSig );
3123	q = ( `2` < q ) ? q - `2` : `0`;
3124	mul64To128( bSig, q, &term0, &term1 );
3125	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3126	shortShift128Left( aSig0, aSig1, `62`, &aSig0, &aSig1 );
3127	expDiff -= `62`;
3128	}
3129	expDiff += `64`;
3130	if ( `0` < expDiff ) {
3131	q = estimateDiv128To64( aSig0, aSig1, bSig );
3132	q = ( `2` < q ) ? q - `2` : `0`;
3133	q >>= `64` - expDiff;
3134	mul64To128( bSig, q<<( `64` - expDiff ), &term0, &term1 );
3135	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3136	shortShift128Left( `0`, bSig, `64` - expDiff, &term0, &term1 );
3137	while ( le128( term0, term1, aSig0, aSig1 ) ) {
3138	++q;
3139	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3140	}
3141	}
3142	else {
3143	term1 = `0`;
3144	term0 = bSig;
3145	}
3146	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3147	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3148	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3149	&& ( q & `1` ) )
3150	) {
3151	aSig0 = alternateASig0;
3152	aSig1 = alternateASig1;
3153	zSign = ! zSign;
3154	}
3155
3156	return
3157	normalizeRoundAndPackFloatx80(
3158	roundData, zSign, bExp + expDiff, aSig0, aSig1 );
3159
3160	}
3161
3162	/*
3163	-------------------------------------------------------------------------------
3164	Returns the square root of the extended double-precision floating-point
3165	value `a'. The operation is performed according to the IEC/IEEE Standard
3166	for Binary Floating-point Arithmetic.
3167	-------------------------------------------------------------------------------
3168	*/
3169	floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
3170	{
3171	flag aSign;
3172	int32 aExp, zExp;
3173	bits64 aSig0, aSig1, zSig0, zSig1;
3174	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3175	bits64 shiftedRem0, shiftedRem1;
3176	floatx80 z;
3177
3178	aSig0 = extractFloatx80Frac( a );
3179	aExp = extractFloatx80Exp( a );
3180	aSign = extractFloatx80Sign( a );
3181	if ( aExp == `0x7FFF` ) {
3182	if ( (bits64) ( aSig0<<`1` ) ) return propagateFloatx80NaN( a, a );
3183	if ( ! aSign ) return a;
3184	goto invalid;
3185	}
3186	if ( aSign ) {
3187	if ( ( aExp \| aSig0 ) == `0` ) return a;
3188	invalid:
3189	roundData->exception \|= float_flag_invalid;
3190	z.low = floatx80_default_nan_low;
3191	z.high = floatx80_default_nan_high;
3192	z.__padding = `0`;
3193	return z;
3194	}
3195	if ( aExp == `0` ) {
3196	if ( aSig0 == `0` ) return packFloatx80( `0`, `0`, `0` );
3197	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3198	}
3199	zExp = ( ( aExp - `0x3FFF` )>>`1` ) + `0x3FFF`;
3200	zSig0 = estimateSqrt32( aExp, aSig0>>`32` );
3201	zSig0 <<= `31`;
3202	aSig1 = `0`;
3203	shift128Right( aSig0, `0`, ( aExp & `1` ) + `2`, &aSig0, &aSig1 );
3204	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + `4`;
3205	if ( `0` <= (sbits64) zSig0 ) zSig0 = LIT64( `0xFFFFFFFFFFFFFFFF` );
3206	shortShift128Left( aSig0, aSig1, `2`, &aSig0, &aSig1 );
3207	mul64To128( zSig0, zSig0, &term0, &term1 );
3208	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3209	while ( (sbits64) rem0 < `0` ) {
3210	--zSig0;
3211	shortShift128Left( `0`, zSig0, `1`, &term0, &term1 );
3212	term1 \|= `1`;
3213	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
3214	}
3215	shortShift128Left( rem0, rem1, `63`, &shiftedRem0, &shiftedRem1 );
3216	zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
3217	if ( (bits64) ( zSig1<<`1` ) <= `10` ) {
3218	if ( zSig1 == `0` ) zSig1 = `1`;
3219	mul64To128( zSig0, zSig1, &term1, &term2 );
3220	shortShift128Left( term1, term2, `1`, &term1, &term2 );
3221	sub128( rem1, `0`, term1, term2, &rem1, &rem2 );
3222	mul64To128( zSig1, zSig1, &term2, &term3 );
3223	sub192( rem1, rem2, `0`, `0`, term2, term3, &rem1, &rem2, &rem3 );
3224	while ( (sbits64) rem1 < `0` ) {
3225	--zSig1;
3226	shortShift192Left( `0`, zSig0, zSig1, `1`, &term1, &term2, &term3 );
3227	term3 \|= `1`;
3228	add192(
3229	rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
3230	}
3231	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != `0` );
3232	}
3233	return
3234	roundAndPackFloatx80(
3235	roundData, `0`, zExp, zSig0, zSig1 );
3236
3237	}
3238
3239	/*
3240	-------------------------------------------------------------------------------
3241	Returns 1 if the extended double-precision floating-point value `a' is
3242	equal to the corresponding value `b', and 0 otherwise. The comparison is
3243	performed according to the IEC/IEEE Standard for Binary Floating-point
3244	Arithmetic.
3245	-------------------------------------------------------------------------------
3246	*/
3247	flag floatx80_eq( floatx80 a, floatx80 b )
3248	{
3249
3250	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
3251	&& (bits64) ( extractFloatx80Frac( a )<<`1` ) )
3252	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
3253	&& (bits64) ( extractFloatx80Frac( b )<<`1` ) )
3254	) {
3255	if ( floatx80_is_signaling_nan( a )
3256	\|\| floatx80_is_signaling_nan( b ) ) {
3257	float_raise( float_flag_invalid );
3258	}
3259	return `0`;
3260	}
3261	return
3262	( a.low == b.low )
3263	&& ( ( a.high == b.high )
3264	\|\| ( ( a.low == `0` )
3265	&& ( (bits16) ( ( a.high \| b.high )<<`1` ) == `0` ) )
3266	);
3267
3268	}
3269
3270	/*
3271	-------------------------------------------------------------------------------
3272	Returns 1 if the extended double-precision floating-point value `a' is
3273	less than or equal to the corresponding value `b', and 0 otherwise. The
3274	comparison is performed according to the IEC/IEEE Standard for Binary
3275	Floating-point Arithmetic.
3276	-------------------------------------------------------------------------------
3277	*/
3278	flag floatx80_le( floatx80 a, floatx80 b )
3279	{
3280	flag aSign, bSign;
3281
3282	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
3283	&& (bits64) ( extractFloatx80Frac( a )<<`1` ) )
3284	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
3285	&& (bits64) ( extractFloatx80Frac( b )<<`1` ) )
3286	) {
3287	float_raise( float_flag_invalid );
3288	return `0`;
3289	}
3290	aSign = extractFloatx80Sign( a );
3291	bSign = extractFloatx80Sign( b );
3292	if ( aSign != bSign ) {
3293	return
3294	aSign
3295	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
3296	== `0` );
3297	}
3298	return
3299	aSign ? le128( b.high, b.low, a.high, a.low )
3300	: le128( a.high, a.low, b.high, b.low );
3301
3302	}
3303
3304	/*
3305	-------------------------------------------------------------------------------
3306	Returns 1 if the extended double-precision floating-point value `a' is
3307	less than the corresponding value `b', and 0 otherwise. The comparison
3308	is performed according to the IEC/IEEE Standard for Binary Floating-point
3309	Arithmetic.
3310	-------------------------------------------------------------------------------
3311	*/
3312	flag floatx80_lt( floatx80 a, floatx80 b )
3313	{
3314	flag aSign, bSign;
3315
3316	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
3317	&& (bits64) ( extractFloatx80Frac( a )<<`1` ) )
3318	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
3319	&& (bits64) ( extractFloatx80Frac( b )<<`1` ) )
3320	) {
3321	float_raise( float_flag_invalid );
3322	return `0`;
3323	}
3324	aSign = extractFloatx80Sign( a );
3325	bSign = extractFloatx80Sign( b );
3326	if ( aSign != bSign ) {
3327	return
3328	aSign
3329	&& ( ( ( (bits16) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
3330	!= `0` );
3331	}
3332	return
3333	aSign ? lt128( b.high, b.low, a.high, a.low )
3334	: lt128( a.high, a.low, b.high, b.low );
3335
3336	}
3337
3338	/*
3339	-------------------------------------------------------------------------------
3340	Returns 1 if the extended double-precision floating-point value `a' is equal
3341	to the corresponding value `b', and 0 otherwise. The invalid exception is
3342	raised if either operand is a NaN. Otherwise, the comparison is performed
3343	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3344	-------------------------------------------------------------------------------
3345	*/
3346	flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3347	{
3348
3349	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
3350	&& (bits64) ( extractFloatx80Frac( a )<<`1` ) )
3351	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
3352	&& (bits64) ( extractFloatx80Frac( b )<<`1` ) )
3353	) {
3354	float_raise( float_flag_invalid );
3355	return `0`;
3356	}
3357	return
3358	( a.low == b.low )
3359	&& ( ( a.high == b.high )
3360	\|\| ( ( a.low == `0` )
3361	&& ( (bits16) ( ( a.high \| b.high )<<`1` ) == `0` ) )
3362	);
3363
3364	}
3365
3366	/*
3367	-------------------------------------------------------------------------------
3368	Returns 1 if the extended double-precision floating-point value `a' is less
3369	than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
3370	do not cause an exception. Otherwise, the comparison is performed according
3371	to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3372	-------------------------------------------------------------------------------
3373	*/
3374	flag floatx80_le_quiet( floatx80 a, floatx80 b )
3375	{
3376	flag aSign, bSign;
3377
3378	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
3379	&& (bits64) ( extractFloatx80Frac( a )<<`1` ) )
3380	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
3381	&& (bits64) ( extractFloatx80Frac( b )<<`1` ) )
3382	) {
3383	/ Do nothing, even if NaN as we're quiet /
3384	return `0`;
3385	}
3386	aSign = extractFloatx80Sign( a );
3387	bSign = extractFloatx80Sign( b );
3388	if ( aSign != bSign ) {
3389	return
3390	aSign
3391	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
3392	== `0` );
3393	}
3394	return
3395	aSign ? le128( b.high, b.low, a.high, a.low )
3396	: le128( a.high, a.low, b.high, b.low );
3397
3398	}
3399
3400	/*
3401	-------------------------------------------------------------------------------
3402	Returns 1 if the extended double-precision floating-point value `a' is less
3403	than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
3404	an exception. Otherwise, the comparison is performed according to the
3405	IEC/IEEE Standard for Binary Floating-point Arithmetic.
3406	-------------------------------------------------------------------------------
3407	*/
3408	flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3409	{
3410	flag aSign, bSign;
3411
3412	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
3413	&& (bits64) ( extractFloatx80Frac( a )<<`1` ) )
3414	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
3415	&& (bits64) ( extractFloatx80Frac( b )<<`1` ) )
3416	) {
3417	/ Do nothing, even if NaN as we're quiet /
3418	return `0`;
3419	}
3420	aSign = extractFloatx80Sign( a );
3421	bSign = extractFloatx80Sign( b );
3422	if ( aSign != bSign ) {
3423	return
3424	aSign
3425	&& ( ( ( (bits16) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
3426	!= `0` );
3427	}
3428	return
3429	aSign ? lt128( b.high, b.low, a.high, a.low )
3430	: lt128( a.high, a.low, b.high, b.low );
3431
3432	}
3433
3434	#endif
3435
3436

source code of linux/arch/arm/nwfpe/softfloat.c