softfloat-macros source code [linux/arch/arm/nwfpe/softfloat-macros]

1
2	/*
3	===============================================================================
4
5	This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6	Arithmetic Package, Release 2.
7
8	Written by John R. Hauser. This work was made possible in part by the
9	International Computer Science Institute, located at Suite 600, 1947 Center
10	Street, Berkeley, California 94704. Funding was partially provided by the
11	National Science Foundation under grant MIP-9311980. The original version
12	of this code was written as part of a project to build a fixed-point vector
13	processor in collaboration with the University of California at Berkeley,
14	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
15	is available through the web page
16	http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
17
18	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23
24	Derivative works are acceptable, even for commercial purposes, so long as
25	(1) they include prominent notice that the work is derivative, and (2) they
26	include prominent notice akin to these three paragraphs for those parts of
27	this code that are retained.
28
29	===============================================================================
30	*/
31
32	/*
33	-------------------------------------------------------------------------------
34	Shifts `a' right by the number of bits given in `count'. If any nonzero
35	bits are shifted off, they are ``jammed'' into the least significant bit of
36	the result by setting the least significant bit to 1. The value of `count'
37	can be arbitrarily large; in particular, if `count' is greater than 32, the
38	result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39	The result is stored in the location pointed to by `zPtr'.
40	-------------------------------------------------------------------------------
41	*/
42	INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43	{
44	bits32 z;
45	if ( count == `0` ) {
46	z = a;
47	}
48	else if ( count < `32` ) {
49	z = ( a>>count ) \| ( ( a<<( ( - count ) & `31` ) ) != `0` );
50	}
51	else {
52	z = ( a != `0` );
53	}
54	*zPtr = z;
55	}
56
57	/*
58	-------------------------------------------------------------------------------
59	Shifts `a' right by the number of bits given in `count'. If any nonzero
60	bits are shifted off, they are ``jammed'' into the least significant bit of
61	the result by setting the least significant bit to 1. The value of `count'
62	can be arbitrarily large; in particular, if `count' is greater than 64, the
63	result will be either 0 or 1, depending on whether `a' is zero or nonzero.
64	The result is stored in the location pointed to by `zPtr'.
65	-------------------------------------------------------------------------------
66	*/
67	INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
68	{
69	bits64 z;
70
71	__asm__("@shift64RightJamming -- start");
72	if ( count == `0` ) {
73	z = a;
74	}
75	else if ( count < `64` ) {
76	z = ( a>>count ) \| ( ( a<<( ( - count ) & `63` ) ) != `0` );
77	}
78	else {
79	z = ( a != `0` );
80	}
81	__asm__("@shift64RightJamming -- end");
82	*zPtr = z;
83	}
84
85	/*
86	-------------------------------------------------------------------------------
87	Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
88	_plus_ the number of bits given in `count'. The shifted result is at most
89	64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
90	bits shifted off form a second 64-bit result as follows: The _last_ bit
91	shifted off is the most-significant bit of the extra result, and the other
92	63 bits of the extra result are all zero if and only if _all_but_the_last_
93	bits shifted off were all zero. This extra result is stored in the location
94	pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
95	(This routine makes more sense if `a0' and `a1' are considered to form a
96	fixed-point value with binary point between `a0' and `a1'. This fixed-point
97	value is shifted right by the number of bits given in `count', and the
98	integer part of the result is returned at the location pointed to by
99	`z0Ptr'. The fractional part of the result may be slightly corrupted as
100	described above, and is returned at the location pointed to by `z1Ptr'.)
101	-------------------------------------------------------------------------------
102	*/
103	INLINE void
104	shift64ExtraRightJamming(
105	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
106	{
107	bits64 z0, z1;
108	int8 negCount = ( - count ) & `63`;
109
110	if ( count == `0` ) {
111	z1 = a1;
112	z0 = a0;
113	}
114	else if ( count < `64` ) {
115	z1 = ( a0<<negCount ) \| ( a1 != `0` );
116	z0 = a0>>count;
117	}
118	else {
119	if ( count == `64` ) {
120	z1 = a0 \| ( a1 != `0` );
121	}
122	else {
123	z1 = ( ( a0 \| a1 ) != `0` );
124	}
125	z0 = `0`;
126	}
127	*z1Ptr = z1;
128	*z0Ptr = z0;
129
130	}
131
132	/*
133	-------------------------------------------------------------------------------
134	Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
135	number of bits given in `count'. Any bits shifted off are lost. The value
136	of `count' can be arbitrarily large; in particular, if `count' is greater
137	than 128, the result will be 0. The result is broken into two 64-bit pieces
138	which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139	-------------------------------------------------------------------------------
140	*/
141	INLINE void
142	shift128Right(
143	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
144	{
145	bits64 z0, z1;
146	int8 negCount = ( - count ) & `63`;
147
148	if ( count == `0` ) {
149	z1 = a1;
150	z0 = a0;
151	}
152	else if ( count < `64` ) {
153	z1 = ( a0<<negCount ) \| ( a1>>count );
154	z0 = a0>>count;
155	}
156	else {
157	z1 = ( count < `64` ) ? ( a0>>( count & `63` ) ) : `0`;
158	z0 = `0`;
159	}
160	*z1Ptr = z1;
161	*z0Ptr = z0;
162
163	}
164
165	/*
166	-------------------------------------------------------------------------------
167	Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
168	number of bits given in `count'. If any nonzero bits are shifted off, they
169	are ``jammed'' into the least significant bit of the result by setting the
170	least significant bit to 1. The value of `count' can be arbitrarily large;
171	in particular, if `count' is greater than 128, the result will be either 0
172	or 1, depending on whether the concatenation of `a0' and `a1' is zero or
173	nonzero. The result is broken into two 64-bit pieces which are stored at
174	the locations pointed to by `z0Ptr' and `z1Ptr'.
175	-------------------------------------------------------------------------------
176	*/
177	INLINE void
178	shift128RightJamming(
179	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
180	{
181	bits64 z0, z1;
182	int8 negCount = ( - count ) & `63`;
183
184	if ( count == `0` ) {
185	z1 = a1;
186	z0 = a0;
187	}
188	else if ( count < `64` ) {
189	z1 = ( a0<<negCount ) \| ( a1>>count ) \| ( ( a1<<negCount ) != `0` );
190	z0 = a0>>count;
191	}
192	else {
193	if ( count == `64` ) {
194	z1 = a0 \| ( a1 != `0` );
195	}
196	else if ( count < `128` ) {
197	z1 = ( a0>>( count & `63` ) ) \| ( ( ( a0<<negCount ) \| a1 ) != `0` );
198	}
199	else {
200	z1 = ( ( a0 \| a1 ) != `0` );
201	}
202	z0 = `0`;
203	}
204	*z1Ptr = z1;
205	*z0Ptr = z0;
206
207	}
208
209	/*
210	-------------------------------------------------------------------------------
211	Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
212	by 64 _plus_ the number of bits given in `count'. The shifted result is
213	at most 128 nonzero bits; these are broken into two 64-bit pieces which are
214	stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
215	off form a third 64-bit result as follows: The _last_ bit shifted off is
216	the most-significant bit of the extra result, and the other 63 bits of the
217	extra result are all zero if and only if _all_but_the_last_ bits shifted off
218	were all zero. This extra result is stored in the location pointed to by
219	`z2Ptr'. The value of `count' can be arbitrarily large.
220	(This routine makes more sense if `a0', `a1', and `a2' are considered
221	to form a fixed-point value with binary point between `a1' and `a2'. This
222	fixed-point value is shifted right by the number of bits given in `count',
223	and the integer part of the result is returned at the locations pointed to
224	by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
225	corrupted as described above, and is returned at the location pointed to by
226	`z2Ptr'.)
227	-------------------------------------------------------------------------------
228	*/
229	INLINE void
230	shift128ExtraRightJamming(
231	bits64 a0,
232	bits64 a1,
233	bits64 a2,
234	int16 count,
235	bits64 *z0Ptr,
236	bits64 *z1Ptr,
237	bits64 *z2Ptr
238	)
239	{
240	bits64 z0, z1, z2;
241	int8 negCount = ( - count ) & `63`;
242
243	if ( count == `0` ) {
244	z2 = a2;
245	z1 = a1;
246	z0 = a0;
247	}
248	else {
249	if ( count < `64` ) {
250	z2 = a1<<negCount;
251	z1 = ( a0<<negCount ) \| ( a1>>count );
252	z0 = a0>>count;
253	}
254	else {
255	if ( count == `64` ) {
256	z2 = a1;
257	z1 = a0;
258	}
259	else {
260	a2 \|= a1;
261	if ( count < `128` ) {
262	z2 = a0<<negCount;
263	z1 = a0>>( count & `63` );
264	}
265	else {
266	z2 = ( count == `128` ) ? a0 : ( a0 != `0` );
267	z1 = `0`;
268	}
269	}
270	z0 = `0`;
271	}
272	z2 \|= ( a2 != `0` );
273	}
274	*z2Ptr = z2;
275	*z1Ptr = z1;
276	*z0Ptr = z0;
277
278	}
279
280	/*
281	-------------------------------------------------------------------------------
282	Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283	number of bits given in `count'. Any bits shifted off are lost. The value
284	of `count' must be less than 64. The result is broken into two 64-bit
285	pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286	-------------------------------------------------------------------------------
287	*/
288	INLINE void
289	shortShift128Left(
290	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
291	{
292
293	*z1Ptr = a1<<count;
294	*z0Ptr =
295	( count == `0` ) ? a0 : ( a0<<count ) \| ( a1>>( ( - count ) & `63` ) );
296
297	}
298
299	/*
300	-------------------------------------------------------------------------------
301	Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
302	by the number of bits given in `count'. Any bits shifted off are lost.
303	The value of `count' must be less than 64. The result is broken into three
304	64-bit pieces which are stored at the locations pointed to by `z0Ptr',
305	`z1Ptr', and `z2Ptr'.
306	-------------------------------------------------------------------------------
307	*/
308	INLINE void
309	shortShift192Left(
310	bits64 a0,
311	bits64 a1,
312	bits64 a2,
313	int16 count,
314	bits64 *z0Ptr,
315	bits64 *z1Ptr,
316	bits64 *z2Ptr
317	)
318	{
319	bits64 z0, z1, z2;
320	int8 negCount;
321
322	z2 = a2<<count;
323	z1 = a1<<count;
324	z0 = a0<<count;
325	if ( `0` < count ) {
326	negCount = ( ( - count ) & `63` );
327	z1 \|= a2>>negCount;
328	z0 \|= a1>>negCount;
329	}
330	*z2Ptr = z2;
331	*z1Ptr = z1;
332	*z0Ptr = z0;
333
334	}
335
336	/*
337	-------------------------------------------------------------------------------
338	Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
339	value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
340	any carry out is lost. The result is broken into two 64-bit pieces which
341	are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
342	-------------------------------------------------------------------------------
343	*/
344	INLINE void
345	add128(
346	bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 z0Ptr, bits64 z1Ptr )
347	{
348	bits64 z1;
349
350	z1 = a1 + b1;
351	*z1Ptr = z1;
352	*z0Ptr = a0 + b0 + ( z1 < a1 );
353
354	}
355
356	/*
357	-------------------------------------------------------------------------------
358	Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
359	192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
360	modulo 2^192, so any carry out is lost. The result is broken into three
361	64-bit pieces which are stored at the locations pointed to by `z0Ptr',
362	`z1Ptr', and `z2Ptr'.
363	-------------------------------------------------------------------------------
364	*/
365	INLINE void
366	add192(
367	bits64 a0,
368	bits64 a1,
369	bits64 a2,
370	bits64 b0,
371	bits64 b1,
372	bits64 b2,
373	bits64 *z0Ptr,
374	bits64 *z1Ptr,
375	bits64 *z2Ptr
376	)
377	{
378	bits64 z0, z1, z2;
379	int8 carry0, carry1;
380
381	z2 = a2 + b2;
382	carry1 = ( z2 < a2 );
383	z1 = a1 + b1;
384	carry0 = ( z1 < a1 );
385	z0 = a0 + b0;
386	z1 += carry1;
387	z0 += ( z1 < carry1 );
388	z0 += carry0;
389	*z2Ptr = z2;
390	*z1Ptr = z1;
391	*z0Ptr = z0;
392
393	}
394
395	/*
396	-------------------------------------------------------------------------------
397	Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
398	128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
399	2^128, so any borrow out (carry out) is lost. The result is broken into two
400	64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
401	`z1Ptr'.
402	-------------------------------------------------------------------------------
403	*/
404	INLINE void
405	sub128(
406	bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 z0Ptr, bits64 z1Ptr )
407	{
408
409	*z1Ptr = a1 - b1;
410	*z0Ptr = a0 - b0 - ( a1 < b1 );
411
412	}
413
414	/*
415	-------------------------------------------------------------------------------
416	Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
417	from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
418	Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
419	result is broken into three 64-bit pieces which are stored at the locations
420	pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
421	-------------------------------------------------------------------------------
422	*/
423	INLINE void
424	sub192(
425	bits64 a0,
426	bits64 a1,
427	bits64 a2,
428	bits64 b0,
429	bits64 b1,
430	bits64 b2,
431	bits64 *z0Ptr,
432	bits64 *z1Ptr,
433	bits64 *z2Ptr
434	)
435	{
436	bits64 z0, z1, z2;
437	int8 borrow0, borrow1;
438
439	z2 = a2 - b2;
440	borrow1 = ( a2 < b2 );
441	z1 = a1 - b1;
442	borrow0 = ( a1 < b1 );
443	z0 = a0 - b0;
444	z0 -= ( z1 < borrow1 );
445	z1 -= borrow1;
446	z0 -= borrow0;
447	*z2Ptr = z2;
448	*z1Ptr = z1;
449	*z0Ptr = z0;
450
451	}
452
453	/*
454	-------------------------------------------------------------------------------
455	Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
456	into two 64-bit pieces which are stored at the locations pointed to by
457	`z0Ptr' and `z1Ptr'.
458	-------------------------------------------------------------------------------
459	*/
460	INLINE void mul64To128( bits64 a, bits64 b, bits64 z0Ptr, bits64 z1Ptr )
461	{
462	bits32 aHigh, aLow, bHigh, bLow;
463	bits64 z0, zMiddleA, zMiddleB, z1;
464
465	aLow = a;
466	aHigh = a>>`32`;
467	bLow = b;
468	bHigh = b>>`32`;
469	z1 = ( (bits64) aLow ) * bLow;
470	zMiddleA = ( (bits64) aLow ) * bHigh;
471	zMiddleB = ( (bits64) aHigh ) * bLow;
472	z0 = ( (bits64) aHigh ) * bHigh;
473	zMiddleA += zMiddleB;
474	z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<`32` ) + ( zMiddleA>>`32` );
475	zMiddleA <<= `32`;
476	z1 += zMiddleA;
477	z0 += ( z1 < zMiddleA );
478	*z1Ptr = z1;
479	*z0Ptr = z0;
480
481	}
482
483	/*
484	-------------------------------------------------------------------------------
485	Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
486	obtain a 192-bit product. The product is broken into three 64-bit pieces
487	which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
488	`z2Ptr'.
489	-------------------------------------------------------------------------------
490	*/
491	INLINE void
492	mul128By64To192(
493	bits64 a0,
494	bits64 a1,
495	bits64 b,
496	bits64 *z0Ptr,
497	bits64 *z1Ptr,
498	bits64 *z2Ptr
499	)
500	{
501	bits64 z0, z1, z2, more1;
502
503	mul64To128( a: a1, b, z0Ptr: &z1, z1Ptr: &z2 );
504	mul64To128( a: a0, b, z0Ptr: &z0, z1Ptr: &more1 );
505	add128( a0: z0, a1: more1, b0: `0`, b1: z1, z0Ptr: &z0, z1Ptr: &z1 );
506	*z2Ptr = z2;
507	*z1Ptr = z1;
508	*z0Ptr = z0;
509
510	}
511
512	/*
513	-------------------------------------------------------------------------------
514	Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
515	128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
516	product. The product is broken into four 64-bit pieces which are stored at
517	the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
518	-------------------------------------------------------------------------------
519	*/
520	INLINE void
521	mul128To256(
522	bits64 a0,
523	bits64 a1,
524	bits64 b0,
525	bits64 b1,
526	bits64 *z0Ptr,
527	bits64 *z1Ptr,
528	bits64 *z2Ptr,
529	bits64 *z3Ptr
530	)
531	{
532	bits64 z0, z1, z2, z3;
533	bits64 more1, more2;
534
535	mul64To128( a: a1, b: b1, z0Ptr: &z2, z1Ptr: &z3 );
536	mul64To128( a: a1, b: b0, z0Ptr: &z1, z1Ptr: &more2 );
537	add128( a0: z1, a1: more2, b0: `0`, b1: z2, z0Ptr: &z1, z1Ptr: &z2 );
538	mul64To128( a: a0, b: b0, z0Ptr: &z0, z1Ptr: &more1 );
539	add128( a0: z0, a1: more1, b0: `0`, b1: z1, z0Ptr: &z0, z1Ptr: &z1 );
540	mul64To128( a: a0, b: b1, z0Ptr: &more1, z1Ptr: &more2 );
541	add128( a0: more1, a1: more2, b0: `0`, b1: z2, z0Ptr: &more1, z1Ptr: &z2 );
542	add128( a0: z0, a1: z1, b0: `0`, b1: more1, z0Ptr: &z0, z1Ptr: &z1 );
543	*z3Ptr = z3;
544	*z2Ptr = z2;
545	*z1Ptr = z1;
546	*z0Ptr = z0;
547
548	}
549
550	/*
551	-------------------------------------------------------------------------------
552	Returns an approximation to the 64-bit integer quotient obtained by dividing
553	`b' into the 128-bit value formed by concatenating `a0' and `a1'. The
554	divisor `b' must be at least 2^63. If q is the exact quotient truncated
555	toward zero, the approximation returned lies between q and q + 2 inclusive.
556	If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
557	unsigned integer is returned.
558	-------------------------------------------------------------------------------
559	*/
560	static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
561	{
562	bits64 b0, b1;
563	bits64 rem0, rem1, term0, term1;
564	bits64 z;
565	if ( b <= a0 ) return LIT64( `0xFFFFFFFFFFFFFFFF` );
566	b0 = b>>`32`; / hence b0 is 32 bits wide now /
567	if ( b0<<`32` <= a0 ) {
568	z = LIT64( `0xFFFFFFFF00000000` );
569	} else {
570	z = a0;
571	do_div( z, b0 );
572	z <<= `32`;
573	}
574	mul64To128( a: b, b: z, z0Ptr: &term0, z1Ptr: &term1 );
575	sub128( a0, a1, b0: term0, b1: term1, z0Ptr: &rem0, z1Ptr: &rem1 );
576	while ( ( (sbits64) rem0 ) < `0` ) {
577	z -= LIT64( `0x100000000` );
578	b1 = b<<`32`;
579	add128( a0: rem0, a1: rem1, b0, b1, z0Ptr: &rem0, z1Ptr: &rem1 );
580	}
581	rem0 = ( rem0<<`32` ) \| ( rem1>>`32` );
582	if ( b0<<`32` <= rem0 ) {
583	z \|= `0xFFFFFFFF`;
584	} else {
585	do_div( rem0, b0 );
586	z \|= rem0;
587	}
588	return z;
589
590	}
591
592	/*
593	-------------------------------------------------------------------------------
594	Returns an approximation to the square root of the 32-bit significand given
595	by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
596	`aExp' (the least significant bit) is 1, the integer returned approximates
597	2^31sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'*
598	is 0, the integer returned approximates 2^31sqrt(`a'/2^30). In either*
599	case, the approximation returned lies strictly within +/-2 of the exact
600	value.
601	-------------------------------------------------------------------------------
602	*/
603	static bits32 estimateSqrt32( int16 aExp, bits32 a )
604	{
605	static const bits16 sqrtOddAdjustments[] = {
606	`0x0004`, `0x0022`, `0x005D`, `0x00B1`, `0x011D`, `0x019F`, `0x0236`, `0x02E0`,
607	`0x039C`, `0x0468`, `0x0545`, `0x0631`, `0x072B`, `0x0832`, `0x0946`, `0x0A67`
608	};
609	static const bits16 sqrtEvenAdjustments[] = {
610	`0x0A2D`, `0x08AF`, `0x075A`, `0x0629`, `0x051A`, `0x0429`, `0x0356`, `0x029E`,
611	`0x0200`, `0x0179`, `0x0109`, `0x00AF`, `0x0068`, `0x0034`, `0x0012`, `0x0002`
612	};
613	int8 index;
614	bits32 z;
615	bits64 A;
616
617	index = ( a>>`27` ) & `15`;
618	if ( aExp & `1` ) {
619	z = `0x4000` + ( a>>`17` ) - sqrtOddAdjustments[ index ];
620	z = ( ( a / z )<<`14` ) + ( z<<`15` );
621	a >>= `1`;
622	}
623	else {
624	z = `0x8000` + ( a>>`17` ) - sqrtEvenAdjustments[ index ];
625	z = a / z + z;
626	z = ( `0x20000` <= z ) ? `0xFFFF8000` : ( z<<`15` );
627	if ( z <= a ) return (bits32) ( ( (sbits32) a )>>`1` );
628	}
629	A = ( (bits64) a )<<`31`;
630	do_div( A, z );
631	return ( (bits32) A ) + ( z>>`1` );
632
633	}
634
635	/*
636	-------------------------------------------------------------------------------
637	Returns the number of leading 0 bits before the most-significant 1 bit
638	of `a'. If `a' is zero, 32 is returned.
639	-------------------------------------------------------------------------------
640	*/
641	static int8 countLeadingZeros32( bits32 a )
642	{
643	static const int8 countLeadingZerosHigh[] = {
644	`8`, `7`, `6`, `6`, `5`, `5`, `5`, `5`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
645	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
646	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
647	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
648	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
649	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
650	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
651	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
652	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
653	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
654	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
655	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
656	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
657	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
658	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
659	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`
660	};
661	int8 shiftCount;
662
663	shiftCount = `0`;
664	if ( a < `0x10000` ) {
665	shiftCount += `16`;
666	a <<= `16`;
667	}
668	if ( a < `0x1000000` ) {
669	shiftCount += `8`;
670	a <<= `8`;
671	}
672	shiftCount += countLeadingZerosHigh[ a>>`24` ];
673	return shiftCount;
674
675	}
676
677	/*
678	-------------------------------------------------------------------------------
679	Returns the number of leading 0 bits before the most-significant 1 bit
680	of `a'. If `a' is zero, 64 is returned.
681	-------------------------------------------------------------------------------
682	*/
683	static int8 countLeadingZeros64( bits64 a )
684	{
685	int8 shiftCount;
686
687	shiftCount = `0`;
688	if ( a < ( (bits64) `1` )<<`32` ) {
689	shiftCount += `32`;
690	}
691	else {
692	a >>= `32`;
693	}
694	shiftCount += countLeadingZeros32( a );
695	return shiftCount;
696
697	}
698
699	/*
700	-------------------------------------------------------------------------------
701	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
702	is equal to the 128-bit value formed by concatenating `b0' and `b1'.
703	Otherwise, returns 0.
704	-------------------------------------------------------------------------------
705	*/
706	INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707	{
708
709	return ( a0 == b0 ) && ( a1 == b1 );
710
711	}
712
713	/*
714	-------------------------------------------------------------------------------
715	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
716	than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
717	Otherwise, returns 0.
718	-------------------------------------------------------------------------------
719	*/
720	INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
721	{
722
723	return ( a0 < b0 ) \|\| ( ( a0 == b0 ) && ( a1 <= b1 ) );
724
725	}
726
727	/*
728	-------------------------------------------------------------------------------
729	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
730	than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
731	returns 0.
732	-------------------------------------------------------------------------------
733	*/
734	INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
735	{
736
737	return ( a0 < b0 ) \|\| ( ( a0 == b0 ) && ( a1 < b1 ) );
738
739	}
740
741	/*
742	-------------------------------------------------------------------------------
743	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
744	not equal to the 128-bit value formed by concatenating `b0' and `b1'.
745	Otherwise, returns 0.
746	-------------------------------------------------------------------------------
747	*/
748	INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
749	{
750
751	return ( a0 != b0 ) \|\| ( a1 != b1 );
752
753	}
754
755

source code of linux/arch/arm/nwfpe/softfloat-macros