dfdiv.S source code [compiler-rt/lib/builtins/hexagon/dfdiv.S]

1	//===----------------------Hexagon builtin routine ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// Double Precision Divide
10
// Register and predicate aliases used by __hexagon_divdf3.
// Several physical registers are re-aliased later in the file via
// #undef/#define once their first role is finished (e.g. r1:0 is A, then REM,
// then A again; r28 is SIGN, then ONE).
11	#define A r1:0 // first operand (dividend); the result is also written here
12	#define AH r1 // high word of A
13	#define AL r0 // low word of A
14
15	#define B r3:2 // second operand (divisor)
16	#define BH r3 // high word of B
17	#define BL r2 // low word of B
18
19	#define Q r5:4 // 64-bit quotient accumulator
20	#define QH r5
21	#define QL r4
22
23	#define PROD r7:6 // 64-bit scratch for products / temporaries
24	#define PRODHI r7
25	#define PRODLO r6
26
27	#define SFONE r8 // single-precision constant loaded at entry
28	#define SFDEN r9 // single-precision stand-in for the denominator
29	#define SFERROR r10 // error term used while refining the reciprocal
30	#define SFRECIP r11 // single-precision reciprocal estimate
31
32	#define EXPBA r13:12 // pair holding EXPB (high) and EXPA (low)
33	#define EXPB r13
34	#define EXPA r12
35
36	#define REMSUB2 r15:14 // second partial product subtracted from the remainder
37
38
39
40	#define SIGN r28 // xor of the operand high words; bit 31 is the result sign
41
42	#define Q_POSITIVE p3 // true when the result sign bit is clear
43	#define NORMAL p2 // true when both operands are normal numbers
44	#define NO_OVF_UNF p1 // true when the result exponent is safely in range
45	#define P_TMP p0 // general-purpose scratch predicate
46
// Numeric constants and symbol-alias helper macros.
47	#define RECIPEST_SHIFT 3 // left shift applied to the integer reciprocal estimate
48	#define QADJ 61 // subtracted from the exponent difference before the final exponent add
49
// Class-mask arguments for the dfclass instruction (one bit per class).
50	#define DFCLASS_NORMAL 0x02
51	#define DFCLASS_NUMBER 0x0F
52	#define DFCLASS_INFINITE 0x08
53	#define DFCLASS_ZERO 0x01
54	#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) // any number except zero
55	#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) // any number except infinity
56
// Field widths of the double (DF) and single (SF) formats and the DF exponent bias.
57	#define DF_MANTBITS 52
58	#define DF_EXPBITS 11
59	#define SF_MANTBITS 23
60	#define SF_EXPBITS 8
61	#define DF_BIAS 0x3ff
62
63	#define SR_ROUND_OFF 22 // bit offset in USR of the 2-bit field read as the rounding mode
64
// Each *_ALIAS(TAG) exports an additional global symbol that is .set equal to
// __hexagon_TAG; END(TAG) records the symbol size for TAG.
65	#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
66	#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
67	#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
68	#define END(TAG) .size TAG,.-TAG
69
// __hexagon_divdf3: double-precision divide, A / B.
//   In:   A = r1:0 (dividend), B = r3:2 (divisor), both as raw double bit patterns.
//   Out:  r1:0 = quotient; returns with jumpr r31.
//   Uses: r4-r15, r28 and p0-p3 as scratch; the exceptional paths also
//         read-modify-write USR.
// Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and
// __hexagon_fast2_divdf3 through the alias macros.
70	.text
71	.global __hexagon_divdf3
72	.type __hexagon_divdf3,@function
73	Q6_ALIAS(divdf3)
74	FAST_ALIAS(divdf3)
75	FAST2_ALIAS(divdf3)
76	.p2align `5`
77	__hexagon_divdf3:
// Packet 1: classify both operands and capture their high words.
// NORMAL is written twice in one packet; same-packet predicate writes are
// ANDed on Hexagon, so it is true only when A and B are both normal.
78	{
79	NORMAL = dfclass(A,#DFCLASS_NORMAL)
80	NORMAL = dfclass(B,#DFCLASS_NORMAL)
81	EXPBA = combine(BH,AH) // EXPB = high word of B, EXPA = high word of A
82	SIGN = xor(AH,BH) // bit 31 = sign of the quotient
83	}
// From here on r1:0 is treated as the running remainder and r3:2 as the
// denominator, so the operand aliases are swapped for REM/DENOM.
84	#undef A
85	#undef AH
86	#undef AL
87	#undef B
88	#undef BH
89	#undef BL
90	#define REM r1:0
91	#define REMHI r1
92	#define REMLO r0
93	#define DENOM r3:2
94	#define DENOMHI r3
95	#define DENOMLO r2
// Packet 2: leave for the slow path unless both operands are normal; in the
// same packet pull the top SF_MANTBITS mantissa bits of the denominator and
// load the single-precision seed constant.
96	{
97	if (!NORMAL) jump .Ldiv_abnormal
98	PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
99	SFONE = ##`0x3f800001` // bit pattern of 1.0f with the lsb also set
100	}
// Packet 3: build a single-precision denominator (exponent of 1.0f, mantissa
// taken from DENOM), reduce EXPA/EXPB to their exponent fields, and derive
// the sign predicate.
101	{
102	SFDEN = or(SFONE,PRODLO)
103	EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-`32`)
104	EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-`32`)
105	Q_POSITIVE = cmp.gt(SIGN,#-`1`) // true when the sign bit of SIGN is clear
106	}
// SIGN is dead once Q_POSITIVE is set; r28 is reused as the constant ONE.
107	#undef SIGN
108	#define ONE r28
// Main divide path. Entered by fall-through for normal operands and by a jump
// from .Ldiv_abnormal once denormal operands have been normalized.
// Expects: REM/DENOM hold the operands, EXPA/EXPB their exponents,
// SFONE/SFDEN the single-precision seed values, Q_POSITIVE the result sign.
109	.Ldenorm_continue:
// Hardware reciprocal estimate of SFDEN, followed by two refinement steps of
// the form  err = 1.0 - recip*den ; recip += recip*err.
110	{
111	SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
112	SFERROR = and(SFONE,#-`2`) // clear the lsb of SFONE -> exactly 1.0f
113	ONE = #`1`
114	EXPA = sub(EXPA,EXPB) // unbiased exponent difference of the operands
115	}
// EXPB has been consumed; r13 now carries the integer reciprocal estimate.
116	#undef EXPB
117	#define RECIPEST r13
118	{
119	SFERROR -= sfmpy(SFRECIP,SFDEN):lib
120	REMHI = insert(ONE,#DF_EXPBITS+`1`,#DF_MANTBITS-`32`) // clear sign+exponent, set hidden bit
121	RECIPEST = ##`0x00800000` << RECIPEST_SHIFT // hidden bit of the SF mantissa, pre-shifted
122	}
123	{
124	SFRECIP += sfmpy(SFRECIP,SFERROR):lib
125	DENOMHI = insert(ONE,#DF_EXPBITS+`1`,#DF_MANTBITS-`32`) // same treatment for the denominator
126	SFERROR = and(SFONE,#-`2`) // reload 1.0f for the second step
127	}
128	{
129	SFERROR -= sfmpy(SFRECIP,SFDEN):lib
130	QH = #-DF_BIAS+`1` // QH/QL temporarily hold the exponent-range limits
131	QL = #DF_BIAS-`1`
132	}
// NO_OVF_UNF = (EXPA > QH) AND !(EXPA > QL): both writes are in one packet.
133	{
134	SFRECIP += sfmpy(SFRECIP,SFERROR):lib
135	NO_OVF_UNF = cmp.gt(EXPA,QH)
136	NO_OVF_UNF = !cmp.gt(EXPA,QL)
137	}
138	{
139	RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) // drop in the refined mantissa bits
140	Q = #`0`
141	EXPA = add(EXPA,#-QADJ)
142	}
143	#undef SFERROR
144	#undef SFRECIP
145	#define TMP r10
146	#define TMP1 r11
// Lower the estimate by 3 units (in RECIPEST_SHIFT-scaled terms).
// NOTE(review): presumably so the estimate never exceeds the true reciprocal
// and the remainder stays non-negative - confirm.
147	{
148	RECIPEST = add(RECIPEST,#((-`3`) << RECIPEST_SHIFT))
149	}
150
// DIV_ITER1B: one quotient-digit step.
//   1. PROD = RECIPEST * REMHI (PRODHI is the digit estimate); REM <<= REMSHIFT.
//   2. PRODLO = 0; REM -= digit * DENOMLO; REMSUB2 = digit * DENOMHI.
//   3. Q += PROD shifted by QSHIFT (QSHIFTINSN selects ASL or ASR);
//      REM -= REMSUB2 << 32; then the optional EXTRA instruction.
// Within a packet all reads see the values from before the packet.
151	#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
152	{ \
153	PROD = mpyu(RECIPEST,REMHI); \
154	REM = asl(REM,# ## ( REMSHIFT )); \
155	}; \
156	{ \
157	PRODLO = # ## 0; \
158	REM -= mpyu(PRODHI,DENOMLO); \
159	REMSUB2 = mpyu(PRODHI,DENOMHI); \
160	}; \
161	{ \
162	Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
163	REM -= asl(REMSUB2, # ## 32); \
164	EXTRA \
165	}
166
167
// Four digit steps; each one deposits its digit 15 bits lower in Q than the
// previous one. The last step also zeroes PROD for the fix-up below.
168	DIV_ITER1B(ASL,`14`,`15`,)
169	DIV_ITER1B(ASR,`1`,`15`,)
170	DIV_ITER1B(ASR,`16`,`15`,)
171	DIV_ITER1B(ASR,`31`,`15`,PROD=# ( `0` );)
172
173	#undef REMSUB2
174	#define TMPPAIR r15:14
175	#define TMPPAIRHI r15
176	#define TMPPAIRLO r14
177	#undef RECIPEST
178	#define EXPB r13
// Final correction: if the remainder is still >= DENOM, subtract DENOM once
// more and bump Q by 2 (bit 0 of Q is reserved for the sticky bit set below).
179	{
180	// compare or sub with carry
181	TMPPAIR = sub(REM,DENOM)
182	P_TMP = cmp.gtu(DENOM,REM)
183	// set up amt to add to q
184	if (!P_TMP.new) PRODLO = #`2`
185	}
186	{
187	Q = add(Q,PROD)
188	if (!P_TMP) REM = TMPPAIR
189	TMPPAIR = #`0`
190	}
// Any non-zero remainder makes the result inexact: record it in Q's lsb.
191	{
192	P_TMP = cmp.eq(REM,TMPPAIR)
193	if (!P_TMP.new) QL = or(QL,ONE)
194	}
// Apply the sign: Q becomes -Q when the result is negative.
195	{
196	PROD = neg(Q)
197	}
198	{
199	if (!Q_POSITIVE) Q = PROD
200	}
// The remainder/denominator are finished; restore the operand aliases.
201	#undef REM
202	#undef REMHI
203	#undef REMLO
204	#undef DENOM
205	#undef DENOMLO
206	#undef DENOMHI
207	#define A r1:0
208	#define AH r1
209	#define AL r0
210	#define B r3:2
211	#define BH r3
212	#define BL r2
// Convert the signed 64-bit quotient to double, then patch in the exponent.
// If the range check failed, finish in .Ldiv_ovf_unf instead.
213	{
214	A = convert_d2df(Q)
215	if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
216	}
217	{
218	AH += asl(EXPA,#DF_MANTBITS-`32`) // add EXPA into the exponent field of the high word
219	jumpr r31
220	}
221
// Reached when the exponent difference was outside the range checked on the
// main path. A already holds convert_d2df(Q). Decides between overflow,
// "possible underflow" (result may have rounded up to a normal), and true
// underflow, which is handled inline below.
222	.Ldiv_ovf_unf:
// EXPB reads AH before the += in the same packet, i.e. the exponent produced
// by the conversion alone.
223	{
224	AH += asl(EXPA,#DF_MANTBITS-`32`)
225	EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-`32`)
226	}
227	{
228	PROD = abs(Q)
229	EXPA = add(EXPA,EXPB) // exponent the result would have with unlimited range
230	}
231	{
232	P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
233	if (P_TMP.new) jump:nt .Ldiv_ovf
234	}
235	{
236	P_TMP = cmp.gt(EXPA,#`0`)
237	if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
238	}
239	// Underflow
240	// We know what the infinite range exponent should be (EXPA)
241	// Q is 2's complement, PROD is abs(Q)
242	// Normalize Q, shift right, add a high bit, convert, change exponent
243
244	#define FUDGE1 7 // how much to shift right
245	#define FUDGE2 4 // how many guard/round to keep at lsbs
246
// EXPB = normalizing left-shift count for |Q|; EXPA = right-shift amount
// (FUDGE1 - exponent); TMP = current USR.
247	{
248	EXPB = add(clb(PROD),#-`1`) // doesn't need to be added in since
249	EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
250	TMP = USR
251	TMP1 = #`63`
252	}
// PROD is normalized with the old EXPB; EXPB then becomes the right-shift
// count clamped to 63. TMP1 = USR with the 0x30 flag bits set (written to USR
// below only if the result turns out inexact).
253	{
254	EXPB = min(EXPA,TMP1)
255	TMP1 = or(TMP,#`0x030`)
256	PROD = asl(PROD,EXPB)
257	EXPA = #`0`
258	}
// EXPBA now encodes width = EXPB, offset = 0, so extractu yields exactly the
// low bits that the following right shift discards.
259	{
260	TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
261	PROD = lsr(PROD,EXPB) // shift out bits
262	B = #`1`
263	}
// If any discarded bit was set, OR a sticky 1 into the lsb. Also plant a high
// bit at position DF_MANTBITS+FUDGE2 of the 64-bit value.
264	{
265	P_TMP = cmp.gtu(B,TMPPAIR)
266	if (!P_TMP.new) PRODLO = or(BL,PRODLO)
267	PRODHI = setbit(PRODHI,#DF_MANTBITS-`32`+FUDGE2)
268	}
// If any of the low FUDGE2 guard bits is set the result is inexact, so select
// the USR value that has the 0x30 bits set.
269	{
270	Q = neg(PROD)
271	P_TMP = bitsclr(PRODLO,#(`1`<<FUDGE2)-`1`)
272	if (!P_TMP.new) TMP = TMP1
273	}
274	{
275	USR = TMP
276	if (Q_POSITIVE) Q = PROD
277	TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2) // exponent correction applied after the conversion
278	}
279	{
280	A = convert_d2df(Q)
281	}
282	{
283	AH += asl(TMP,#DF_MANTBITS-`32`)
284	jumpr r31
285	}
286
287
// Result exponent is positive but was outside the fast-path range. A already
// holds the final value; this block only decides whether the underflow flag
// must be raised as well.
288	.Lpossible_unf:
289	// If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
290	// The answer is correct, but we need to raise Underflow
291	{
292	B = extractu(A,#`63`,#`0`) // |A|: the low 63 bits of the result
293	TMPPAIR = combine(##`0x00100000`,#`0`) // min normal
294	TMP = #`0x7FFF`
295	}
// Both P_TMP writes share a packet, so P_TMP is the AND of the two tests.
296	{
297	P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
298	P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
299	}
300
// For architecture version 60 the USR read and the conditional return are
// issued as two separate instructions; otherwise they share one packet.
// NOTE(review): the reason for the V60 special case is not visible here.
301	#if (__HEXAGON_ARCH__ == 60)
302	TMP = USR // If not, just return
303	if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
304	// Note that inexact is already set...
305	#else
306	{
307	if (!P_TMP) jumpr r31 // If not, just return
308	TMP = USR // Else, we want to set Unf+Inexact
309	} // Note that inexact is already set...
310	#endif
311	{
312	TMP = or(TMP,#`0x30`)
313	}
314	{
315	USR = TMP
316	}
// NOTE(review): the self-compare result in p0 is not used afterwards;
// presumably issued for its exception side effect - confirm.
317	{
318	p0 = dfcmp.eq(A,A)
319	jumpr r31
320	}
321
// Overflow: set the USR flag bits and return either the largest finite double
// or infinity, depending on the rounding mode and the result sign.
322	.Ldiv_ovf:
323
324	// Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
325
326	{
327	TMP = USR
328	B = combine(##`0x7fefffff`,#-`1`) // largest finite magnitude
329	AH = mux(Q_POSITIVE,#`0`,#-`1`) // sign bit clear for positive, set for negative
330	}
331	{
332	PROD = combine(##`0x7ff00000`,#`0`) // infinity (magnitude only)
333	QH = extractu(TMP,#`2`,#SR_ROUND_OFF) // 2-bit rounding-mode field of USR
334	TMP = or(TMP,#`0x28`) // NOTE(review): presumably overflow + inexact flag bits - confirm
335	}
// QL keeps the raw rounding mode (it reads QH before the xor in this packet);
// QH becomes the rounding mode xor the result sign bit.
336	{
337	USR = TMP
338	QH ^= lsr(AH,#`31`)
339	QL = QH
340	}
// All three p0 writes are in one packet and combine by AND.
341	{
342	p0 = !cmp.eq(QL,#`1`) // if not round-to-zero
343	p0 = !cmp.eq(QH,#`2`) // and not rounding the other way
344	if (p0.new) B = PROD // go to inf
345	p0 = dfcmp.eq(B,B) // get exceptions
346	}
// Copy the 63 magnitude bits into A; A's bit 63 (the sign set above) is kept.
347	{
348	A = insert(B,#`63`,#`0`)
349	jumpr r31
350	}
351
// Slow path, taken when at least one operand is not a normal number.
// Dispatches NaN, inf/inf, 0/0, zero-result and infinite-result cases to
// their handlers; what remains is (de)normal / (de)normal, which is
// normalized here and sent back to .Ldenorm_continue.
// r28 is SIGN again on this path; p1 and p2 get new roles.
352	#undef ONE
353	#define SIGN r28
354	#undef NORMAL
355	#undef NO_OVF_UNF
356	#define P_INF p1
357	#define P_ZERO p2
358	.Ldiv_abnormal:
// Paired predicate writes in a packet are ANDed: P_TMP = both are numbers
// (neither is NaN), P_INF = both infinite, P_ZERO = both zero.
359	{
360	P_TMP = dfclass(A,#DFCLASS_NUMBER)
361	P_TMP = dfclass(B,#DFCLASS_NUMBER)
362	Q_POSITIVE = cmp.gt(SIGN,#-`1`)
363	}
364	{
365	P_INF = dfclass(A,#DFCLASS_INFINITE)
366	P_INF = dfclass(B,#DFCLASS_INFINITE)
367	}
368	{
369	P_ZERO = dfclass(A,#DFCLASS_ZERO)
370	P_ZERO = dfclass(B,#DFCLASS_ZERO)
371	}
372	{
373	if (!P_TMP) jump .Ldiv_nan // at least one operand is NaN
374	if (P_INF) jump .Ldiv_invalid // inf / inf
375	}
376	{
377	if (P_ZERO) jump .Ldiv_invalid // 0 / 0
378	}
// P_ZERO is now "A nonzero AND B non-infinite"; when false the result is zero.
379	{
380	P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
381	P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
382	}
// P_INF is now "A non-infinite AND B nonzero"; when false the result is infinite.
383	{
384	P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
385	P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
386	}
387	{
388	if (!P_ZERO) jump .Ldiv_zero_result
389	if (!P_INF) jump .Ldiv_inf_result
390	}
391	// Now we've narrowed it down to (de)normal / (de)normal
392	// Set up A/EXPA B/EXPB and go back
393	#undef P_ZERO
394	#undef P_INF
395	#define P_TMP2 p1
// P_TMP / P_TMP2 record which operand is normal (as opposed to denormal).
396	{
397	P_TMP = dfclass(A,#DFCLASS_NORMAL)
398	P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
399	TMP = ##`0x00100000` // hidden-bit position in a high word
400	}
// Save the original high words in EXPBA, then zero the top DF_EXPBITS+1 bits
// of each operand (the low 12 bits of TMP are all zero).
401	{
402	EXPBA = combine(BH,AH)
403	AH = insert(TMP,#DF_EXPBITS+`1`,#DF_MANTBITS-`32`) // clear out hidden bit, sign bit
404	BH = insert(TMP,#DF_EXPBITS+`1`,#DF_MANTBITS-`32`) // clear out hidden bit, sign bit
405	}
406	{
407	if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
408	if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
409	}
// QH/QL = left shift needed to bring each leading 1 to the hidden-bit position.
410	{
411	QH = add(clb(A),#-DF_EXPBITS)
412	QL = add(clb(B),#-DF_EXPBITS)
413	TMP = #`1`
414	}
415	{
416	EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-`32`)
417	EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-`32`)
418	}
// Normalize the mantissas; a denormal operand gets the effective exponent
// 1 - shift in place of its stored exponent field.
419	{
420	A = asl(A,QH)
421	B = asl(B,QL)
422	if (!P_TMP) EXPA = sub(TMP,QH)
423	if (!P_TMP2) EXPB = sub(TMP,QL)
424	} // recreate values needed by resume code
// Rebuild SFDEN from the normalized denominator exactly as the entry code
// does. This relies on SFONE still holding the constant loaded at entry.
425	{
426	PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
427	}
428	{
429	SFDEN = or(SFONE,PRODLO)
430	jump .Ldenorm_continue
431	}
432
// Zero result (reached when A is zero or B is infinite): return a zero
// carrying the sign of the quotient.
433	.Ldiv_zero_result:
434	{
435	AH = xor(AH,BH) // bit 31 = result sign
436	B = #`0`
437	}
// Overwrite the low 63 bits of A with zero; the sign bit is preserved.
438	{
439	A = insert(B,#`63`,#`0`)
440	jumpr r31
441	}
// Infinite result (reached when A is infinite or B is zero): return infinity
// with the sign of the quotient. The DBZ bit is set in USR only when B is
// zero and A is not infinite.
442	.Ldiv_inf_result:
// p2 = (B is zero) AND (A is non-infinite): both writes share the packet.
443	{
444	p2 = dfclass(B,#DFCLASS_ZERO)
445	p2 = dfclass(A,#DFCLASS_NONINFINITE)
446	}
// The sign computation sits in the same packet as the conditional jump.
// NOTE(review): this relies on the other instructions of a packet executing
// even when its jump is taken - confirm against the architecture manual.
447	{
448	TMP = USR
449	if (!p2) jump `1f`
450	AH = xor(AH,BH) // bit 31 = result sign
451	}
452	{
453	TMP = or(TMP,#`0x04`) // DBZ
454	}
455	{
456	USR = TMP
457	}
458	`1`:
459	{
460	B = combine(##`0x7ff00000`,#`0`) // infinity (magnitude only)
461	p0 = dfcmp.uo(B,B) // take possible exception
462	}
// Overwrite the low 63 bits of A with the infinity pattern; the sign is kept.
463	{
464	A = insert(B,#`63`,#`0`)
465	jumpr r31
466	}
// At least one operand is NaN: trigger any pending invalid exception and
// return an all-ones bit pattern.
467	.Ldiv_nan:
// Class mask 0x10 is not among the DFCLASS_NUMBER bits; it is used here as
// the NaN test. An operand that is not NaN is replaced by the other one, so
// both registers hold a NaN afterwards.
468	{
469	p0 = dfclass(A,#`0x10`)
470	p1 = dfclass(B,#`0x10`)
471	if (!p0.new) A = B
472	if (!p1.new) B = A
473	}
// The converted values are discarded; the conversions exist for their side effect.
474	{
475	QH = convert_df2sf(A) // get possible invalid exceptions
476	QL = convert_df2sf(B)
477	}
478	{
479	A = #-`1` // all 64 bits set
480	jumpr r31
481	}
482
// Invalid operation (inf/inf or 0/0): produce a NaN result by converting a
// single-precision NaN constant to double, which also raises Invalid.
483	.Ldiv_invalid:
484	{
485	TMP = ##`0x7f800001` // SF NaN pattern: exponent all ones, only the mantissa lsb set
486	}
487	{
488	A = convert_sf2df(TMP) // get invalid, get DF qNaN
489	jumpr r31
490	}
// Record the size of the whole routine for the symbol table.
491	END(__hexagon_divdf3)
492

source code of compiler-rt/lib/builtins/hexagon/dfdiv.S