dfaddsub.S source code [compiler-rt/lib/builtins/hexagon/dfaddsub.S]

1	//===----------------------Hexagon builtin routine ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// Double Precision Add and Subtract
10
// Operand register pairs (H/L = high/low 32-bit word). A is also where the
// result is left before every `jumpr r31` in this file.
11	#define A r1:0
12	#define AH r1
13	#define AL r0
14	#define B r3:2
15	#define BH r3
16	#define BL r2
17
// Exponent fields of A and B. EXPB_A names the two as one pair so both can be
// written by a single combine() when the operands are swapped.
18	#define EXPA r4
19	#define EXPB r5
20	#define EXPB_A r5:4
21
// Scratch pair, loaded with small constants via combine() (shift clamp and
// sticky bit, then exponent bounds).
22	#define ZTMP r7:6
23	#define ZTMPH r7
24	#define ZTMPL r6
25
// Working significand of A; later the running sum.
26	#define ATMP r13:12
27	#define ATMPH r13
28	#define ATMPL r12
29
// Working significand of B.
30	#define BTMP r9:8
31	#define BTMPH r9
32	#define BTMPL r8
33
// Second scratch pair: negated/alternate value of ATMP, or bits shifted out of BTMP.
34	#define ATMP2 r11:10
35	#define ATMP2H r11
36	#define ATMP2L r10
37
// r15:14 is handed to extractu() as its register-pair control operand:
// EXPDIFF (r15) is the exponent difference, EXTRACTOFF (r14) is set to 0.
// The same pair is later re-#defined as ZERO.
38	#define EXPDIFF r15
39	#define EXTRACTOFF r14
40	#define EXTRACTAMT r15:14
41
// General 32-bit scratch (also holds a copy of USR).
42	#define TMP r28
43
// Field geometry of a double: 52 mantissa bits (20 of them in the high word)
// and an 11-bit exponent field.
// NOTE(review): BIAS is 1024 here and is only used as `#-BIAS-60` and
// `BIAS+BIAS-2`; MANTISSA_TO_INT_BIAS and SR_BIT_INEXACT are not referenced
// in the visible code.
44	#define MANTBITS 52
45	#define HI_MANTBITS 20
46	#define EXPBITS 11
47	#define BIAS 1024
48	#define MANTISSA_TO_INT_BIAS 52
49	#define SR_BIT_INEXACT 5
50
// Bit offset of the 2-bit rounding-mode field read from USR; overridable from
// the build.
51	#ifndef SR_ROUND_OFF
52	#define SR_ROUND_OFF 22
53	#endif
54
// Predicate names for the entry packets only; both are #undef'd and p1..p3
// are renamed once the operands are set up.
55	#define NORMAL p3
56	#define BIGB p2
57
// Each *_ALIAS macro exports one more global symbol bound to __hexagon_<TAG>.
// END emits the ELF symbol size as (current location - TAG).
58	#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
59	#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
60	#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
61	#define END(TAG) .size TAG,.-TAG
62
// Code section; export both entry points and mark them as functions.
63	.text
64	.global __hexagon_adddf3
65	.global __hexagon_subdf3
66	.type __hexagon_adddf3, @function
67	.type __hexagon_subdf3, @function
68
// Additional names (__qdsp_*, __hexagon_fast_*, __hexagon_fast2_*) for the
// same two routines.
69	Q6_ALIAS(adddf3)
70	FAST_ALIAS(adddf3)
71	FAST2_ALIAS(adddf3)
72	Q6_ALIAS(subdf3)
73	FAST_ALIAS(subdf3)
74	FAST2_ALIAS(subdf3)
75
// Align the add entry point to a 2^5-byte boundary.
76	.p2align `5`
// __hexagon_adddf3: double-precision add.
// In:  A and B hold the two operands.
// Out: A holds the sum; returns with `jumpr r31`.
// The inline path handles two normal operands whose (larger) exponent lies in
// a safe range; everything else branches to .Ladd_abnormal, .Ladd_ovf_unf or
// .Ladd_zero. Each { } group is one packet.
// NOTE(review): the comments below rely on Hexagon packet semantics (every
// instruction in a packet reads pre-packet register values; several compares
// into the same predicate in one packet are ANDed) -- confirm against the
// architecture manual.
77	__hexagon_adddf3:
78	{
79	EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) // exponent field of A
80	EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS) // exponent field of B
81	ATMP = combine(##`0x20000000`,#`0`) // bit 61 set; the mantissa is inserted just below it
82	}
83	{
84	NORMAL = dfclass(A,#`2`) // A passes the class-2 test ...
85	NORMAL = dfclass(B,#`2`) // ... and so does B (same predicate, combined)
86	BTMP = ATMP // same bit-61 template for B
87	BIGB = cmp.gtu(EXPB,EXPA) // Is B's exponent field larger than A's? (unsigned)
88	}
89	{
90	if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
91	if (BIGB) A = B // if B >> A, swap A and B
92	if (BIGB) B = A // If B >> A, swap A and B (reads the pre-packet A)
93	if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
94	}
95	{
96	ATMP = insert(A,#MANTBITS,#EXPBITS-`2`) // Q1.62
97	BTMP = insert(B,#MANTBITS,#EXPBITS-`2`) // Q1.62
98	EXPDIFF = sub(EXPA,EXPB) // alignment shift for B; EXPA >= EXPB after the swap
99	ZTMP = combine(#`62`,#`1`) // ZTMPH = 62 (shift clamp), ZTMPL = 1 (sticky bit)
100	}
101	#undef BIGB
102	#undef NORMAL
103	#define B_POS p3
104	#define A_POS p2
105	#define NO_STICKIES p1
// Common add core, also entered from .Ladd_abnormal. Expects ATMP/BTMP to hold
// the two significands, EXPDIFF the shift for BTMP, EXPA the larger exponent,
// ZTMP = 62:1, and AH/BH still carrying the operand signs.
106	.Ladd_continue:
107	{
108	EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
109	// will collapse to sticky bit
110	ATMP2 = neg(ATMP) // negated copy, selected below when A is negative
111	A_POS = cmp.gt(AH,#-`1`) // sign bit of A clear?
112	EXTRACTOFF = #`0` // with EXPDIFF forms the r15:14 control pair for extractu below
113	}
114	{
115	if (!A_POS) ATMP = ATMP2 // A negative: use the negated significand
116	ATMP2 = extractu(BTMP,EXTRACTAMT) // the bits of BTMP that the shift below discards
117	BTMP = ASR(BTMP,EXPDIFF) // align B's significand with A's
118	#undef EXTRACTAMT
119	#undef EXPDIFF
120	#undef EXTRACTOFF
121	#define ZERO r15:14
122	ZERO = #`0` // r15:14 reused as a 64-bit zero from here on
123	}
124	{
125	NO_STICKIES = cmp.eq(ATMP2,ZERO) // were all discarded bits zero?
126	if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL) // no: record them as a sticky bit in bit 0
127	EXPB = add(EXPA,#-BIAS-`60`) // adjustment later added to the converted result's exponent field
128	B_POS = cmp.gt(BH,#-`1`) // sign bit of B clear?
129	}
130	{
131	ATMP = add(ATMP,BTMP) // ADD!!!
132	ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
133	ZTMP = combine(#`54`,##`2045`) // ZTMPH = 54, ZTMPL = 2045: exponent bounds for the inline path
134	}
135	{
136	p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
137	p0 = !cmp.gtu(EXPA,ZTMPL) // ... and not above 2045 (combined with the test above)
138	if (!p0.new) jump:nt .Ladd_ovf_unf // exponent outside the safe range: careful path
139	if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
140	}
141	{
142	A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
143	p0 = cmp.eq(ATMPH,#`0`) // high word of the sum is zero ...
144	p0 = cmp.eq(ATMPL,#`0`) // ... and so is the low word
145	if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
146	}
147	{
148	AH += asl(EXPB,#HI_MANTBITS) // add the exponent adjustment directly into the exponent field
149	jumpr r31
150	}
// __hexagon_subdf3: double-precision subtract, computed as A + (-B).
// In:  A and B hold the operands.  Out: A holds A - B.
// Flips B's sign bit, then tail-jumps into the add routine through its
// __qdsp_ alias (same address as __hexagon_adddf3).
151	.falign
152	__hexagon_subdf3:
153	{
154	BH = togglebit(BH,#`31`) // negate B by toggling its sign bit
155	jump __qdsp_adddf3 // the add routine performs the return
156	}
// Record the symbol size: the symbol is declared `.type @function` above.
END(__hexagon_subdf3)
157
158
// .Ladd_zero: the integer sum in ATMP was exactly zero.
// Out: A = +0.0, or -0.0 when the rounding-mode field read from USR equals 2.
// The field offset comes from SR_ROUND_OFF, matching the other USR reads in
// this file, so a build-time override of SR_ROUND_OFF applies here too.
159	.falign
160	.Ladd_zero:
161	// True zero, full cancellation
162	// +0 unless round towards negative infinity
163	{
164	TMP = USR // snapshot of the status register
165	A = #`0` // default result: +0.0
166	BH = #`1`
167	}
168	{
169	TMP = extractu(TMP,#`2`,#SR_ROUND_OFF) // 2-bit rounding mode
170	BH = asl(BH,#`31`) // BH = sign-bit mask
171	}
172	{
173	p0 = cmp.eq(TMP,#`2`) // the mode that gives -0 (see header comment)
174	if (p0.new) AH = xor(AH,BH) // set the sign bit: -0.0
175	jumpr r31
176	}
// .Ladd_ovf_unf: careful path taken when EXPA is outside the inline range.
// Rebuilds the result's exponent explicitly, then returns a normal result,
// branches to .Ladd_ovf, or builds a denormal by shifting the significand.
177	.falign
178	.Ladd_ovf_unf:
179	// Overflow or Denormal is possible
180	// Good news: Underflow flag is not possible!
181
182	// ATMP has 2's complement value
183	//
184	// EXPA has A's exponent, EXPB has EXPA-BIAS-60
185	//
186	// Convert, extract exponent, add adjustment.
187	// If > 2046, overflow
188	// If <= 0, denormal
189	//
190	// Note that we've not done our zero check yet, so do that too
191
192	{
193	A = convert_d2df(ATMP)
194	p0 = cmp.eq(ATMPH,#`0`) // sum is zero if the high word ...
195	p0 = cmp.eq(ATMPL,#`0`) // ... and the low word are both zero
196	if (p0.new) jump:nt .Ladd_zero
197	}
198	{
199	TMP = extractu(AH,#EXPBITS,#HI_MANTBITS) // exponent field as produced by the conversion
200	AH += asl(EXPB,#HI_MANTBITS) // tentatively apply the adjustment (valid if the result is normal)
201	}
202	{
203	EXPB = add(EXPB,TMP) // adjusted exponent as a signed integer
204	B = combine(##`0x00100000`,#`0`) // bit 52 set: the leading one just above the mantissa field
205	}
206	{
207	p0 = cmp.gt(EXPB,##BIAS+BIAS-`2`) // above 2046?
208	if (p0.new) jump:nt .Ladd_ovf
209	}
210	{
211	p0 = cmp.gt(EXPB,#`0`) // positive exponent: A is already the finished normal result
212	if (p0.new) jumpr:t r31
213	TMP = sub(#`1`,EXPB) // otherwise: right-shift count for the denormal
214	}
215	{
216	B = insert(A,#MANTBITS,#`0`) // B = leading one (bit 52) + A's mantissa bits
217	A = ATMP // keep the 2's complement sum; only its sign bit survives below
218	}
219	{
220	B = lsr(B,TMP) // shift the significand down into denormal position
221	}
222	{
223	A = insert(B,#`63`,#`0`) // low 63 bits from B, bit 63 (sign) kept from ATMP
224	jumpr r31
225	}
// .Ladd_ovf: the result exponent is too large.
// ORs 0x28 into USR and returns either the largest finite value or infinity,
// chosen from the rounding mode and the result sign; the sign comes from bit
// 63 of the 2's complement sum in ATMP.
226	.falign
227	.Ladd_ovf:
228	// We get either max finite value or infinity. Either way, overflow+inexact
229	{
230	A = ATMP // 2's complement value
231	TMP = USR
232	ATMP = combine(##`0x7fefffff`,#-`1`) // positive max finite
233	}
234	{
235	EXPB = extractu(TMP,#`2`,#SR_ROUND_OFF) // rounding bits
236	TMP = or(TMP,#`0x28`) // inexact + overflow
237	BTMP = combine(##`0x7ff00000`,#`0`) // positive infinity
238	}
239	{
240	USR = TMP // write back USR with the two flag bits set (pre-packet TMP)
241	EXPB ^= lsr(AH,#`31`) // Does sign match rounding?
242	TMP = EXPB // unmodified rounding mode
243	}
244	{
245	p0 = !cmp.eq(TMP,#`1`) // If not round-to-zero and
246	p0 = !cmp.eq(EXPB,#`2`) // Not rounding the other way,
247	if (p0.new) ATMP = BTMP // we should get infinity
248	}
249	{
250	A = insert(ATMP,#`63`,#`0`) // insert inf/maxfinite, leave sign
251	}
252	{
// NOTE(review): p0 is not read after this compare in the visible code;
// presumably it is issued for a side effect related to the flags just written
// to USR -- confirm.
253	p0 = dfcmp.eq(A,A)
254	jumpr r31
255	}
256
// .Ladd_abnormal: reached from the entry when A and B did not both pass the
// class test. Note that the entry packet's conditional swaps depend only on
// BIGB, not on NORMAL.
// Sorts the operands by magnitude bit pattern so that A is the larger, then
// dispatches: NaN -> .Linvalid_nan_add, infinity -> .Linf_add,
// B zero -> .LB_zero, both subnormal -> .Ladd_two_subnormal,
// otherwise -> .Ladd_continue.
257	.Ladd_abnormal:
258	{
259	ATMP = extractu(A,#`63`,#`0`) // strip off sign
260	BTMP = extractu(B,#`63`,#`0`) // strip off sign
261	}
262	{
263	p3 = cmp.gtu(ATMP,BTMP) // is |A| (as a bit pattern) strictly larger than |B|?
264	if (!p3.new) A = B // sort values
265	if (!p3.new) B = A // sort values
266	}
267	{
268	// Any NaN --> NaN, possibly raise invalid if sNaN
269	p0 = dfclass(A,#`0x0f`) // A not NaN?
270	if (!p0.new) jump:nt .Linvalid_nan_add
271	if (!p3) ATMP = BTMP // keep the sign-stripped copies in the same order as A/B
272	if (!p3) BTMP = ATMP
273	}
274	{
275	// Infinity + non-infinity number is infinity
276	// Infinity + infinity --> inf or nan
277	p1 = dfclass(A,#`0x08`) // A is infinity
278	if (p1.new) jump:nt .Linf_add
279	}
280	{
281	p2 = dfclass(B,#`0x01`) // B is zero
282	if (p2.new) jump:nt .LB_zero // so return A or special 0+0
283	ATMP = #`0` // zero constant compared against A at .LB_zero
284	}
285	// We are left with adding one or more subnormals
286	{
287	p0 = dfclass(A,#`4`) // larger operand passes the class-4 test: take the two-subnormal path
288	if (p0.new) jump:nt .Ladd_two_subnormal
289	ATMP = combine(##`0x20000000`,#`0`) // bit-61 template for A, as at the entry
290	}
291	{
292	EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
293	EXPB = #`1` // exponent value used for B on this path
294	// BTMP already ABS(B)
295	BTMP = asl(BTMP,#EXPBITS-`2`) // same bit position as the insert below, but without the bit-61 template
296	}
297	#undef ZERO
298	#define EXTRACTOFF r14
299	#define EXPDIFF r15
300	{
301	ATMP = insert(A,#MANTBITS,#EXPBITS-`2`) // A's mantissa under the bit-61 template
302	EXPDIFF = sub(EXPA,EXPB)
303	ZTMP = combine(#`62`,#`1`) // same ZTMP setup as before the fall-through into .Ladd_continue
304	jump .Ladd_continue
305	}
306
// .Ladd_two_subnormal: adds A and B as signed 64-bit integers.
// Each operand becomes a signed integer (its sign-stripped bit pattern,
// negated if the sign bit is set); the sum is turned back into sign +
// magnitude. An exactly zero sum is passed to .Lzero_plus_zero.
307	.Ladd_two_subnormal:
308	{
309	ATMP = extractu(A,#`63`,#`0`) // |A| bit pattern
310	BTMP = extractu(B,#`63`,#`0`) // |B| bit pattern
311	}
312	{
313	ATMP = neg(ATMP) // assume negative ...
314	BTMP = neg(BTMP)
315	p0 = cmp.gt(AH,#-`1`) // A's sign bit clear?
316	p1 = cmp.gt(BH,#-`1`) // B's sign bit clear?
317	}
318	{
319	if (p0) ATMP = A // ... and undo for a positive operand
320	if (p1) BTMP = B
321	}
322	{
323	ATMP = add(ATMP,BTMP) // signed integer sum
324	}
325	{
326	BTMP = neg(ATMP) // magnitude to use if the sum is negative
327	p0 = cmp.gt(ATMPH,#-`1`) // sum non-negative?
328	B = #`0`
329	}
330	{
331	if (!p0) A = BTMP // A = |sum|
332	if (p0) A = ATMP
333	BH = ##`0x80000000` // B = sign-bit mask in the high word, zero low word
334	}
335	{
336	if (!p0) AH = or(AH,BH) // negative sum: set the sign bit
337	p0 = dfcmp.eq(A,B) // A (before the sign is applied) compares equal to B, i.e. the sum is zero
338	if (p0.new) jump:nt .Lzero_plus_zero
339	}
340	{
341	jumpr r31
342	}
343
// .Linvalid_nan_add: at least one operand is NaN (A is, after sorting).
// Converts the NaN operand(s) so that a signalling NaN raises invalid, then
// returns the all-ones bit pattern in A.
344	.Linvalid_nan_add:
345	{
346	TMP = convert_df2sf(A) // will generate invalid if sNaN
347	p0 = dfclass(B,#`0x0f`) // if B is not NaN
348	if (p0.new) B = A // make it whatever A is
349	}
350	{
351	BL = convert_df2sf(B) // will generate invalid if sNaN
352	A = #-`1` // result: every bit set
353	jumpr r31
354	}
// .LB_zero: B is zero and ATMP was set to 0 by the branching packet.
// Returns A unchanged unless A is zero as well, in which case the sign of the
// zero result is decided at .Lzero_plus_zero (also entered from
// .Ladd_two_subnormal).
355	.falign
356	.LB_zero:
357	{
358	p0 = dfcmp.eq(ATMP,A) // is A also zero?
359	if (!p0.new) jumpr:t r31 // If not, just return A
360	}
361	// 0 + 0 is special
362	// if equal integral values, they have the same sign, which is fine for all rounding
363	// modes.
364	// If unequal in sign, we get +0 for all rounding modes except round down
365	.Lzero_plus_zero:
366	{
367	p0 = cmp.eq(A,B) // identical bit patterns (same sign)?
368	if (p0.new) jumpr:t r31 // yes: A already is the result
369	}
370	{
371	TMP = USR
372	}
373	{
374	TMP = extractu(TMP,#`2`,#SR_ROUND_OFF) // 2-bit rounding mode
375	A = #`0` // default: +0.0
376	}
377	{
378	p0 = cmp.eq(TMP,#`2`) // the round-down mode from the comment above
379	if (p0.new) AH = ##`0x80000000` // then the result is -0.0
380	jumpr r31
381	}
// .Linf_add: A is infinity.
// Returns A unless B is an infinity whose high word differs from A's; in that
// case a NaN is produced by converting a single-precision signalling NaN.
382	.Linf_add:
383	// adding infinities is only OK if they are equal
384	{
385	p0 = !cmp.eq(AH,BH) // Do they have different signs
386	p0 = dfclass(B,#`8`) // And is B also infinite?
387	if (!p0.new) jumpr:t r31 // If not, just a normal inf
388	}
389	{
390	BL = ##`0x7f800001` // sNAN
391	}
392	{
393	A = convert_sf2df(BL) // trigger invalid, set NaN
394	jumpr r31
395	}
// Size of __hexagon_adddf3 covers everything from its label to this point.
396	END(__hexagon_adddf3)
397

Provided by KDAB

Learn to use CMake with our Intro Training

Find out more

source code of compiler-rt/lib/builtins/hexagon/dfaddsub.S