vecemu.c source code [linux/arch/powerpc/kernel/vecemu.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Routines to emulate some Altivec/VMX instructions, specifically
4	* those that can trap when given denormalized operands in Java mode.
5	*/
6	#include <linux/kernel.h>
7	#include <linux/errno.h>
8	#include <linux/sched.h>
9	#include <asm/ptrace.h>
10	#include <asm/processor.h>
11	#include <asm/switch_to.h>
12	#include <linux/uaccess.h>
13	#include <asm/inst.h>
14
15	/ Functions in vector.S /
16	extern void vaddfp(vector128 dst, vector128 a, vector128 *b);
17	extern void vsubfp(vector128 dst, vector128 a, vector128 *b);
18	extern void vmaddfp(vector128 dst, vector128 a, vector128 b, vector128 c);
19	extern void vnmsubfp(vector128 dst, vector128 a, vector128 b, vector128 c);
20	extern void vrefp(vector128 dst, vector128 src);
21	extern void vrsqrtefp(vector128 dst, vector128 src);
22	extern void vexptep(vector128 dst, vector128 src);
23
/*
 * 24-bit mantissas (implicit leading one at bit 23) of 2^(i/8) for
 * i = 0..7, i.e. 2^(i/8) * 2^23.  Only read by eexp2().
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^0.000 */
	0x8b95c2,	/* 2^0.125 */
	0x9837f0,	/* 2^0.250 */
	0xa5fed7,	/* 2^0.375 */
	0xb504f3,	/* 2^0.500 */
	0xc5672a,	/* 2^0.625 */
	0xd744fd,	/* 2^0.750 */
	0xeac0c7	/* 2^0.875 */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 *
 * Returns the 32-bit single-precision representation of the estimate:
 *   - NaN input        -> the same NaN with the quiet bit set
 *   - x too large      -> +Inf
 *   - x too negative   -> +0
 *   - |x| < 2^-23      -> 1.0
 * Results below the normal range are returned as denormals.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;		/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * extract integer part, which becomes exponent part of result
	 * (relies on >> of a negative int being an arithmetic shift)
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * linear interpolation using remaining 20 bits of fraction:
	 * each step keeps the high 32 bits of an unsigned 32x32 multiply
	 * (what the PowerPC mulhwu instruction computes).  0x172b83ff is
	 * approximately (2^0.125 - 1) * 2^32.  The shift is done on the
	 * unsigned value so that discarded high bits cannot overflow.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffu) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: shift right by -exp, rounding to nearest */
	exp = -exp;
	mant += 1U << (exp - 1);
	return mant >> exp;
}
89
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 *
 * Returns the 32-bit single-precision representation of the estimate:
 *   - Inf input  -> returned unchanged
 *   - NaN input  -> the same NaN with the quiet bit set
 *   - +0 or -0   -> -Inf
 * NOTE(review): for finite non-zero inputs the sign bit of `s' is masked
 * off and ignored, so negative inputs produce log_2(|x|) -- confirm this
 * matches what the hardware instruction is expected to return.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */

	/*
	 * From here on `exp' holds the running result as a signed fixed
	 * point number with 23 fraction bits, and `mant' the mantissa with
	 * its leading one at bit 23.
	 */
	if (exp == 0) {
		/* denormalized: mant != 0 here, so the count is defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		/* same value as (-118 - lz) << 23 without shifting a negative */
		exp = -((118 + lz) << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off 0.5, 0.25 and 0.125 from the fraction of the logarithm.
	 * Each multiply keeps the high 32 bits of an unsigned 32x32 product
	 * (what the PowerPC mulhwu instruction computes).
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = (int)(((unsigned long long)(unsigned int)mant *
			      0xb504f334u) >> 32);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = (int)(((unsigned long long)(unsigned int)mant *
			      0xd744fccbu) >> 32);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = (int)(((unsigned long long)(unsigned int)mant *
			      0xeac0c6e8u) >> 32);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = (int)(((unsigned long long)
			      ((unsigned int)(mant - 0x800000) << 1) *
			      0xb0c7cd3au) >> 32);
		exp += frac;
	}

	/* convert the fixed point result to sign/exponent/mantissa form */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* normalize so that the leading one lands on bit 23 */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* bit 23 of exp carries into the biased exponent field */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
154
/* Saturation flag, OR-ed into the VSCR word when a conversion saturates */
#define VSCR_SAT	1

/*
 * Convert a single-precision value to a signed 32-bit integer with
 * saturation, rounding towards zero.
 *
 * x:     32-bit single-precision representation of the input
 * scale: added to the exponent, i.e. the input is multiplied by 2^scale
 * vscrp: word that gets VSCR_SAT OR-ed in when the result saturates
 *
 * NaN converts to 0.  Out-of-range values (including +/-Inf) saturate to
 * 0x7fffffff or 0x80000000; an exact -2^31 is returned without setting
 * VSCR_SAT.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int exp, mant;

	exp = (x >> 23) & 0xff;
	mant = x & 0x7fffff;
	if (exp == 255 && mant != 0)
		return 0;		/* NaN -> 0 */
	exp = exp - 127 + scale;
	if (exp < 0)
		return 0;		/* round towards zero */
	if (exp >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return (x & 0x80000000) ? 0x80000000 : 0x7fffffff;
	}
	mant |= 0x800000;
	/* put the leading one at bit 30, then shift down to the units place */
	mant = (mant << 7) >> (30 - exp);
	return (x & 0x80000000) ? -mant : mant;
}
178
179	static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
180	{
181	int exp;
182	unsigned int mant;
183
184	exp = (x >> `23`) & `0xff`;
185	mant = x & `0x7fffff`;
186	if (exp == `255` && mant != `0`)
187	return `0`; / NaN -> 0 /
188	exp = exp - `127` + scale;
189	if (exp < `0`)
190	return `0`; / round towards zero /
191	if (x & `0x80000000`) {
192	/ negative => saturate to 0 /
193	*vscrp \|= VSCR_SAT;
194	return `0`;
195	}
196	if (exp >= `32`) {
197	/ saturate /
198	*vscrp \|= VSCR_SAT;
199	return `0xffffffff`;
200	}
201	mant \|= `0x800000`;
202	mant = (mant << `8`) >> (`31` - exp);
203	return mant;
204	}
205
/*
 * Round to floating integer, towards 0.
 *
 * Takes and returns the 32-bit single-precision representation.  NaNs are
 * returned with the quiet bit set; the sign of the input is preserved,
 * including when the result is zero.
 */
static unsigned int rfiz(unsigned int x)
{
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
	/* clear the mantissa bits that lie below the binary point */
	return x & ~(0x7fffff >> exp);
}
220
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 *
 * Takes and returns the 32-bit single-precision representation.  NaNs are
 * returned with the quiet bit set; +/-0 is returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	int exp;
	unsigned int mask;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 -> +/-0 */
	if (exp < 0)
		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	/* mask covers the mantissa bits that lie below the binary point */
	mask = 0x7fffff >> exp;
	/*
	 * mantissa overflows into exponent - that's OK,
	 * it can't overflow into the sign bit
	 */
	return (x + mask) & ~mask;
}
241
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (the IEEE 754 round-to-nearest mode).
 *
 * Takes and returns the 32-bit single-precision representation.  NaNs are
 * returned with the quiet bit set; the sign of the input is preserved,
 * including when the result is zero.
 */
static unsigned int rfin(unsigned int x)
{
	int exp;
	unsigned int frac_mask, unit, half, frac, res;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: 0 is even */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffff >> exp;	/* mantissa bits below the point */
	unit = frac_mask + 1;		/* weight of 1.0 in this encoding */
	half = unit >> 1;		/* weight of 0.5 */
	frac = x & frac_mask;
	res = x & ~frac_mask;		/* magnitude truncated to an integer */

	/*
	 * Round the magnitude up when the fraction is above one half, or is
	 * exactly one half and the truncated integer is odd.  For exp == 0
	 * the integer part is the implicit leading one, which is odd.
	 * A carry out of the mantissa increments the exponent, which gives
	 * the correct next power of two; it cannot reach the sign bit.
	 */
	if (frac > half || (frac == half && (exp == 0 || (res & unit))))
		res += unit;
	return res;
}
261
262	int emulate_altivec(struct pt_regs *regs)
263	{
264	ppc_inst_t instr;
265	unsigned int i, word;
266	unsigned int va, vb, vc, vd;
267	vector128 *vrs;
268
269	if (get_user_instr(instr, (void __user *)regs->nip))
270	return -EFAULT;
271
272	word = ppc_inst_val(instr);
273	if (ppc_inst_primary_opcode(instr) != `4`)
274	return -EINVAL; / not an altivec instruction /
275	vd = (word >> `21`) & `0x1f`;
276	va = (word >> `16`) & `0x1f`;
277	vb = (word >> `11`) & `0x1f`;
278	vc = (word >> `6`) & `0x1f`;
279
280	vrs = current->thread.vr_state.vr;
281	switch (word & `0x3f`) {
282	case `10`:
283	switch (vc) {
284	case `0`: / vaddfp /
285	vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
286	break;
287	case `1`: / vsubfp /
288	vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
289	break;
290	case `4`: / vrefp /
291	vrefp(&vrs[vd], &vrs[vb]);
292	break;
293	case `5`: / vrsqrtefp /
294	vrsqrtefp(&vrs[vd], &vrs[vb]);
295	break;
296	case `6`: / vexptefp /
297	for (i = `0`; i < `4`; ++i)
298	vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
299	break;
300	case `7`: / vlogefp /
301	for (i = `0`; i < `4`; ++i)
302	vrs[vd].u[i] = elog2(vrs[vb].u[i]);
303	break;
304	case `8`: / vrfin /
305	for (i = `0`; i < `4`; ++i)
306	vrs[vd].u[i] = rfin(vrs[vb].u[i]);
307	break;
308	case `9`: / vrfiz /
309	for (i = `0`; i < `4`; ++i)
310	vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
311	break;
312	case `10`: / vrfip /
313	for (i = `0`; i < `4`; ++i) {
314	u32 x = vrs[vb].u[i];
315	x = (x & `0x80000000`)? rfiz(x): rfii(x);
316	vrs[vd].u[i] = x;
317	}
318	break;
319	case `11`: / vrfim /
320	for (i = `0`; i < `4`; ++i) {
321	u32 x = vrs[vb].u[i];
322	x = (x & `0x80000000`)? rfii(x): rfiz(x);
323	vrs[vd].u[i] = x;
324	}
325	break;
326	case `14`: / vctuxs /
327	for (i = `0`; i < `4`; ++i)
328	vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
329	&current->thread.vr_state.vscr.u[`3`]);
330	break;
331	case `15`: / vctsxs /
332	for (i = `0`; i < `4`; ++i)
333	vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
334	&current->thread.vr_state.vscr.u[`3`]);
335	break;
336	default:
337	return -EINVAL;
338	}
339	break;
340	case `46`: / vmaddfp /
341	vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
342	break;
343	case `47`: / vnmsubfp /
344	vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
345	break;
346	default:
347	return -EINVAL;
348	}
349
350	return `0`;
351	}
352

source code of linux/arch/powerpc/kernel/vecemu.c