1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Routines to emulate some Altivec/VMX instructions, specifically
4 * those that can trap when given denormalized operands in Java mode.
5 */
6#include <linux/kernel.h>
7#include <linux/errno.h>
8#include <linux/sched.h>
9#include <asm/ptrace.h>
10#include <asm/processor.h>
11#include <asm/switch_to.h>
12#include <linux/uaccess.h>
13#include <asm/inst.h>
14
15/* Functions in vector.S */
16extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
17extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
18extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
19extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
20extern void vrefp(vector128 *dst, vector128 *src);
21extern void vrsqrtefp(vector128 *dst, vector128 *src);
22extern void vexptep(vector128 *dst, vector128 *src);
23
/*
 * Mantissas of 2^(i/8) for i = 0..7, scaled by 2^23 (so the implicit
 * leading one of a single-precision mantissa is bit 23).  The table is
 * only ever read, by eexp2().
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^0.000 */
	0x8b95c2,	/* 2^0.125 */
	0x9837f0,	/* 2^0.250 */
	0xa5fed7,	/* 2^0.375 */
	0xb504f3,	/* 2^0.500 */
	0xc5672a,	/* 2^0.625 */
	0xd744fd,	/* 2^0.750 */
	0xeac0c7	/* 2^0.875 */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the 32-bit single-precision representation of the estimate.
 *
 * Special cases: a NaN input is returned as a QNaN, large positive
 * inputs (and +Inf) give +Inf, large negative inputs (and -Inf) give
 * +0, and inputs with |x| < 2^-23 give 1.0.
 *
 * The estimate is a table lookup on the top three fraction bits of x
 * followed by a linear interpolation over the remaining 20 bits.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = (int)((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;		/* 1.0 */

	/*
	 * Convert to fixed point integer in 9.23 representation.
	 * exp <= 7 here, so the magnitude is at most 0x7fffff80 and the
	 * left shift cannot overflow a signed int.
	 */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * This relies on >> of a negative int being an arithmetic shift
	 * (i.e. a floor), as the kernel's compilers guarantee.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction.
	 * The fraction is moved to the top of a 32-bit word; the shift is
	 * done on the unsigned bit pattern because the bits pushed out
	 * are meant to be discarded.  0x172b83ff is (2^0.125 - 1) * 2^32;
	 * each step keeps the high 32 bits of a 32x32 product.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffULL) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: shift right with round-half-up */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
89
/* High 32 bits of the 64-bit product of two unsigned 32-bit values. */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the 32-bit single-precision representation of the estimate.
 *
 * Special cases: NaN -> QNaN, +Inf -> +Inf, +/-0 -> -Inf, and any
 * negative non-zero input (including -Inf) -> default QNaN, since the
 * logarithm of a negative number is an invalid operation.
 *
 * The result is accumulated as a signed 9.23 fixed-point number: the
 * unbiased exponent gives the integer part, three compare/multiply
 * steps peel off 1/2, 1/4 and 1/8 from the mantissa, and a linear
 * interpolation handles what is left.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, lz;
	unsigned int mant, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* log2(+Inf) = +Inf, log2(-Inf) is invalid */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if (exp == 0 && mant == 0)	/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)		/* x < 0 */
		return 0x7fc00000;	/* return default QNaN */

	if (exp == 0) {
		/*
		 * Denormalized: mant is non-zero here, so the leading
		 * zero count is well defined (and at least 9).
		 */
		lz = __builtin_clz(mant);
		mant <<= lz - 8;
		/* multiply rather than left-shift a negative value */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * The low 23 bits of exp are zero at this point and each step
	 * below contributes a distinct bit, so adding is exact.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp += 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp += 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp += 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
		exp += (int)frac;
	}

	/* convert the signed 9.23 fixed-point value to single precision */
	s = (exp < 0) ? 0x80000000 : 0;
	if (exp != 0) {
		if (exp < 0)
			exp = -exp;
		/* lz becomes the unbiased exponent of the result */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* bit 23 of exp (the leading one) bumps the exponent by 1 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
154
/* Saturation flag, OR-ed into the word passed via `vscrp' below */
#define VSCR_SAT	1

/*
 * Convert the single-precision value encoded in `x', multiplied by
 * 2^scale, to a signed 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Values that do not fit saturate to the nearest
 * representable integer and set VSCR_SAT in *vscrp; a scaled value of
 * exactly -2^31 is representable and therefore does not set the flag.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	const int biased = (x >> 23) & 0xff;
	int magnitude = x & 0x7fffff;
	int e;

	if (biased == 255 && magnitude != 0)
		return 0;			/* NaN -> 0 */

	e = biased - 127 + scale;
	if (e < 0)
		return 0;			/* round towards zero */

	if (e >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading one at bit 30, then shift down to the units */
	magnitude = ((magnitude | 0x800000) << 7) >> (30 - e);
	return negative ? -magnitude : magnitude;
}

/*
 * Convert the single-precision value encoded in `x', multiplied by
 * 2^scale, to an unsigned 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Negative values whose scaled magnitude is at
 * least 1 saturate to 0 and values of 2^32 or more saturate to
 * 0xffffffff; both set VSCR_SAT in *vscrp.
 */
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const int biased = (x >> 23) & 0xff;
	unsigned int magnitude = x & 0x7fffff;
	int e;

	if (biased == 255 && magnitude != 0)
		return 0;			/* NaN -> 0 */

	e = biased - 127 + scale;
	if (e < 0)
		return 0;			/* round towards zero */

	if (x & 0x80000000) {
		/* negative => saturate to 0 */
		*vscrp |= VSCR_SAT;
		return 0;
	}
	if (e >= 32) {
		/* saturate */
		*vscrp |= VSCR_SAT;
		return 0xffffffff;
	}

	/* put the leading one at bit 31, then shift down to the units */
	magnitude |= 0x800000;
	return (magnitude << 8) >> (31 - e);
}
205
/*
 * Round to floating integer, towards 0.
 *
 * `x' and the return value are 32-bit single-precision encodings.
 * NaNs are returned quieted; Inf and values that are already integral
 * are returned unchanged.
 */
static unsigned int rfiz(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const unsigned int fraction = x & 0x7fffff;
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && fraction != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */
	if (e < 0)
		return sign;		/* |x| < 1.0 rounds to +/-0 */

	/* drop the mantissa bits that lie below the binary point */
	return x & ~(0x7fffffu >> e);
}
220
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 *
 * `x' and the return value are 32-bit single-precision encodings.
 * NaNs are returned quieted; zeros, Inf and values that are already
 * integral are returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */
	if (x == sign)
		return x;		/* +/-0 -> +/-0 */
	if (e < 0)
		return sign | 0x3f800000;	/* 0 < |x| < 1.0 -> +/-1.0 */

	/*
	 * Bump the magnitude by just under one unit and truncate.  A
	 * carry out of the mantissa lands in the exponent field, which
	 * is the right answer, and it can never reach the sign bit.
	 */
	below_point = 0x7fffffu >> e;
	return (x + below_point) & ~below_point;
}
241
/*
 * Round to floating integer, to nearest; ties go to the even integer.
 *
 * `x' and the return value are 32-bit single-precision encodings.
 * NaNs are returned quieted; Inf and values that are already integral
 * are returned unchanged.
 */
static unsigned int rfin(unsigned int x)
{
	int exp;
	unsigned int frac_mask, unit, half, frac;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie and goes to the even neighbour, 0 */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffff >> exp;	/* mantissa bits below the point */
	unit = frac_mask + 1;		/* weight of 1.0 in the encoding */
	half = unit >> 1;		/* weight of 0.5 in the encoding */
	frac = x & frac_mask;
	x &= ~frac_mask;		/* magnitude truncated to an integer */

	/*
	 * Round the magnitude up when the fraction is above one half, or
	 * exactly one half with an odd integer part.  For exp == 0 the
	 * integer part is the implicit leading one, which is odd.  A carry
	 * out of the mantissa lands in the exponent field, which is the
	 * right answer, and it can never reach the sign bit.
	 */
	if (frac > half || (frac == half && (exp == 0 || (x & unit))))
		x += unit;
	return x;
}
261
262int emulate_altivec(struct pt_regs *regs)
263{
264 ppc_inst_t instr;
265 unsigned int i, word;
266 unsigned int va, vb, vc, vd;
267 vector128 *vrs;
268
269 if (get_user_instr(instr, (void __user *)regs->nip))
270 return -EFAULT;
271
272 word = ppc_inst_val(instr);
273 if (ppc_inst_primary_opcode(instr) != 4)
274 return -EINVAL; /* not an altivec instruction */
275 vd = (word >> 21) & 0x1f;
276 va = (word >> 16) & 0x1f;
277 vb = (word >> 11) & 0x1f;
278 vc = (word >> 6) & 0x1f;
279
280 vrs = current->thread.vr_state.vr;
281 switch (word & 0x3f) {
282 case 10:
283 switch (vc) {
284 case 0: /* vaddfp */
285 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
286 break;
287 case 1: /* vsubfp */
288 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
289 break;
290 case 4: /* vrefp */
291 vrefp(&vrs[vd], &vrs[vb]);
292 break;
293 case 5: /* vrsqrtefp */
294 vrsqrtefp(&vrs[vd], &vrs[vb]);
295 break;
296 case 6: /* vexptefp */
297 for (i = 0; i < 4; ++i)
298 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
299 break;
300 case 7: /* vlogefp */
301 for (i = 0; i < 4; ++i)
302 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
303 break;
304 case 8: /* vrfin */
305 for (i = 0; i < 4; ++i)
306 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
307 break;
308 case 9: /* vrfiz */
309 for (i = 0; i < 4; ++i)
310 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
311 break;
312 case 10: /* vrfip */
313 for (i = 0; i < 4; ++i) {
314 u32 x = vrs[vb].u[i];
315 x = (x & 0x80000000)? rfiz(x): rfii(x);
316 vrs[vd].u[i] = x;
317 }
318 break;
319 case 11: /* vrfim */
320 for (i = 0; i < 4; ++i) {
321 u32 x = vrs[vb].u[i];
322 x = (x & 0x80000000)? rfii(x): rfiz(x);
323 vrs[vd].u[i] = x;
324 }
325 break;
326 case 14: /* vctuxs */
327 for (i = 0; i < 4; ++i)
328 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
329 &current->thread.vr_state.vscr.u[3]);
330 break;
331 case 15: /* vctsxs */
332 for (i = 0; i < 4; ++i)
333 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
334 &current->thread.vr_state.vscr.u[3]);
335 break;
336 default:
337 return -EINVAL;
338 }
339 break;
340 case 46: /* vmaddfp */
341 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
342 break;
343 case 47: /* vnmsubfp */
344 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
345 break;
346 default:
347 return -EINVAL;
348 }
349
350 return 0;
351}
352

/* Source: linux/arch/powerpc/kernel/vecemu.c */