1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Routines to emulate some Altivec/VMX instructions, specifically |
4 | * those that can trap when given denormalized operands in Java mode. |
5 | */ |
6 | #include <linux/kernel.h> |
7 | #include <linux/errno.h> |
8 | #include <linux/sched.h> |
9 | #include <asm/ptrace.h> |
10 | #include <asm/processor.h> |
11 | #include <asm/switch_to.h> |
12 | #include <linux/uaccess.h> |
13 | #include <asm/inst.h> |
14 | |
/*
 * Functions in vector.S.
 *
 * In emulate_altivec() below, the first argument is always the
 * destination register image (&vrs[vd]) and the remaining arguments
 * are the source register images.
 *
 * NOTE(review): vexptep is declared here but is not called anywhere in
 * the visible part of this file (vexptefp is emulated in C by eexp2());
 * confirm whether the symbol exists in vector.S.
 */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vrefp(vector128 *dst, vector128 *src);
extern void vrsqrtefp(vector128 *dst, vector128 *src);
extern void vexptep(vector128 *dst, vector128 *src);
23 | |
/*
 * Mantissa lookup table for eexp2(): entry i holds 2^(i/8) scaled by
 * 2^23, i.e. a 24-bit significand with the implicit leading 1 present.
 * The table is read-only.
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision representation of the estimate.
 *
 * Special cases:
 *	NaN			-> the same NaN with the quiet bit set
 *	+Inf or x >= 256	-> +Inf
 *	-Inf or x <= -256	-> +0
 *	|x| < 2^-23		-> 1.0
 *
 * The fractional part of x selects one of eight table entries with its
 * top three bits; the remaining 20 bits linearly interpolate towards
 * the next entry.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;		/* at most 0xffffff << 7, fits in int */
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * extract integer part, which becomes exponent part of result.
	 * It is biased by 126 rather than 127 because the mantissa added
	 * below still carries its leading 1 in bit 23.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;	/* overflow -> +Inf */
	if (exp < -23)
		return 0;		/* too small even for a denormal */

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * linear interpolation using remaining 20 bits of fraction.
	 * The shift is done on an unsigned value so that discarding the
	 * upper bits is well defined; 0x172b83ff is (2^(1/8) - 1) * 2^32.
	 * Each step keeps the high 32 bits of a 32x32 -> 64 bit product.
	 */
	frac = ((unsigned long long)((unsigned int)pwr << 12) *
		0x172b83ffu) >> 32;
	frac = ((unsigned long long)frac * mant) >> 32;
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: shift right, rounding to nearest */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
89 | |
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision representation of the estimate.
 *
 * Special cases:
 *	NaN		-> the same NaN with the quiet bit set
 *	-Inf, x < 0	-> default QNaN (0x7fc00000); the log is undefined
 *	+0, -0		-> -Inf
 *	+Inf		-> +Inf
 *
 * The result is first built in `exp' as a signed 9.23 fixed-point
 * number: the unbiased exponent supplies the integer part, three
 * compare-and-scale steps supply fraction bits worth 1/2, 1/4 and 1/8,
 * and a linear interpolation supplies the rest.  It is then converted
 * to sign/magnitude floating-point form.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		if (s & 0x80000000)
			return 0x7fc00000;	/* log2(-Inf) is invalid */
		return s;			/* log2(+Inf) = +Inf */
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)
		return 0x7fc00000;	/* log2 of a negative number is invalid */

	if (exp == 0) {
		/*
		 * denormalized: normalize the mantissa by hand.  mant is
		 * non-zero here (zero was handled above), so the leading
		 * zero count is well defined and at least 9.
		 */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		/* multiply instead of shifting a negative value left */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Each step below keeps the high 32 bits of a 32x32 -> 64 bit
	 * unsigned product, i.e. multiplies mant by a 0.32 fixed-point
	 * constant.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = ((unsigned long long)(unsigned int)mant *
			0xb504f334u) >> 32;		/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = ((unsigned long long)(unsigned int)mant *
			0xd744fccbu) >> 32;		/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = ((unsigned long long)(unsigned int)mant *
			0xeac0c6e8u) >> 32;		/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = ((unsigned long long)
			((unsigned int)(mant - 0x800000) << 1) *
			0xb0c7cd3au) >> 32;
		exp += frac;
	}

	/* convert the signed fixed-point value to floating point */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* exp is non-zero here; move its leading 1 to bit 23 */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* the leading 1 in bit 23 bumps the 126 bias to 127 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
154 | |
/* Saturation flag, OR-ed into the word that the vscrp argument points at. */
#define VSCR_SAT	1

/*
 * Convert the single-precision value whose bit pattern is `x', scaled
 * by 2^scale, to a signed 32-bit integer.
 *
 * The fraction is truncated (round towards zero).  A NaN converts to 0.
 * Values outside the signed 32-bit range are clamped to 0x7fffffff or
 * 0x80000000 and VSCR_SAT is set in *vscrp; a value of exactly -2^31
 * is returned as 0x80000000 without setting the flag.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	const int biased = (x >> 23) & 0xff;
	int sig = x & 0x7fffff;
	int e;

	if (biased == 255 && sig != 0)
		return 0;			/* NaN -> 0 */

	e = biased - 127 + scale;
	if (e < 0)
		return 0;			/* |value| < 1 truncates to 0 */

	if (e >= 31) {
		/* out of range, except for exactly -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		if (negative)
			return 0x80000000;
		return 0x7fffffff;
	}

	/* put the leading 1 in bit 30, then shift the fraction out */
	sig = ((sig | 0x800000) << 7) >> (30 - e);
	if (negative)
		return -sig;
	return sig;
}

/*
 * Convert the single-precision value whose bit pattern is `x', scaled
 * by 2^scale, to an unsigned 32-bit integer.
 *
 * The fraction is truncated (round towards zero).  A NaN converts to 0.
 * Negative values of magnitude >= 1 clamp to 0 and values >= 2^32 clamp
 * to 0xffffffff; both cases set VSCR_SAT in *vscrp.  Negative values of
 * magnitude < 1 give 0 without setting the flag.
 */
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	const int biased = (x >> 23) & 0xff;
	unsigned int sig = x & 0x7fffff;
	int e;

	if (biased == 255 && sig != 0)
		return 0;			/* NaN -> 0 */

	e = biased - 127 + scale;
	if (e < 0)
		return 0;			/* |value| < 1 truncates to 0 */

	if (negative || e >= 32) {
		/* not representable: clamp to the nearest end of the range */
		*vscrp |= VSCR_SAT;
		return negative ? 0 : 0xffffffff;
	}

	/* put the leading 1 in bit 31, then shift the fraction out */
	sig = ((sig | 0x800000) << 8) >> (31 - e);
	return sig;
}
205 | |
/*
 * Round to floating integer, towards 0.
 *
 * `x' and the return value are single-precision bit patterns.  A NaN is
 * returned with its quiet bit set; values that are already integral
 * (including Inf) are returned unchanged; anything of magnitude below
 * 1.0 becomes a zero carrying the sign of x.
 */
static unsigned int rfiz(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const unsigned int fraction = x & 0x7fffff;
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && fraction != 0)
		return x | 0x400000;	/* NaN -> QNaN */
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */
	if (e < 0)
		return sign;		/* |x| < 1.0 -> +/-0 */

	/* clear the mantissa bits that lie below the binary point */
	return x & ~(0x7fffffu >> e);
}
220 | |
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 *
 * `x' and the return value are single-precision bit patterns.  A NaN is
 * returned with its quiet bit set; zeroes, Inf and values that are
 * already integral are returned unchanged; any other value of magnitude
 * below 1.0 becomes +/-1.0.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const unsigned int magnitude = x & 0x7fffffff;
	const int e = (int)(magnitude >> 23) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> QNaN */
	if (e >= 23 || magnitude == 0)
		return x;		/* integral already, Inf, or +/-0 */
	if (e < 0)
		return sign | 0x3f800000;	/* 0 < |x| < 1.0 -> +/-1.0 */

	/*
	 * Adding the all-ones fraction mask bumps the integer part unless
	 * the fraction was zero.  A carry out of the mantissa lands in the
	 * exponent, which is the correct result, and cannot reach the sign
	 * bit.
	 */
	below_point = 0x7fffffu >> e;
	return (x + below_point) & ~below_point;
}
241 | |
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (IEEE round-to-nearest).
 *
 * `x' and the return value are single-precision bit patterns.  A NaN is
 * returned with its quiet bit set; Inf and values that are already
 * integral are returned unchanged.  The sign of x is always preserved,
 * so values that round to zero give a zero of the same sign.
 */
static unsigned int rfin(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	unsigned int frac_mask, half, frac, sig;
	int exp;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;	/* it's an integer already (or Inf) */
	if (exp < -1)
		return sign;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1; 0 is the even one */
		if ((x & 0x7fffff) == 0)
			return sign;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return sign | 0x3f800000;
	}

	frac_mask = 0x7fffff >> exp;	/* mantissa bits below the point */
	half = 0x400000 >> exp;		/* the bit worth exactly 0.5 */
	frac = x & frac_mask;
	sig = (x & 0x7fffff) | 0x800000;	/* significand with leading 1 */
	x &= ~frac_mask;		/* truncate towards zero */

	/*
	 * Round the magnitude up when the discarded fraction is more than
	 * one half, or exactly one half with an odd integer part.  The
	 * integer part's lowest bit sits just above the fraction bits.  A
	 * carry out of the mantissa lands in the exponent, which is the
	 * correct result, and cannot reach the sign bit.
	 */
	if (frac > half || (frac == half && ((sig >> (23 - exp)) & 1)))
		x += frac_mask + 1;
	return x;
}
261 | |
262 | int emulate_altivec(struct pt_regs *regs) |
263 | { |
264 | ppc_inst_t instr; |
265 | unsigned int i, word; |
266 | unsigned int va, vb, vc, vd; |
267 | vector128 *vrs; |
268 | |
269 | if (get_user_instr(instr, (void __user *)regs->nip)) |
270 | return -EFAULT; |
271 | |
272 | word = ppc_inst_val(instr); |
273 | if (ppc_inst_primary_opcode(instr) != 4) |
274 | return -EINVAL; /* not an altivec instruction */ |
275 | vd = (word >> 21) & 0x1f; |
276 | va = (word >> 16) & 0x1f; |
277 | vb = (word >> 11) & 0x1f; |
278 | vc = (word >> 6) & 0x1f; |
279 | |
280 | vrs = current->thread.vr_state.vr; |
281 | switch (word & 0x3f) { |
282 | case 10: |
283 | switch (vc) { |
284 | case 0: /* vaddfp */ |
285 | vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); |
286 | break; |
287 | case 1: /* vsubfp */ |
288 | vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); |
289 | break; |
290 | case 4: /* vrefp */ |
291 | vrefp(&vrs[vd], &vrs[vb]); |
292 | break; |
293 | case 5: /* vrsqrtefp */ |
294 | vrsqrtefp(&vrs[vd], &vrs[vb]); |
295 | break; |
296 | case 6: /* vexptefp */ |
297 | for (i = 0; i < 4; ++i) |
298 | vrs[vd].u[i] = eexp2(vrs[vb].u[i]); |
299 | break; |
300 | case 7: /* vlogefp */ |
301 | for (i = 0; i < 4; ++i) |
302 | vrs[vd].u[i] = elog2(vrs[vb].u[i]); |
303 | break; |
304 | case 8: /* vrfin */ |
305 | for (i = 0; i < 4; ++i) |
306 | vrs[vd].u[i] = rfin(vrs[vb].u[i]); |
307 | break; |
308 | case 9: /* vrfiz */ |
309 | for (i = 0; i < 4; ++i) |
310 | vrs[vd].u[i] = rfiz(vrs[vb].u[i]); |
311 | break; |
312 | case 10: /* vrfip */ |
313 | for (i = 0; i < 4; ++i) { |
314 | u32 x = vrs[vb].u[i]; |
315 | x = (x & 0x80000000)? rfiz(x): rfii(x); |
316 | vrs[vd].u[i] = x; |
317 | } |
318 | break; |
319 | case 11: /* vrfim */ |
320 | for (i = 0; i < 4; ++i) { |
321 | u32 x = vrs[vb].u[i]; |
322 | x = (x & 0x80000000)? rfii(x): rfiz(x); |
323 | vrs[vd].u[i] = x; |
324 | } |
325 | break; |
326 | case 14: /* vctuxs */ |
327 | for (i = 0; i < 4; ++i) |
328 | vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, |
329 | ¤t->thread.vr_state.vscr.u[3]); |
330 | break; |
331 | case 15: /* vctsxs */ |
332 | for (i = 0; i < 4; ++i) |
333 | vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, |
334 | ¤t->thread.vr_state.vscr.u[3]); |
335 | break; |
336 | default: |
337 | return -EINVAL; |
338 | } |
339 | break; |
340 | case 46: /* vmaddfp */ |
341 | vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); |
342 | break; |
343 | case 47: /* vnmsubfp */ |
344 | vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); |
345 | break; |
346 | default: |
347 | return -EINVAL; |
348 | } |
349 | |
350 | return 0; |
351 | } |
352 | |