1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* visemul.c: Emulation of VIS instructions. |
3 | * |
4 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) |
5 | */ |
6 | #include <linux/kernel.h> |
7 | #include <linux/errno.h> |
8 | #include <linux/thread_info.h> |
9 | #include <linux/perf_event.h> |
10 | |
11 | #include <asm/ptrace.h> |
12 | #include <asm/pstate.h> |
13 | #include <asm/fpumacro.h> |
14 | #include <linux/uaccess.h> |
15 | #include <asm/cacheflush.h> |
16 | |
17 | /* OPF field of various VIS instructions. */ |
18 | |
19 | /* 000111011 - four 16-bit packs */ |
20 | #define FPACK16_OPF 0x03b |
21 | |
22 | /* 000111010 - two 32-bit packs */ |
23 | #define FPACK32_OPF 0x03a |
24 | |
/* 000111101 - two 32-bit to 16-bit fixed packs */
26 | #define FPACKFIX_OPF 0x03d |
27 | |
28 | /* 001001101 - four 16-bit expands */ |
29 | #define FEXPAND_OPF 0x04d |
30 | |
31 | /* 001001011 - two 32-bit merges */ |
32 | #define FPMERGE_OPF 0x04b |
33 | |
34 | /* 000110001 - 8-by-16-bit partitioned product */ |
35 | #define FMUL8x16_OPF 0x031 |
36 | |
37 | /* 000110011 - 8-by-16-bit upper alpha partitioned product */ |
38 | #define FMUL8x16AU_OPF 0x033 |
39 | |
40 | /* 000110101 - 8-by-16-bit lower alpha partitioned product */ |
41 | #define FMUL8x16AL_OPF 0x035 |
42 | |
43 | /* 000110110 - upper 8-by-16-bit partitioned product */ |
44 | #define FMUL8SUx16_OPF 0x036 |
45 | |
46 | /* 000110111 - lower 8-by-16-bit partitioned product */ |
47 | #define FMUL8ULx16_OPF 0x037 |
48 | |
49 | /* 000111000 - upper 8-by-16-bit partitioned product */ |
50 | #define FMULD8SUx16_OPF 0x038 |
51 | |
52 | /* 000111001 - lower unsigned 8-by-16-bit partitioned product */ |
53 | #define FMULD8ULx16_OPF 0x039 |
54 | |
55 | /* 000101000 - four 16-bit compare; set rd if src1 > src2 */ |
56 | #define FCMPGT16_OPF 0x028 |
57 | |
58 | /* 000101100 - two 32-bit compare; set rd if src1 > src2 */ |
59 | #define FCMPGT32_OPF 0x02c |
60 | |
61 | /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ |
62 | #define FCMPLE16_OPF 0x020 |
63 | |
64 | /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ |
65 | #define FCMPLE32_OPF 0x024 |
66 | |
67 | /* 000100010 - four 16-bit compare; set rd if src1 != src2 */ |
68 | #define FCMPNE16_OPF 0x022 |
69 | |
70 | /* 000100110 - two 32-bit compare; set rd if src1 != src2 */ |
71 | #define FCMPNE32_OPF 0x026 |
72 | |
73 | /* 000101010 - four 16-bit compare; set rd if src1 == src2 */ |
74 | #define FCMPEQ16_OPF 0x02a |
75 | |
76 | /* 000101110 - two 32-bit compare; set rd if src1 == src2 */ |
77 | #define FCMPEQ32_OPF 0x02e |
78 | |
79 | /* 000000000 - Eight 8-bit edge boundary processing */ |
80 | #define EDGE8_OPF 0x000 |
81 | |
82 | /* 000000001 - Eight 8-bit edge boundary processing, no CC */ |
83 | #define EDGE8N_OPF 0x001 |
84 | |
85 | /* 000000010 - Eight 8-bit edge boundary processing, little-endian */ |
86 | #define EDGE8L_OPF 0x002 |
87 | |
88 | /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ |
89 | #define EDGE8LN_OPF 0x003 |
90 | |
91 | /* 000000100 - Four 16-bit edge boundary processing */ |
92 | #define EDGE16_OPF 0x004 |
93 | |
94 | /* 000000101 - Four 16-bit edge boundary processing, no CC */ |
95 | #define EDGE16N_OPF 0x005 |
96 | |
97 | /* 000000110 - Four 16-bit edge boundary processing, little-endian */ |
98 | #define EDGE16L_OPF 0x006 |
99 | |
100 | /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ |
101 | #define EDGE16LN_OPF 0x007 |
102 | |
103 | /* 000001000 - Two 32-bit edge boundary processing */ |
104 | #define EDGE32_OPF 0x008 |
105 | |
106 | /* 000001001 - Two 32-bit edge boundary processing, no CC */ |
107 | #define EDGE32N_OPF 0x009 |
108 | |
109 | /* 000001010 - Two 32-bit edge boundary processing, little-endian */ |
110 | #define EDGE32L_OPF 0x00a |
111 | |
112 | /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ |
113 | #define EDGE32LN_OPF 0x00b |
114 | |
115 | /* 000111110 - distance between 8 8-bit components */ |
116 | #define PDIST_OPF 0x03e |
117 | |
118 | /* 000010000 - convert 8-bit 3-D address to blocked byte address */ |
119 | #define ARRAY8_OPF 0x010 |
120 | |
121 | /* 000010010 - convert 16-bit 3-D address to blocked byte address */ |
122 | #define ARRAY16_OPF 0x012 |
123 | |
124 | /* 000010100 - convert 32-bit 3-D address to blocked byte address */ |
125 | #define ARRAY32_OPF 0x014 |
126 | |
127 | /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ |
128 | #define BMASK_OPF 0x019 |
129 | |
130 | /* 001001100 - Permute bytes as specified by GSR.MASK */ |
131 | #define BSHUFFLE_OPF 0x04c |
132 | |
/* The OPF field is 9 bits wide and starts at bit 5 of the instruction. */
#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)

/* 5-bit register-number fields of the instruction word. */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	((INSN) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
139 | |
/*
 * Flush the register windows if any operand register number is 16 or
 * above.  A non-zero @from_kernel selects the raw "flushw" instruction;
 * otherwise flushw_user() is called.
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* Nothing to do when every operand number is below 16. */
	if (rs2 < 16 && rs1 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
150 | |
151 | static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) |
152 | { |
153 | unsigned long value, fp; |
154 | |
155 | if (reg < 16) |
156 | return (!reg ? 0 : regs->u_regs[reg]); |
157 | |
158 | fp = regs->u_regs[UREG_FP]; |
159 | |
160 | if (regs->tstate & TSTATE_PRIV) { |
161 | struct reg_window *win; |
162 | win = (struct reg_window *)(fp + STACK_BIAS); |
163 | value = win->locals[reg - 16]; |
164 | } else if (!test_thread_64bit_stack(fp)) { |
165 | struct reg_window32 __user *win32; |
166 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); |
167 | get_user(value, &win32->locals[reg - 16]); |
168 | } else { |
169 | struct reg_window __user *win; |
170 | win = (struct reg_window __user *)(fp + STACK_BIAS); |
171 | get_user(value, &win->locals[reg - 16]); |
172 | } |
173 | return value; |
174 | } |
175 | |
176 | static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, |
177 | struct pt_regs *regs) |
178 | { |
179 | unsigned long fp = regs->u_regs[UREG_FP]; |
180 | |
181 | BUG_ON(reg < 16); |
182 | BUG_ON(regs->tstate & TSTATE_PRIV); |
183 | |
184 | if (!test_thread_64bit_stack(fp)) { |
185 | struct reg_window32 __user *win32; |
186 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); |
187 | return (unsigned long __user *)&win32->locals[reg - 16]; |
188 | } else { |
189 | struct reg_window __user *win; |
190 | win = (struct reg_window __user *)(fp + STACK_BIAS); |
191 | return &win->locals[reg - 16]; |
192 | } |
193 | } |
194 | |
195 | static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, |
196 | struct pt_regs *regs) |
197 | { |
198 | BUG_ON(reg >= 16); |
199 | BUG_ON(regs->tstate & TSTATE_PRIV); |
200 | |
201 | return ®s->u_regs[reg]; |
202 | } |
203 | |
204 | static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) |
205 | { |
206 | if (rd < 16) { |
207 | unsigned long *rd_kern = __fetch_reg_addr_kern(reg: rd, regs); |
208 | |
209 | *rd_kern = val; |
210 | } else { |
211 | unsigned long __user *rd_user = __fetch_reg_addr_user(reg: rd, regs); |
212 | |
213 | if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) |
214 | __put_user((u32)val, (u32 __user *)rd_user); |
215 | else |
216 | __put_user(val, rd_user); |
217 | } |
218 | } |
219 | |
220 | static inline unsigned long fpd_regval(struct fpustate *f, |
221 | unsigned int insn_regnum) |
222 | { |
223 | insn_regnum = (((insn_regnum & 1) << 5) | |
224 | (insn_regnum & 0x1e)); |
225 | |
226 | return *(unsigned long *) &f->regs[insn_regnum]; |
227 | } |
228 | |
229 | static inline unsigned long *fpd_regaddr(struct fpustate *f, |
230 | unsigned int insn_regnum) |
231 | { |
232 | insn_regnum = (((insn_regnum & 1) << 5) | |
233 | (insn_regnum & 0x1e)); |
234 | |
235 | return (unsigned long *) &f->regs[insn_regnum]; |
236 | } |
237 | |
238 | static inline unsigned int fps_regval(struct fpustate *f, |
239 | unsigned int insn_regnum) |
240 | { |
241 | return f->regs[insn_regnum]; |
242 | } |
243 | |
244 | static inline unsigned int *fps_regaddr(struct fpustate *f, |
245 | unsigned int insn_regnum) |
246 | { |
247 | return &f->regs[insn_regnum]; |
248 | } |
249 | |
250 | struct edge_tab { |
251 | u16 left, right; |
252 | }; |
253 | static struct edge_tab edge8_tab[8] = { |
254 | { 0xff, 0x80 }, |
255 | { 0x7f, 0xc0 }, |
256 | { 0x3f, 0xe0 }, |
257 | { 0x1f, 0xf0 }, |
258 | { 0x0f, 0xf8 }, |
259 | { 0x07, 0xfc }, |
260 | { 0x03, 0xfe }, |
261 | { 0x01, 0xff }, |
262 | }; |
263 | static struct edge_tab edge8_tab_l[8] = { |
264 | { 0xff, 0x01 }, |
265 | { 0xfe, 0x03 }, |
266 | { 0xfc, 0x07 }, |
267 | { 0xf8, 0x0f }, |
268 | { 0xf0, 0x1f }, |
269 | { 0xe0, 0x3f }, |
270 | { 0xc0, 0x7f }, |
271 | { 0x80, 0xff }, |
272 | }; |
273 | static struct edge_tab edge16_tab[4] = { |
274 | { 0xf, 0x8 }, |
275 | { 0x7, 0xc }, |
276 | { 0x3, 0xe }, |
277 | { 0x1, 0xf }, |
278 | }; |
279 | static struct edge_tab edge16_tab_l[4] = { |
280 | { 0xf, 0x1 }, |
281 | { 0xe, 0x3 }, |
282 | { 0xc, 0x7 }, |
283 | { 0x8, 0xf }, |
284 | }; |
285 | static struct edge_tab edge32_tab[2] = { |
286 | { 0x3, 0x2 }, |
287 | { 0x1, 0x3 }, |
288 | }; |
289 | static struct edge_tab edge32_tab_l[2] = { |
290 | { 0x3, 0x1 }, |
291 | { 0x2, 0x3 }, |
292 | }; |
293 | |
294 | static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) |
295 | { |
296 | unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; |
297 | u16 left, right; |
298 | |
299 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), from_kernel: 0); |
300 | orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); |
301 | orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); |
302 | |
303 | if (test_thread_flag(TIF_32BIT)) { |
304 | rs1 = rs1 & 0xffffffff; |
305 | rs2 = rs2 & 0xffffffff; |
306 | } |
307 | switch (opf) { |
308 | default: |
309 | case EDGE8_OPF: |
310 | case EDGE8N_OPF: |
311 | left = edge8_tab[rs1 & 0x7].left; |
312 | right = edge8_tab[rs2 & 0x7].right; |
313 | break; |
314 | case EDGE8L_OPF: |
315 | case EDGE8LN_OPF: |
316 | left = edge8_tab_l[rs1 & 0x7].left; |
317 | right = edge8_tab_l[rs2 & 0x7].right; |
318 | break; |
319 | |
320 | case EDGE16_OPF: |
321 | case EDGE16N_OPF: |
322 | left = edge16_tab[(rs1 >> 1) & 0x3].left; |
323 | right = edge16_tab[(rs2 >> 1) & 0x3].right; |
324 | break; |
325 | |
326 | case EDGE16L_OPF: |
327 | case EDGE16LN_OPF: |
328 | left = edge16_tab_l[(rs1 >> 1) & 0x3].left; |
329 | right = edge16_tab_l[(rs2 >> 1) & 0x3].right; |
330 | break; |
331 | |
332 | case EDGE32_OPF: |
333 | case EDGE32N_OPF: |
334 | left = edge32_tab[(rs1 >> 2) & 0x1].left; |
335 | right = edge32_tab[(rs2 >> 2) & 0x1].right; |
336 | break; |
337 | |
338 | case EDGE32L_OPF: |
339 | case EDGE32LN_OPF: |
340 | left = edge32_tab_l[(rs1 >> 2) & 0x1].left; |
341 | right = edge32_tab_l[(rs2 >> 2) & 0x1].right; |
342 | break; |
343 | } |
344 | |
345 | if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) |
346 | rd_val = right & left; |
347 | else |
348 | rd_val = left; |
349 | |
350 | store_reg(regs, val: rd_val, RD(insn)); |
351 | |
352 | switch (opf) { |
353 | case EDGE8_OPF: |
354 | case EDGE8L_OPF: |
355 | case EDGE16_OPF: |
356 | case EDGE16L_OPF: |
357 | case EDGE32_OPF: |
358 | case EDGE32L_OPF: { |
359 | unsigned long ccr, tstate; |
360 | |
361 | __asm__ __volatile__("subcc %1, %2, %%g0\n\t" |
362 | "rd %%ccr, %0" |
363 | : "=r" (ccr) |
364 | : "r" (orig_rs1), "r" (orig_rs2) |
365 | : "cc" ); |
366 | tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); |
367 | regs->tstate = tstate | (ccr << 32UL); |
368 | } |
369 | } |
370 | } |
371 | |
372 | static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) |
373 | { |
374 | unsigned long rs1, rs2, rd_val; |
375 | unsigned int bits, bits_mask; |
376 | |
377 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), from_kernel: 0); |
378 | rs1 = fetch_reg(RS1(insn), regs); |
379 | rs2 = fetch_reg(RS2(insn), regs); |
380 | |
381 | bits = (rs2 > 5 ? 5 : rs2); |
382 | bits_mask = (1UL << bits) - 1UL; |
383 | |
384 | rd_val = ((((rs1 >> 11) & 0x3) << 0) | |
385 | (((rs1 >> 33) & 0x3) << 2) | |
386 | (((rs1 >> 55) & 0x1) << 4) | |
387 | (((rs1 >> 13) & 0xf) << 5) | |
388 | (((rs1 >> 35) & 0xf) << 9) | |
389 | (((rs1 >> 56) & 0xf) << 13) | |
390 | (((rs1 >> 17) & bits_mask) << 17) | |
391 | (((rs1 >> 39) & bits_mask) << (17 + bits)) | |
392 | (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); |
393 | |
394 | switch (opf) { |
395 | case ARRAY16_OPF: |
396 | rd_val <<= 1; |
397 | break; |
398 | |
399 | case ARRAY32_OPF: |
400 | rd_val <<= 2; |
401 | } |
402 | |
403 | store_reg(regs, val: rd_val, RD(insn)); |
404 | } |
405 | |
406 | static void bmask(struct pt_regs *regs, unsigned int insn) |
407 | { |
408 | unsigned long rs1, rs2, rd_val, gsr; |
409 | |
410 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), from_kernel: 0); |
411 | rs1 = fetch_reg(RS1(insn), regs); |
412 | rs2 = fetch_reg(RS2(insn), regs); |
413 | rd_val = rs1 + rs2; |
414 | |
415 | store_reg(regs, val: rd_val, RD(insn)); |
416 | |
417 | gsr = current_thread_info()->gsr[0] & 0xffffffff; |
418 | gsr |= rd_val << 32UL; |
419 | current_thread_info()->gsr[0] = gsr; |
420 | } |
421 | |
422 | static void bshuffle(struct pt_regs *regs, unsigned int insn) |
423 | { |
424 | struct fpustate *f = FPUSTATE; |
425 | unsigned long rs1, rs2, rd_val; |
426 | unsigned long bmask, i; |
427 | |
428 | bmask = current_thread_info()->gsr[0] >> 32UL; |
429 | |
430 | rs1 = fpd_regval(f, RS1(insn)); |
431 | rs2 = fpd_regval(f, RS2(insn)); |
432 | |
433 | rd_val = 0UL; |
434 | for (i = 0; i < 8; i++) { |
435 | unsigned long which = (bmask >> (i * 4)) & 0xf; |
436 | unsigned long byte; |
437 | |
438 | if (which < 8) |
439 | byte = (rs1 >> (which * 8)) & 0xff; |
440 | else |
441 | byte = (rs2 >> ((which-8)*8)) & 0xff; |
442 | rd_val |= (byte << (i * 8)); |
443 | } |
444 | |
445 | *fpd_regaddr(f, RD(insn)) = rd_val; |
446 | } |
447 | |
448 | static void pdist(struct pt_regs *regs, unsigned int insn) |
449 | { |
450 | struct fpustate *f = FPUSTATE; |
451 | unsigned long rs1, rs2, *rd, rd_val; |
452 | unsigned long i; |
453 | |
454 | rs1 = fpd_regval(f, RS1(insn)); |
455 | rs2 = fpd_regval(f, RS2(insn)); |
456 | rd = fpd_regaddr(f, RD(insn)); |
457 | |
458 | rd_val = *rd; |
459 | |
460 | for (i = 0; i < 8; i++) { |
461 | s16 s1, s2; |
462 | |
463 | s1 = (rs1 >> (56 - (i * 8))) & 0xff; |
464 | s2 = (rs2 >> (56 - (i * 8))) & 0xff; |
465 | |
466 | /* Absolute value of difference. */ |
467 | s1 -= s2; |
468 | if (s1 < 0) |
469 | s1 = ~s1 + 1; |
470 | |
471 | rd_val += s1; |
472 | } |
473 | |
474 | *rd = rd_val; |
475 | } |
476 | |
477 | static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) |
478 | { |
479 | struct fpustate *f = FPUSTATE; |
480 | unsigned long rs1, rs2, gsr, scale, rd_val; |
481 | |
482 | gsr = current_thread_info()->gsr[0]; |
483 | scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); |
484 | switch (opf) { |
485 | case FPACK16_OPF: { |
486 | unsigned long byte; |
487 | |
488 | rs2 = fpd_regval(f, RS2(insn)); |
489 | rd_val = 0; |
490 | for (byte = 0; byte < 4; byte++) { |
491 | unsigned int val; |
492 | s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; |
493 | int scaled = src << scale; |
494 | int from_fixed = scaled >> 7; |
495 | |
496 | val = ((from_fixed < 0) ? |
497 | 0 : |
498 | (from_fixed > 255) ? |
499 | 255 : from_fixed); |
500 | |
501 | rd_val |= (val << (8 * byte)); |
502 | } |
503 | *fps_regaddr(f, RD(insn)) = rd_val; |
504 | break; |
505 | } |
506 | |
507 | case FPACK32_OPF: { |
508 | unsigned long word; |
509 | |
510 | rs1 = fpd_regval(f, RS1(insn)); |
511 | rs2 = fpd_regval(f, RS2(insn)); |
512 | rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); |
513 | for (word = 0; word < 2; word++) { |
514 | unsigned long val; |
515 | s32 src = (rs2 >> (word * 32UL)); |
516 | s64 scaled = src << scale; |
517 | s64 from_fixed = scaled >> 23; |
518 | |
519 | val = ((from_fixed < 0) ? |
520 | 0 : |
521 | (from_fixed > 255) ? |
522 | 255 : from_fixed); |
523 | |
524 | rd_val |= (val << (32 * word)); |
525 | } |
526 | *fpd_regaddr(f, RD(insn)) = rd_val; |
527 | break; |
528 | } |
529 | |
530 | case FPACKFIX_OPF: { |
531 | unsigned long word; |
532 | |
533 | rs2 = fpd_regval(f, RS2(insn)); |
534 | |
535 | rd_val = 0; |
536 | for (word = 0; word < 2; word++) { |
537 | long val; |
538 | s32 src = (rs2 >> (word * 32UL)); |
539 | s64 scaled = src << scale; |
540 | s64 from_fixed = scaled >> 16; |
541 | |
542 | val = ((from_fixed < -32768) ? |
543 | -32768 : |
544 | (from_fixed > 32767) ? |
545 | 32767 : from_fixed); |
546 | |
547 | rd_val |= ((val & 0xffff) << (word * 16)); |
548 | } |
549 | *fps_regaddr(f, RD(insn)) = rd_val; |
550 | break; |
551 | } |
552 | |
553 | case FEXPAND_OPF: { |
554 | unsigned long byte; |
555 | |
556 | rs2 = fps_regval(f, RS2(insn)); |
557 | |
558 | rd_val = 0; |
559 | for (byte = 0; byte < 4; byte++) { |
560 | unsigned long val; |
561 | u8 src = (rs2 >> (byte * 8)) & 0xff; |
562 | |
563 | val = src << 4; |
564 | |
565 | rd_val |= (val << (byte * 16)); |
566 | } |
567 | *fpd_regaddr(f, RD(insn)) = rd_val; |
568 | break; |
569 | } |
570 | |
571 | case FPMERGE_OPF: { |
572 | rs1 = fps_regval(f, RS1(insn)); |
573 | rs2 = fps_regval(f, RS2(insn)); |
574 | |
575 | rd_val = (((rs2 & 0x000000ff) << 0) | |
576 | ((rs1 & 0x000000ff) << 8) | |
577 | ((rs2 & 0x0000ff00) << 8) | |
578 | ((rs1 & 0x0000ff00) << 16) | |
579 | ((rs2 & 0x00ff0000) << 16) | |
580 | ((rs1 & 0x00ff0000) << 24) | |
581 | ((rs2 & 0xff000000) << 24) | |
582 | ((rs1 & 0xff000000) << 32)); |
583 | *fpd_regaddr(f, RD(insn)) = rd_val; |
584 | break; |
585 | } |
586 | } |
587 | } |
588 | |
589 | static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) |
590 | { |
591 | struct fpustate *f = FPUSTATE; |
592 | unsigned long rs1, rs2, rd_val; |
593 | |
594 | switch (opf) { |
595 | case FMUL8x16_OPF: { |
596 | unsigned long byte; |
597 | |
598 | rs1 = fps_regval(f, RS1(insn)); |
599 | rs2 = fpd_regval(f, RS2(insn)); |
600 | |
601 | rd_val = 0; |
602 | for (byte = 0; byte < 4; byte++) { |
603 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; |
604 | s16 src2 = (rs2 >> (byte * 16)) & 0xffff; |
605 | u32 prod = src1 * src2; |
606 | u16 scaled = ((prod & 0x00ffff00) >> 8); |
607 | |
608 | /* Round up. */ |
609 | if (prod & 0x80) |
610 | scaled++; |
611 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); |
612 | } |
613 | |
614 | *fpd_regaddr(f, RD(insn)) = rd_val; |
615 | break; |
616 | } |
617 | |
618 | case FMUL8x16AU_OPF: |
619 | case FMUL8x16AL_OPF: { |
620 | unsigned long byte; |
621 | s16 src2; |
622 | |
623 | rs1 = fps_regval(f, RS1(insn)); |
624 | rs2 = fps_regval(f, RS2(insn)); |
625 | |
626 | rd_val = 0; |
627 | src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); |
628 | for (byte = 0; byte < 4; byte++) { |
629 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; |
630 | u32 prod = src1 * src2; |
631 | u16 scaled = ((prod & 0x00ffff00) >> 8); |
632 | |
633 | /* Round up. */ |
634 | if (prod & 0x80) |
635 | scaled++; |
636 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); |
637 | } |
638 | |
639 | *fpd_regaddr(f, RD(insn)) = rd_val; |
640 | break; |
641 | } |
642 | |
643 | case FMUL8SUx16_OPF: |
644 | case FMUL8ULx16_OPF: { |
645 | unsigned long byte, ushift; |
646 | |
647 | rs1 = fpd_regval(f, RS1(insn)); |
648 | rs2 = fpd_regval(f, RS2(insn)); |
649 | |
650 | rd_val = 0; |
651 | ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; |
652 | for (byte = 0; byte < 4; byte++) { |
653 | u16 src1; |
654 | s16 src2; |
655 | u32 prod; |
656 | u16 scaled; |
657 | |
658 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); |
659 | src2 = ((rs2 >> (16 * byte)) & 0xffff); |
660 | prod = src1 * src2; |
661 | scaled = ((prod & 0x00ffff00) >> 8); |
662 | |
663 | /* Round up. */ |
664 | if (prod & 0x80) |
665 | scaled++; |
666 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); |
667 | } |
668 | |
669 | *fpd_regaddr(f, RD(insn)) = rd_val; |
670 | break; |
671 | } |
672 | |
673 | case FMULD8SUx16_OPF: |
674 | case FMULD8ULx16_OPF: { |
675 | unsigned long byte, ushift; |
676 | |
677 | rs1 = fps_regval(f, RS1(insn)); |
678 | rs2 = fps_regval(f, RS2(insn)); |
679 | |
680 | rd_val = 0; |
681 | ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; |
682 | for (byte = 0; byte < 2; byte++) { |
683 | u16 src1; |
684 | s16 src2; |
685 | u32 prod; |
686 | u16 scaled; |
687 | |
688 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); |
689 | src2 = ((rs2 >> (16 * byte)) & 0xffff); |
690 | prod = src1 * src2; |
691 | scaled = ((prod & 0x00ffff00) >> 8); |
692 | |
693 | /* Round up. */ |
694 | if (prod & 0x80) |
695 | scaled++; |
696 | rd_val |= ((scaled & 0xffffUL) << |
697 | ((byte * 32UL) + 7UL)); |
698 | } |
699 | *fpd_regaddr(f, RD(insn)) = rd_val; |
700 | break; |
701 | } |
702 | } |
703 | } |
704 | |
705 | static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) |
706 | { |
707 | struct fpustate *f = FPUSTATE; |
708 | unsigned long rs1, rs2, rd_val, i; |
709 | |
710 | rs1 = fpd_regval(f, RS1(insn)); |
711 | rs2 = fpd_regval(f, RS2(insn)); |
712 | |
713 | rd_val = 0; |
714 | |
715 | switch (opf) { |
716 | case FCMPGT16_OPF: |
717 | for (i = 0; i < 4; i++) { |
718 | s16 a = (rs1 >> (i * 16)) & 0xffff; |
719 | s16 b = (rs2 >> (i * 16)) & 0xffff; |
720 | |
721 | if (a > b) |
722 | rd_val |= 8 >> i; |
723 | } |
724 | break; |
725 | |
726 | case FCMPGT32_OPF: |
727 | for (i = 0; i < 2; i++) { |
728 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
729 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; |
730 | |
731 | if (a > b) |
732 | rd_val |= 2 >> i; |
733 | } |
734 | break; |
735 | |
736 | case FCMPLE16_OPF: |
737 | for (i = 0; i < 4; i++) { |
738 | s16 a = (rs1 >> (i * 16)) & 0xffff; |
739 | s16 b = (rs2 >> (i * 16)) & 0xffff; |
740 | |
741 | if (a <= b) |
742 | rd_val |= 8 >> i; |
743 | } |
744 | break; |
745 | |
746 | case FCMPLE32_OPF: |
747 | for (i = 0; i < 2; i++) { |
748 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
749 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; |
750 | |
751 | if (a <= b) |
752 | rd_val |= 2 >> i; |
753 | } |
754 | break; |
755 | |
756 | case FCMPNE16_OPF: |
757 | for (i = 0; i < 4; i++) { |
758 | s16 a = (rs1 >> (i * 16)) & 0xffff; |
759 | s16 b = (rs2 >> (i * 16)) & 0xffff; |
760 | |
761 | if (a != b) |
762 | rd_val |= 8 >> i; |
763 | } |
764 | break; |
765 | |
766 | case FCMPNE32_OPF: |
767 | for (i = 0; i < 2; i++) { |
768 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
769 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; |
770 | |
771 | if (a != b) |
772 | rd_val |= 2 >> i; |
773 | } |
774 | break; |
775 | |
776 | case FCMPEQ16_OPF: |
777 | for (i = 0; i < 4; i++) { |
778 | s16 a = (rs1 >> (i * 16)) & 0xffff; |
779 | s16 b = (rs2 >> (i * 16)) & 0xffff; |
780 | |
781 | if (a == b) |
782 | rd_val |= 8 >> i; |
783 | } |
784 | break; |
785 | |
786 | case FCMPEQ32_OPF: |
787 | for (i = 0; i < 2; i++) { |
788 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
789 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; |
790 | |
791 | if (a == b) |
792 | rd_val |= 2 >> i; |
793 | } |
794 | break; |
795 | } |
796 | |
797 | maybe_flush_windows(rs1: 0, rs2: 0, RD(insn), from_kernel: 0); |
798 | store_reg(regs, val: rd_val, RD(insn)); |
799 | } |
800 | |
801 | /* Emulate the VIS instructions which are not implemented in |
802 | * hardware on Niagara. |
803 | */ |
804 | int vis_emul(struct pt_regs *regs, unsigned int insn) |
805 | { |
806 | unsigned long pc = regs->tpc; |
807 | unsigned int opf; |
808 | |
809 | BUG_ON(regs->tstate & TSTATE_PRIV); |
810 | |
811 | perf_sw_event(event_id: PERF_COUNT_SW_EMULATION_FAULTS, nr: 1, regs, addr: 0); |
812 | |
813 | if (test_thread_flag(TIF_32BIT)) |
814 | pc = (u32)pc; |
815 | |
816 | if (get_user(insn, (u32 __user *) pc)) |
817 | return -EFAULT; |
818 | |
819 | save_and_clear_fpu(); |
820 | |
821 | opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; |
822 | switch (opf) { |
823 | default: |
824 | return -EINVAL; |
825 | |
826 | /* Pixel Formatting Instructions. */ |
827 | case FPACK16_OPF: |
828 | case FPACK32_OPF: |
829 | case FPACKFIX_OPF: |
830 | case FEXPAND_OPF: |
831 | case FPMERGE_OPF: |
832 | pformat(regs, insn, opf); |
833 | break; |
834 | |
835 | /* Partitioned Multiply Instructions */ |
836 | case FMUL8x16_OPF: |
837 | case FMUL8x16AU_OPF: |
838 | case FMUL8x16AL_OPF: |
839 | case FMUL8SUx16_OPF: |
840 | case FMUL8ULx16_OPF: |
841 | case FMULD8SUx16_OPF: |
842 | case FMULD8ULx16_OPF: |
843 | pmul(regs, insn, opf); |
844 | break; |
845 | |
846 | /* Pixel Compare Instructions */ |
847 | case FCMPGT16_OPF: |
848 | case FCMPGT32_OPF: |
849 | case FCMPLE16_OPF: |
850 | case FCMPLE32_OPF: |
851 | case FCMPNE16_OPF: |
852 | case FCMPNE32_OPF: |
853 | case FCMPEQ16_OPF: |
854 | case FCMPEQ32_OPF: |
855 | pcmp(regs, insn, opf); |
856 | break; |
857 | |
858 | /* Edge Handling Instructions */ |
859 | case EDGE8_OPF: |
860 | case EDGE8N_OPF: |
861 | case EDGE8L_OPF: |
862 | case EDGE8LN_OPF: |
863 | case EDGE16_OPF: |
864 | case EDGE16N_OPF: |
865 | case EDGE16L_OPF: |
866 | case EDGE16LN_OPF: |
867 | case EDGE32_OPF: |
868 | case EDGE32N_OPF: |
869 | case EDGE32L_OPF: |
870 | case EDGE32LN_OPF: |
871 | edge(regs, insn, opf); |
872 | break; |
873 | |
874 | /* Pixel Component Distance */ |
875 | case PDIST_OPF: |
876 | pdist(regs, insn); |
877 | break; |
878 | |
879 | /* Three-Dimensional Array Addressing Instructions */ |
880 | case ARRAY8_OPF: |
881 | case ARRAY16_OPF: |
882 | case ARRAY32_OPF: |
883 | array(regs, insn, opf); |
884 | break; |
885 | |
886 | /* Byte Mask and Shuffle Instructions */ |
887 | case BMASK_OPF: |
888 | bmask(regs, insn); |
889 | break; |
890 | |
891 | case BSHUFFLE_OPF: |
892 | bshuffle(regs, insn); |
893 | break; |
894 | } |
895 | |
896 | regs->tpc = regs->tnpc; |
897 | regs->tnpc += 4; |
898 | return 0; |
899 | } |
900 | |