1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * store hypervisor information instruction emulation functions. |
4 | * |
5 | * Copyright IBM Corp. 2016 |
6 | * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> |
7 | */ |
8 | #include <linux/errno.h> |
9 | #include <linux/pagemap.h> |
10 | #include <linux/vmalloc.h> |
11 | #include <linux/syscalls.h> |
12 | #include <linux/mutex.h> |
13 | #include <asm/asm-offsets.h> |
14 | #include <asm/sclp.h> |
15 | #include <asm/diag.h> |
16 | #include <asm/sysinfo.h> |
17 | #include <asm/ebcdic.h> |
18 | #include <asm/facility.h> |
19 | #include <asm/sthyi.h> |
20 | #include "entry.h" |
21 | |
/*
 * Weight value that marks a CPU as dedicated: CPUs whose weight equals
 * DED_WEIGHT are counted as dedicated, all others as shared.
 */
#define DED_WEIGHT 0xffff
/*
 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
 * as they are justified with spaces.
 *
 * These are compared against the 8-byte value returned by cpu_id().
 */
#define CP 0xc3d7404040404040UL
#define IFL 0xc9c6d34040404040UL
29 | |
/*
 * Flag bits for the header section. HDR_PERF_UNAV is OR'ed into
 * hdr.infhflg1 when global performance data is not available.
 */
enum hdr_flags {
	HDR_NOT_LPAR	= 1 << 4,
	HDR_STACK_INCM	= 1 << 5,
	HDR_STSI_UNAV	= 1 << 6,
	HDR_PERF_UNAV	= 1 << 7,
};
36 | |
/* Validity bits that are OR'ed into mac.infmval1. */
enum mac_validity {
	MAC_NAME_VLD	= 1 << 5,
	MAC_ID_VLD	= 1 << 6,
	MAC_CNT_VLD	= 1 << 7,
};
42 | |
/* Flag stored in par.infpflg1 when the partition header reports a non-zero mtid. */
enum par_flag {
	PAR_MT_EN	= 1 << 7,
};
46 | |
/* Validity bits that are OR'ed into par.infpval1. */
enum par_validity {
	PAR_GRP_VLD	= 1 << 3,
	PAR_ID_VLD	= 1 << 4,
	PAR_ABS_VLD	= 1 << 5,
	PAR_WGHT_VLD	= 1 << 6,
	PAR_PCNT_VLD	= 1 << 7,
};
54 | |
/*
 * Header section at the start of the response buffer. fill_hdr() stores
 * the lengths and offsets of the header, machine and partition sections
 * here.
 */
struct hdr_sctn {
	/* Flag byte; fill_diag() sets HDR_PERF_UNAV in it. */
	u8 infhflg1;
	u8 infhflg2; /* reserved */
	u8 infhval1; /* reserved */
	u8 infhval2; /* reserved */
	u8 reserved[3];
	u8 infhygct;
	/* Total length: infpoff + infplen. */
	u16 infhtotl;
	/* Length of this header section. */
	u16 infhdln;
	/* Offset and length of the machine section. */
	u16 infmoff;
	u16 infmlen;
	/* Offset and length of the partition section. */
	u16 infpoff;
	u16 infplen;
	/*
	 * The remaining offset/length pairs are not written by the
	 * emulation code visible in this file.
	 * NOTE(review): presumably they describe further hypervisor/guest
	 * sections - confirm against the STHYI specification.
	 */
	u16 infhoff1;
	u16 infhlen1;
	u16 infgoff1;
	u16 infglen1;
	u16 infhoff2;
	u16 infhlen2;
	u16 infgoff2;
	u16 infglen2;
	u16 infhoff3;
	u16 infhlen3;
	u16 infgoff3;
	u16 infglen3;
	u8 reserved2[4];
} __packed;
82 | |
/*
 * Machine section of the response. Filled by fill_stsi_mac() (name and
 * identification) and fill_diag_mac() (CPU counts).
 */
struct mac_sctn {
	u8 infmflg1; /* reserved */
	u8 infmflg2; /* reserved */
	/* Validity bits, see enum mac_validity. */
	u8 infmval1;
	u8 infmval2; /* reserved */
	/* Number of CPs whose weight is not DED_WEIGHT (shared). */
	u16 infmscps;
	/* Number of CPs whose weight is DED_WEIGHT (dedicated). */
	u16 infmdcps;
	/* Number of IFLs whose weight is not DED_WEIGHT (shared). */
	u16 infmsifl;
	/* Number of IFLs whose weight is DED_WEIGHT (dedicated). */
	u16 infmdifl;
	/* Filled by sclp_ocf_cpc_name_copy(); valid if non-zero. */
	char infmname[8];
	/* Copied from the STSI 1.1.1 type, manufacturer, sequence and plant fields. */
	char infmtype[4];
	char infmmanu[16];
	char infmseq[16];
	char infmpman[4];
	u8 reserved[4];
} __packed;
99 | |
/*
 * Partition section of the response. Filled by fill_stsi_par()
 * (identification) and fill_diag() (CPU counts, caps and weights).
 */
struct par_sctn {
	/* Set to PAR_MT_EN if the calling partition's mtid is non-zero. */
	u8 infpflg1;
	u8 infpflg2; /* reserved */
	/* Validity bits, see enum par_validity. */
	u8 infpval1;
	u8 infpval2; /* reserved */
	/* Copied from the STSI 2.2.2 lpar_number field. */
	u16 infppnum;
	/* Shared / dedicated CP counts of the calling partition. */
	u16 infpscps;
	u16 infpdcps;
	/* Shared / dedicated IFL counts of the calling partition. */
	u16 infpsifl;
	u16 infpdifl;
	u16 reserved;
	/* Copied from the STSI 2.2.2 name field. */
	char infppnam[8];
	/* CP: weight-based capacity and scaled lpar_cap. */
	u32 infpwbcp;
	u32 infpabcp;
	/* IFL: weight-based capacity and scaled lpar_cap. */
	u32 infpwbif;
	u32 infpabif;
	/* Copied from the partition header's hardware_group_name. */
	char infplgnm[8];
	/* Scaled group caps for CP and IFL. */
	u32 infplgcp;
	u32 infplgif;
} __packed;
120 | |
/*
 * Layout of the emulated response: header, machine and partition
 * section back to back. fill_dst() overlays this on the destination
 * buffer.
 */
struct sthyi_sctns {
	struct hdr_sctn hdr;
	struct mac_sctn mac;
	struct par_sctn par;
} __packed;
126 | |
/*
 * Values collected by lpar_cpu_inf() for one CPU type while walking
 * the partition blocks.
 */
struct cpu_inf {
	/* OR of cpu_type_cap over the calling LPAR's online CPUs. */
	u64 lpar_cap;
	/* OR of group_cpu_type_cap over the calling LPAR's online CPUs. */
	u64 lpar_grp_cap;
	/* Weight of the calling LPAR; only stored if one of its CPUs is capped. */
	u64 lpar_weight;
	/* Sum of the weights determined for each partition block. */
	u64 all_weight;
	/* Calling LPAR: online CPUs with weight == DED_WEIGHT. */
	int cpu_num_ded;
	/* Calling LPAR: online CPUs with any other weight. */
	int cpu_num_shd;
};
135 | |
/* Collected CPU information, kept separately for the CP and IFL CPU types. */
struct lpar_cpu_inf {
	struct cpu_inf cp;
	struct cpu_inf ifl;
};
140 | |
/*
 * STHYI requires extensive locking in the higher hypervisors
 * and is very computational/memory expensive. Therefore we
 * cache the retrieved data whose valid period is 1s.
 */
#define CACHE_VALID_JIFFIES HZ

struct sthyi_info {
	/* Page holding the cached response; allocated by sthyi_init_cache(). */
	void *info;
	/* jiffies value after which the cached data counts as expired. */
	unsigned long end;
};

/* Taken by sthyi_fill() around every access to sthyi_cache. */
static DEFINE_MUTEX(sthyi_mutex);
static struct sthyi_info sthyi_cache;
155 | |
156 | static inline u64 cpu_id(u8 ctidx, void *diag224_buf) |
157 | { |
158 | return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); |
159 | } |
160 | |
161 | /* |
162 | * Scales the cpu capping from the lpar range to the one expected in |
163 | * sthyi data. |
164 | * |
165 | * diag204 reports a cap in hundredths of processor units. |
166 | * z/VM's range for one core is 0 - 0x10000. |
167 | */ |
168 | static u32 scale_cap(u32 in) |
169 | { |
170 | return (0x10000 * in) / 100; |
171 | } |
172 | |
173 | static void fill_hdr(struct sthyi_sctns *sctns) |
174 | { |
175 | sctns->hdr.infhdln = sizeof(sctns->hdr); |
176 | sctns->hdr.infmoff = sizeof(sctns->hdr); |
177 | sctns->hdr.infmlen = sizeof(sctns->mac); |
178 | sctns->hdr.infplen = sizeof(sctns->par); |
179 | sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; |
180 | sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; |
181 | } |
182 | |
183 | static void fill_stsi_mac(struct sthyi_sctns *sctns, |
184 | struct sysinfo_1_1_1 *sysinfo) |
185 | { |
186 | sclp_ocf_cpc_name_copy(sctns->mac.infmname); |
187 | if (*(u64 *)sctns->mac.infmname != 0) |
188 | sctns->mac.infmval1 |= MAC_NAME_VLD; |
189 | |
190 | if (stsi(sysinfo, 1, 1, 1)) |
191 | return; |
192 | |
193 | memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); |
194 | memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); |
195 | memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); |
196 | memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); |
197 | |
198 | sctns->mac.infmval1 |= MAC_ID_VLD; |
199 | } |
200 | |
201 | static void fill_stsi_par(struct sthyi_sctns *sctns, |
202 | struct sysinfo_2_2_2 *sysinfo) |
203 | { |
204 | if (stsi(sysinfo, 2, 2, 2)) |
205 | return; |
206 | |
207 | sctns->par.infppnum = sysinfo->lpar_number; |
208 | memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); |
209 | |
210 | sctns->par.infpval1 |= PAR_ID_VLD; |
211 | } |
212 | |
213 | static void fill_stsi(struct sthyi_sctns *sctns) |
214 | { |
215 | void *sysinfo; |
216 | |
217 | /* Errors are handled through the validity bits in the response. */ |
218 | sysinfo = (void *)__get_free_page(GFP_KERNEL); |
219 | if (!sysinfo) |
220 | return; |
221 | |
222 | fill_stsi_mac(sctns, sysinfo); |
223 | fill_stsi_par(sctns, sysinfo); |
224 | |
225 | free_pages(addr: (unsigned long)sysinfo, order: 0); |
226 | } |
227 | |
228 | static void fill_diag_mac(struct sthyi_sctns *sctns, |
229 | struct diag204_x_phys_block *block, |
230 | void *diag224_buf) |
231 | { |
232 | int i; |
233 | |
234 | for (i = 0; i < block->hdr.cpus; i++) { |
235 | switch (cpu_id(ctidx: block->cpus[i].ctidx, diag224_buf)) { |
236 | case CP: |
237 | if (block->cpus[i].weight == DED_WEIGHT) |
238 | sctns->mac.infmdcps++; |
239 | else |
240 | sctns->mac.infmscps++; |
241 | break; |
242 | case IFL: |
243 | if (block->cpus[i].weight == DED_WEIGHT) |
244 | sctns->mac.infmdifl++; |
245 | else |
246 | sctns->mac.infmsifl++; |
247 | break; |
248 | } |
249 | } |
250 | sctns->mac.infmval1 |= MAC_CNT_VLD; |
251 | } |
252 | |
253 | /* Returns a pointer to the the next partition block. */ |
254 | static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, |
255 | bool this_lpar, |
256 | void *diag224_buf, |
257 | struct diag204_x_part_block *block) |
258 | { |
259 | int i, capped = 0, weight_cp = 0, weight_ifl = 0; |
260 | struct cpu_inf *cpu_inf; |
261 | |
262 | for (i = 0; i < block->hdr.rcpus; i++) { |
263 | if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) |
264 | continue; |
265 | |
266 | switch (cpu_id(ctidx: block->cpus[i].ctidx, diag224_buf)) { |
267 | case CP: |
268 | cpu_inf = &part_inf->cp; |
269 | if (block->cpus[i].cur_weight < DED_WEIGHT) |
270 | weight_cp |= block->cpus[i].cur_weight; |
271 | break; |
272 | case IFL: |
273 | cpu_inf = &part_inf->ifl; |
274 | if (block->cpus[i].cur_weight < DED_WEIGHT) |
275 | weight_ifl |= block->cpus[i].cur_weight; |
276 | break; |
277 | default: |
278 | continue; |
279 | } |
280 | |
281 | if (!this_lpar) |
282 | continue; |
283 | |
284 | capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; |
285 | cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; |
286 | cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; |
287 | |
288 | if (block->cpus[i].weight == DED_WEIGHT) |
289 | cpu_inf->cpu_num_ded += 1; |
290 | else |
291 | cpu_inf->cpu_num_shd += 1; |
292 | } |
293 | |
294 | if (this_lpar && capped) { |
295 | part_inf->cp.lpar_weight = weight_cp; |
296 | part_inf->ifl.lpar_weight = weight_ifl; |
297 | } |
298 | part_inf->cp.all_weight += weight_cp; |
299 | part_inf->ifl.all_weight += weight_ifl; |
300 | return (struct diag204_x_part_block *)&block->cpus[i]; |
301 | } |
302 | |
303 | static void fill_diag(struct sthyi_sctns *sctns) |
304 | { |
305 | int i, r, pages; |
306 | bool this_lpar; |
307 | void *diag204_buf; |
308 | void *diag224_buf = NULL; |
309 | struct diag204_x_info_blk_hdr *ti_hdr; |
310 | struct diag204_x_part_block *part_block; |
311 | struct diag204_x_phys_block *phys_block; |
312 | struct lpar_cpu_inf lpar_inf = {}; |
313 | |
314 | /* Errors are handled through the validity bits in the response. */ |
315 | pages = diag204((unsigned long)DIAG204_SUBC_RSI | |
316 | (unsigned long)DIAG204_INFO_EXT, 0, NULL); |
317 | if (pages <= 0) |
318 | return; |
319 | |
320 | diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), |
321 | PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, |
322 | caller: __builtin_return_address(0)); |
323 | if (!diag204_buf) |
324 | return; |
325 | |
326 | r = diag204((unsigned long)DIAG204_SUBC_STIB7 | |
327 | (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); |
328 | if (r < 0) |
329 | goto out; |
330 | |
331 | diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); |
332 | if (!diag224_buf || diag224(diag224_buf)) |
333 | goto out; |
334 | |
335 | ti_hdr = diag204_buf; |
336 | part_block = diag204_buf + sizeof(*ti_hdr); |
337 | |
338 | for (i = 0; i < ti_hdr->npar; i++) { |
339 | /* |
340 | * For the calling lpar we also need to get the cpu |
341 | * caps and weights. The time information block header |
342 | * specifies the offset to the partition block of the |
343 | * caller lpar, so we know when we process its data. |
344 | */ |
345 | this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; |
346 | part_block = lpar_cpu_inf(part_inf: &lpar_inf, this_lpar, diag224_buf, |
347 | block: part_block); |
348 | } |
349 | |
350 | phys_block = (struct diag204_x_phys_block *)part_block; |
351 | part_block = diag204_buf + ti_hdr->this_part; |
352 | if (part_block->hdr.mtid) |
353 | sctns->par.infpflg1 = PAR_MT_EN; |
354 | |
355 | sctns->par.infpval1 |= PAR_GRP_VLD; |
356 | sctns->par.infplgcp = scale_cap(in: lpar_inf.cp.lpar_grp_cap); |
357 | sctns->par.infplgif = scale_cap(in: lpar_inf.ifl.lpar_grp_cap); |
358 | memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, |
359 | sizeof(sctns->par.infplgnm)); |
360 | |
361 | sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; |
362 | sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; |
363 | sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; |
364 | sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; |
365 | sctns->par.infpval1 |= PAR_PCNT_VLD; |
366 | |
367 | sctns->par.infpabcp = scale_cap(in: lpar_inf.cp.lpar_cap); |
368 | sctns->par.infpabif = scale_cap(in: lpar_inf.ifl.lpar_cap); |
369 | sctns->par.infpval1 |= PAR_ABS_VLD; |
370 | |
371 | /* |
372 | * Everything below needs global performance data to be |
373 | * meaningful. |
374 | */ |
375 | if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { |
376 | sctns->hdr.infhflg1 |= HDR_PERF_UNAV; |
377 | goto out; |
378 | } |
379 | |
380 | fill_diag_mac(sctns, block: phys_block, diag224_buf); |
381 | |
382 | if (lpar_inf.cp.lpar_weight) { |
383 | sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * |
384 | lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; |
385 | } |
386 | |
387 | if (lpar_inf.ifl.lpar_weight) { |
388 | sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * |
389 | lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; |
390 | } |
391 | sctns->par.infpval1 |= PAR_WGHT_VLD; |
392 | |
393 | out: |
394 | free_page((unsigned long)diag224_buf); |
395 | vfree(addr: diag204_buf); |
396 | } |
397 | |
/*
 * Execute the STHYI instruction (opcode 0xB256) with subcode 0.
 *
 * @vaddr: address of the response buffer, passed in the even register
 *         of the R2 register pair
 * @rc:    receives the odd register of the R2 pair after the instruction
 *
 * Returns the condition code of the instruction.
 */
static int sthyi(u64 vaddr, u64 *rc)
{
	union register_pair r1 = { .even = 0, }; /* subcode */
	union register_pair r2 = { .even = vaddr, };
	int cc;

	/* ipm + srl 28 move the condition code into the low bits of cc. */
	asm volatile(
		".insn rre,0xB2560000,%[r1],%[r2]\n"
		"ipm %[cc]\n"
		"srl %[cc],28\n"
		: [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
		: [r1] "d" (r1.pair)
		: "memory", "cc");
	*rc = r2.odd;
	return cc;
}
414 | |
415 | static int fill_dst(void *dst, u64 *rc) |
416 | { |
417 | struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; |
418 | |
419 | /* |
420 | * If the facility is on, we don't want to emulate the instruction. |
421 | * We ask the hypervisor to provide the data. |
422 | */ |
423 | if (test_facility(74)) |
424 | return sthyi(vaddr: (u64)dst, rc); |
425 | |
426 | fill_hdr(sctns); |
427 | fill_stsi(sctns); |
428 | fill_diag(sctns); |
429 | *rc = 0; |
430 | return 0; |
431 | } |
432 | |
433 | static int sthyi_init_cache(void) |
434 | { |
435 | if (sthyi_cache.info) |
436 | return 0; |
437 | sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); |
438 | if (!sthyi_cache.info) |
439 | return -ENOMEM; |
440 | sthyi_cache.end = jiffies - 1; /* expired */ |
441 | return 0; |
442 | } |
443 | |
444 | static int sthyi_update_cache(u64 *rc) |
445 | { |
446 | int r; |
447 | |
448 | memset(sthyi_cache.info, 0, PAGE_SIZE); |
449 | r = fill_dst(dst: sthyi_cache.info, rc); |
450 | if (r) |
451 | return r; |
452 | sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; |
453 | return r; |
454 | } |
455 | |
456 | /* |
457 | * sthyi_fill - Fill page with data returned by the STHYI instruction |
458 | * |
459 | * @dst: Pointer to zeroed page |
460 | * @rc: Pointer for storing the return code of the instruction |
461 | * |
462 | * Fills the destination with system information returned by the STHYI |
463 | * instruction. The data is generated by emulation or execution of STHYI, |
464 | * if available. The return value is either a negative error value or |
465 | * the condition code that would be returned, the rc parameter is the |
466 | * return code which is passed in register R2 + 1. |
467 | */ |
468 | int sthyi_fill(void *dst, u64 *rc) |
469 | { |
470 | int r; |
471 | |
472 | mutex_lock(&sthyi_mutex); |
473 | r = sthyi_init_cache(); |
474 | if (r) |
475 | goto out; |
476 | |
477 | if (time_is_before_jiffies(sthyi_cache.end)) { |
478 | /* cache expired */ |
479 | r = sthyi_update_cache(rc); |
480 | if (r) |
481 | goto out; |
482 | } |
483 | *rc = 0; |
484 | memcpy(dst, sthyi_cache.info, PAGE_SIZE); |
485 | out: |
486 | mutex_unlock(lock: &sthyi_mutex); |
487 | return r; |
488 | } |
489 | EXPORT_SYMBOL_GPL(sthyi_fill); |
490 | |
491 | SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, |
492 | u64 __user *, return_code, unsigned long, flags) |
493 | { |
494 | u64 sthyi_rc; |
495 | void *info; |
496 | int r; |
497 | |
498 | if (flags) |
499 | return -EINVAL; |
500 | if (function_code != STHYI_FC_CP_IFL_CAP) |
501 | return -EOPNOTSUPP; |
502 | info = (void *)get_zeroed_page(GFP_KERNEL); |
503 | if (!info) |
504 | return -ENOMEM; |
505 | r = sthyi_fill(info, &sthyi_rc); |
506 | if (r < 0) |
507 | goto out; |
508 | if (return_code && put_user(sthyi_rc, return_code)) { |
509 | r = -EFAULT; |
510 | goto out; |
511 | } |
512 | if (copy_to_user(to: buffer, from: info, PAGE_SIZE)) |
513 | r = -EFAULT; |
514 | out: |
515 | free_page((unsigned long)info); |
516 | return r; |
517 | } |
518 | |