// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2020-21 IBM Corp.
 */

#define pr_fmt(fmt) "vas: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <asm/machdep.h>
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
#include <asm/firmware.h>
#include <asm/vphn.h>
#include <asm/vas.h>
#include "vas.h"

#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
/* The hypervisor allows one credit per window right now */
#define DEF_WIN_CREDS	1

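/*
 * Per-feature (GZIP QoS and GZIP default) capabilities and their open
 * window lists. vascaps[], the window lists and migration_in_progress
 * are protected by vas_pseries_mutex.
 */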
static struct vas_all_caps caps_all;
static bool copypaste_feat;
static struct hv_vas_cop_feat_caps hv_cop_caps;

static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
static DEFINE_MUTEX(vas_pseries_mutex);
static bool migration_in_progress;

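/*
 * Common busy-retry handling for the VAS hcalls below: sleep for the
 * hypervisor-suggested delay on a long-busy return code, or yield the
 * CPU on H_BUSY, normalizing both cases to H_BUSY so callers can
 * simply retry, e.g.:
 *
 *	do {
 *		rc = plpar_hcall_norets(...);
 *		rc = hcall_return_busy_check(rc);
 *	} while (rc == H_BUSY);
 */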
static long hcall_return_busy_check(long rc)
{
	/* Check if we are stalled for some time */
	if (H_IS_LONG_BUSY(rc)) {
		msleep(get_longbusy_msecs(rc));
		rc = H_BUSY;
	} else if (rc == H_BUSY) {
		cond_resched();
	}

	return rc;
}

/*
 * Allocate VAS window hcall
 */
static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
				 u8 wintype, u16 credits)
{
	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
	long rc;

	do {
		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
				  credits, domain[0], domain[1], domain[2],
				  domain[3], domain[4], domain[5]);

		rc = hcall_return_busy_check(rc);
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS) {
		win->vas_win.winid = retbuf[0];
		win->win_addr = retbuf[1];
		win->complete_irq = retbuf[2];
		win->fault_irq = retbuf[3];
		/* Check the returned paste address, not the stale value */
		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
			return -ENOTSUPP;
		}
		return 0;
	}

	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
	       rc, wintype, credits);

	return -EIO;
}

/*
 * Deallocate VAS window hcall.
 */
static int h_deallocate_vas_window(u64 winid)
{
	long rc;

	do {
		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);

		rc = hcall_return_busy_check(rc);
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS)
		return 0;

	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
	       rc, winid);
	return -EIO;
}

/*
 * Modify VAS window.
 * After the window is opened with the allocate window hcall, configure it
 * with flags and LPAR PID before using.
 */
static int h_modify_vas_window(struct pseries_vas_window *win)
{
	long rc;

	/*
	 * AMR value is not supported in Linux VAS implementation.
	 * The hypervisor ignores it if 0 is passed.
	 */
	do {
		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
					win->vas_win.winid, win->pid, 0,
					VAS_MOD_WIN_FLAGS, 0);

		rc = hcall_return_busy_check(rc);
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS)
		return 0;

	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
	       rc, win->vas_win.winid, win->pid);
	return -EIO;
}

/*
 * This hcall is used to determine the capabilities from the hypervisor.
 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
 * @query_type: If 0 is passed, the hypervisor returns the overall
 *		capabilities which provide all feature(s) that are
 *		available. Then query the hypervisor to get the
 *		corresponding capabilities for the specific feature.
 *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
 *			and VAS GZIP Default capabilities.
 *			H_QUERY_NX_CAPABILITIES provides NX GZIP
 *			capabilities.
 * @result: Return buffer to save capabilities.
 */
int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
{
	long rc;

	rc = plpar_hcall_norets(hcall, query_type, result);

	if (rc == H_SUCCESS)
		return 0;

	/* H_FUNCTION means HV does not support VAS so don't print an error */
	if (rc != H_FUNCTION) {
		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
		       (hcall == H_QUERY_VAS_CAPABILITIES) ?
				"H_QUERY_VAS_CAPABILITIES" :
				"H_QUERY_NX_CAPABILITIES",
		       rc, query_type, result);
	}

	return -EIO;
}
EXPORT_SYMBOL_GPL(h_query_vas_capabilities);

/*
 * hcall to get fault CRB from the hypervisor.
 */
static int h_get_nx_fault(u32 winid, u64 buffer)
{
	long rc;

	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);

	if (rc == H_SUCCESS)
		return 0;

	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
	       rc, winid, buffer);
	return -EIO;
}

/*
 * Handle the fault interrupt.
 * When the fault interrupt is received for each window, query the
 * hypervisor to get the fault CRB on the specific fault. Then
 * process the CRB by updating the CSB, or send a signal if the user
 * space CSB is invalid.
 * Note: The hypervisor forwards an interrupt for each fault request.
 *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
 */
static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
{
	struct pseries_vas_window *txwin = data;
	struct coprocessor_request_block crb;
	struct vas_user_win_ref *tsk_ref;
	int rc;

	while (atomic_read(&txwin->pending_faults)) {
		rc = h_get_nx_fault(txwin->vas_win.winid,
				    (u64)virt_to_phys(&crb));
		if (!rc) {
			tsk_ref = &txwin->vas_win.task_ref;
			vas_dump_crb(&crb);
			vas_update_csb(&crb, tsk_ref);
		}
		atomic_dec(&txwin->pending_faults);
	}

	return IRQ_HANDLED;
}

/*
 * irq_default_primary_handler() can be used only with IRQF_ONESHOT
 * which disables IRQ before executing the thread handler and enables
 * it after. But this disabling interrupt sets the VAS IRQ OFF
 * state in the hypervisor. If the NX generates a fault interrupt
 * during this window, the hypervisor will not deliver this
 * interrupt to the LPAR. So use a VAS specific IRQ handler instead
 * of calling the default primary handler.
 */
static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
{
	struct pseries_vas_window *txwin = data;

	/*
	 * The thread handler will process this interrupt if it is
	 * already running.
	 */
	atomic_inc(&txwin->pending_faults);

	return IRQ_WAKE_THREAD;
}

/*
 * Allocate window and setup IRQ mapping.
 */
static int allocate_setup_window(struct pseries_vas_window *txwin,
				 u64 *domain, u8 wintype)
{
	int rc;

	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
	if (rc)
		return rc;
	/*
	 * On PowerVM, the hypervisor sets up and forwards the fault
	 * interrupt per window. So the IRQ setup and fault handling
	 * will be done for each open window separately.
	 */
	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
	if (!txwin->fault_virq) {
		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
		rc = -EINVAL;
		goto out_win;
	}

	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
				txwin->vas_win.winid);
	if (!txwin->name) {
		rc = -ENOMEM;
		goto out_irq;
	}

	rc = request_threaded_irq(txwin->fault_virq,
				  pseries_vas_irq_handler,
				  pseries_vas_fault_thread_fn, 0,
				  txwin->name, txwin);
	if (rc) {
		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
		       txwin->vas_win.winid, txwin->fault_virq, rc);
		goto out_free;
	}

	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;

	return 0;
out_free:
	kfree(txwin->name);
out_irq:
	irq_dispose_mapping(txwin->fault_virq);
out_win:
	h_deallocate_vas_window(txwin->vas_win.winid);
	return rc;
}

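/*
 * Undo the IRQ setup done in allocate_setup_window(): free the
 * registered IRQ and its name, then dispose of the virq mapping.
 */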
static inline void free_irq_setup(struct pseries_vas_window *txwin)
{
	free_irq(txwin->fault_virq, txwin);
	kfree(txwin->name);
	irq_dispose_mapping(txwin->fault_virq);
}

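/*
 * The open_win callback for user space: allocate a TX window in the
 * hypervisor, set up its fault IRQ, and configure the window with the
 * LPAR PID so that the process can paste requests on it.
 */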
static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
					      enum vas_cop_type cop_type)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *cop_feat_caps;
	struct vas_caps *caps;
	struct pseries_vas_window *txwin;
	int rc;

	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
	if (!txwin)
		return ERR_PTR(-ENOMEM);

	/*
	 * A VAS window can have many credits which means that many
	 * requests can be issued simultaneously. But the hypervisor
	 * restricts one credit per window.
	 * The hypervisor introduces 2 different types of credits:
	 * Default credit type (Uses normal priority FIFO):
	 *	A limited number of credits are assigned to partitions
	 *	based on processor entitlement. But these credits may
	 *	be over-committed on a system depending on whether the
	 *	CPUs are in shared or dedicated modes - that is, more
	 *	requests may be issued across the system than NX can
	 *	service at once which can result in paste command
	 *	failure (RMA_busy). Then the process has to resend
	 *	requests or fall back to SW compression.
	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
	 *	To avoid NX HW contention, the system admins can assign
	 *	QoS credits for each LPAR so that this partition is
	 *	guaranteed access to NX resources. These credits are
	 *	assigned to partitions via the HMC.
	 * Refer to PAPR for more information.
	 *
	 * Allocate window with QoS credits if the user requested.
	 * Otherwise default credits are used.
	 */
	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
	else
		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];

	cop_feat_caps = &caps->caps;

	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
			atomic_read(&cop_feat_caps->nr_total_credits)) {
		pr_err_ratelimited("Credits are not available to allocate window\n");
		rc = -EINVAL;
		goto out;
	}

	if (vas_id == -1) {
		/*
		 * The user space is requesting to allocate a window on
		 * a VAS instance where the process is executing.
		 * On PowerVM, domain values are passed to the hypervisor
		 * to select the VAS instance. Useful if the process has
		 * NUMA node affinity.
		 * The hypervisor selects the VAS instance if
		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
		 * The h_allocate_vas_window hcall is defined to take
		 * domain values as specified by h_home_node_associativity,
		 * so no unpacking needs to be done.
		 */
		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
				  VPHN_FLAG_VCPU, hard_smp_processor_id());
		if (rc != H_SUCCESS) {
			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
			goto out;
		}
	}

	txwin->pid = mfspr(SPRN_PID);

	/*
	 * Allocate / Deallocate window hcalls and setup / free IRQs
	 * have to be protected with mutex.
	 * Open VAS window: Allocate window hcall and setup IRQ
	 * Close VAS window: Deallocate window hcall and free IRQ
	 *	The hypervisor waits until all NX requests are
	 *	completed before closing the window. So it expects the
	 *	OS to handle NX faults, which means the IRQ can be
	 *	freed only after the deallocate window hcall returns.
	 * If a window is closed with the deallocate hcall before its
	 * IRQ is freed, the hypervisor can hand the same fault IRQ to
	 * a new allocate hcall. IRQ setup then fails for the new
	 * window since the OS has not freed the fault IRQ yet.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (migration_in_progress) {
		rc = -EBUSY;
	} else {
		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
					   cop_feat_caps->win_type);
		if (!rc)
			caps->nr_open_wins_progress++;
	}

	mutex_unlock(&vas_pseries_mutex);
	if (rc)
		goto out;

	/*
	 * Modify window and it is ready to use.
	 */
	rc = h_modify_vas_window(txwin);
	if (!rc)
		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
	if (rc)
		goto out_free;

	txwin->win_type = cop_feat_caps->win_type;

	/*
	 * The migration SUSPEND thread sets migration_in_progress and
	 * closes all open windows from the list. But the window is
	 * added to the list after the open and modify HCALLs. So it is
	 * possible that migration_in_progress is set before the modify
	 * HCALL, which may leave some windows open when the hypervisor
	 * initiates the migration.
	 * So check the migration_in_progress flag again and close any
	 * open windows.
	 *
	 * It is possible to lose the acquired credit with DLPAR core
	 * removal after the window is opened. So if there are any
	 * closed windows (i.e. with lost credits), do not give a new
	 * window to user space. New windows will be opened only
	 * after the existing windows are reopened when credits are
	 * available.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (!caps->nr_close_wins && !migration_in_progress) {
		list_add(&txwin->win_list, &caps->list);
		caps->nr_open_windows++;
		caps->nr_open_wins_progress--;
		mutex_unlock(&vas_pseries_mutex);
		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
		return &txwin->vas_win;
	}
	mutex_unlock(&vas_pseries_mutex);

	put_vas_user_win_ref(&txwin->vas_win.task_ref);
	rc = -EBUSY;
	pr_err_ratelimited("No credit is available to allocate window\n");

out_free:
	/*
	 * Window is not operational. Free IRQ before closing
	 * window so that we do not have to hold the mutex.
	 */
	free_irq_setup(txwin);
	h_deallocate_vas_window(txwin->vas_win.winid);
	/*
	 * Hold mutex and reduce nr_open_wins_progress counter.
	 */
	mutex_lock(&vas_pseries_mutex);
	caps->nr_open_wins_progress--;
	mutex_unlock(&vas_pseries_mutex);
out:
	atomic_dec(&cop_feat_caps->nr_used_credits);
	kfree(txwin);
	return ERR_PTR(rc);
}

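/*
 * The paste_addr callback: return the hypervisor-assigned paste bus
 * address that is mapped into the user address space for issuing
 * COPY/PASTE on this window.
 */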
static u64 vas_paste_address(struct vas_window *vwin)
{
	struct pseries_vas_window *win;

	win = container_of(vwin, struct pseries_vas_window, vas_win);
	return win->win_addr;
}

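/*
 * Close the window in the hypervisor and, only on success, free the
 * IRQ resources. See the ordering requirement in the comment below.
 */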
static int deallocate_free_window(struct pseries_vas_window *win)
{
	int rc = 0;

	/*
	 * The hypervisor waits for all requests including faults
	 * to be processed before closing the window - Means all
	 * credits have to be returned. In the case of fault
	 * request, a credit is returned after OS issues
	 * H_GET_NX_FAULT hcall.
	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
	 * hcall.
	 */
	rc = h_deallocate_vas_window(win->vas_win.winid);
	if (!rc)
		free_irq_setup(win);

	return rc;
}

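/*
 * The close_win callback: return the credit, remove the window from
 * the feature list and drop task references. The hypervisor window is
 * deallocated here only if it was not already closed due to a lost
 * credit or migration.
 */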
static int vas_deallocate_window(struct vas_window *vwin)
{
	struct pseries_vas_window *win;
	struct vas_cop_feat_caps *caps;
	int rc = 0;

	if (!vwin)
		return -EINVAL;

	win = container_of(vwin, struct pseries_vas_window, vas_win);

	/* Should not happen */
	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Window (%u): Invalid window type %u\n",
		       vwin->winid, win->win_type);
		return -EINVAL;
	}

	caps = &vascaps[win->win_type].caps;
	mutex_lock(&vas_pseries_mutex);
	/*
	 * The VAS window is already closed in the hypervisor when the
	 * credit is lost or with migration. So just remove the entry
	 * from the list, remove task references and free the
	 * vas_window struct.
	 */
	if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
		!(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
		rc = deallocate_free_window(win);
		if (rc) {
			mutex_unlock(&vas_pseries_mutex);
			return rc;
		}
	} else
		vascaps[win->win_type].nr_close_wins--;

	list_del(&win->win_list);
	atomic_dec(&caps->nr_used_credits);
	vascaps[win->win_type].nr_open_windows--;
	mutex_unlock(&vas_pseries_mutex);

	mm_context_remove_vas_window(vwin->task_ref.mm);
	put_vas_user_win_ref(&vwin->task_ref);

	kfree(win);
	return 0;
}

static const struct vas_user_win_ops vops_pseries = {
	.open_win	= vas_allocate_window,	/* Open and configure window */
	.paste_addr	= vas_paste_address,	/* To do copy/paste */
	.close_win	= vas_deallocate_window, /* Close window */
};

/*
 * Supporting only the nx-gzip coprocessor type now, but this API code
 * can be extended to other coprocessor types later.
 */
int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
			     const char *name)
{
	if (!copypaste_feat)
		return -ENOTSUPP;

	return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
}
EXPORT_SYMBOL_GPL(vas_register_api_pseries);
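/*
 * For example, the pseries NX GZIP driver (drivers/crypto/nx) is
 * expected to register itself roughly as:
 *
 *	rc = vas_register_api_pseries(THIS_MODULE, VAS_COP_TYPE_GZIP,
 *				      "nx-gzip");
 */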

void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);

/*
 * Get the specific capabilities based on the feature type.
 * Right now it supports GZIP default and GZIP QoS capabilities.
 */
static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
				       struct hv_vas_cop_feat_caps *hv_caps)
{
	struct vas_cop_feat_caps *caps;
	struct vas_caps *vcaps;
	int rc = 0;

	vcaps = &vascaps[type];
	memset(vcaps, 0, sizeof(*vcaps));
	INIT_LIST_HEAD(&vcaps->list);

	vcaps->feat = feat;
	caps = &vcaps->caps;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
				      (u64)virt_to_phys(hv_caps));
	if (rc)
		return rc;

	caps->user_mode = hv_caps->user_mode;
	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
		pr_err("User space COPY/PASTE is not supported\n");
		return -ENOTSUPP;
	}

	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
	caps->win_type = hv_caps->win_type;
	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Unsupported window type %u\n", caps->win_type);
		return -EINVAL;
	}
	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
	atomic_set(&caps->nr_total_credits,
		   be16_to_cpu(hv_caps->target_lpar_creds));
	if (feat == VAS_GZIP_DEF_FEAT) {
		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);

		if (caps->max_win_creds < DEF_WIN_CREDS) {
			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
			       DEF_WIN_CREDS, caps->max_win_creds);
			return -EINVAL;
		}
	}

	rc = sysfs_add_vas_caps(caps);
	if (rc)
		return rc;

	copypaste_feat = true;

	return 0;
}

/*
 * VAS windows can be closed due to lost credits when the core is
 * removed. So reopen them if credits are available due to DLPAR
 * core add and set the window active status. When NX sees the page
 * fault on the unmapped paste address, the kernel handles the fault
 * by setting the remapping to the new paste address if the window is
 * active.
 */
static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
				 bool migrate)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *caps = &vcaps->caps;
	struct pseries_vas_window *win = NULL, *tmp;
	int rc, mv_ents = 0;
	int flag;

	/*
	 * Nothing to do if there are no closed windows.
	 */
	if (!vcaps->nr_close_wins)
		return 0;

	/*
	 * For core removal, the hypervisor reduces the credits
	 * assigned to the LPAR and the kernel closes VAS windows
	 * in the hypervisor depending on the reduced credits. The
	 * kernel uses LIFO (the last windows that are opened will be
	 * closed first) and expects to open in the same order when
	 * credits are available.
	 * For example, 40 windows are closed when the LPAR lost 2 cores
	 * (dedicated). If 1 core is added, this LPAR can have 20 more
	 * credits. It means the kernel can reopen 20 windows. So skip
	 * the first 20 entries in the list of closed windows and reopen
	 * the next 20 windows.
	 * For partition migration, reopen all windows that were closed
	 * during resume.
	 */
	if ((vcaps->nr_close_wins > creds) && !migrate)
		mv_ents = vcaps->nr_close_wins - creds;

	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
		if (!mv_ents)
			break;

		mv_ents--;
	}

	/*
	 * Reopen windows only if they were closed by migration or
	 * DLPAR (lost credit) before.
	 */
	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
		/*
		 * This window was closed by both DLPAR and migration
		 * events. So reopen the window with the last event.
		 * The user space is not suspended with the current
		 * migration notifier. So the user space can issue DLPAR
		 * CPU hotplug while migration is in progress. In this
		 * case this window will be opened with the last event.
		 */
		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
			win->vas_win.status &= ~flag;
			continue;
		}

		/*
		 * Nothing to do on this window if it is not closed
		 * with this flag
		 */
		if (!(win->vas_win.status & flag))
			continue;

		rc = allocate_setup_window(win, (u64 *)&domain[0],
					   caps->win_type);
		if (rc)
			return rc;

		rc = h_modify_vas_window(win);
		if (rc)
			goto out;

		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
		/*
		 * Set window status to active
		 */
		win->vas_win.status &= ~flag;
		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
		win->win_type = caps->win_type;
		if (!--vcaps->nr_close_wins)
			break;
	}

	return 0;
out:
	/*
	 * Window modify HCALL failed. So close the window to the
	 * hypervisor and return.
	 */
	free_irq_setup(win);
	h_deallocate_vas_window(win->vas_win.winid);
	return rc;
}

/*
 * The hypervisor reduces the available credits if the LPAR loses a core.
 * It means the excessive windows should not be active and the user space
 * should not be using these windows to send compression requests to NX.
 * So the kernel closes the excessive windows and unmaps the paste address
 * such that the user space receives paste instruction failure. Then it is
 * up to the user space to fall back to SW compression and manage with the
 * existing windows.
 */
static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
				  bool migrate)
{
	struct pseries_vas_window *win, *tmp;
	struct vas_user_win_ref *task_ref;
	struct vm_area_struct *vma;
	int rc = 0, flag;

	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
		/*
		 * This window was already closed due to a lost credit
		 * or for migration before. Go for the next window.
		 * For migration, nothing to do since this window was
		 * closed for DLPAR and will be reopened even on
		 * the destination system with another DLPAR operation.
		 */
		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
			win->vas_win.status |= flag;
			continue;
		}

		task_ref = &win->vas_win.task_ref;
		/*
		 * VAS mmap (coproc_mmap()) and its fault handler
		 * (vas_mmap_fault()) are called after holding mmap lock.
		 * So hold mmap mutex after mmap_lock to avoid deadlock.
		 */
		mmap_write_lock(task_ref->mm);
		mutex_lock(&task_ref->mmap_mutex);
		vma = task_ref->vma;
		/*
		 * The number of available credits is reduced, so select
		 * and close windows.
		 */
		win->vas_win.status |= flag;

		/*
		 * vma is set in the original mapping. But this mapping
		 * is done with mmap() after the window is opened with ioctl.
		 * So we may not see the original mapping if the core removal
		 * is done after the ioctl but before the original mmap().
		 */
		if (vma)
			zap_vma_pages(vma);

		mutex_unlock(&task_ref->mmap_mutex);
		mmap_write_unlock(task_ref->mm);
		/*
		 * Close the VAS window in the hypervisor, but do not
		 * free the vas_window struct since it may be reused
		 * when the credit is available later (DLPAR with
		 * adding cores). This struct will be used
		 * later when the process issues close(FD).
		 */
		rc = deallocate_free_window(win);
		/*
		 * This failure is from the hypervisor.
		 * No way to stop migration for these failures.
		 * So ignore the error and continue closing other windows.
		 */
		if (rc && !migrate)
			return rc;

		vcap->nr_close_wins++;

		/*
		 * For migration, do not depend on lpar_creds in case of a
		 * mismatch with the hypervisor value (should not happen).
		 * So close all active windows in the list; they will be
		 * reopened based on the new lpar_creds on the
		 * destination system during resume.
		 */
		if (!migrate && !--excess_creds)
			break;
	}

	return 0;
}

/*
 * Get new VAS capabilities when the core add/removal configuration
 * changes. Reconfig window configurations based on the credit
 * availability from these new capabilities.
 */
int vas_reconfig_capabilties(u8 type, int new_nr_creds)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds;
	struct vas_caps *vcaps;
	int rc = 0, nr_active_wins;

	if (type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Invalid credit type %d\n", type);
		return -EINVAL;
	}

	vcaps = &vascaps[type];
	caps = &vcaps->caps;

	mutex_lock(&vas_pseries_mutex);

	old_nr_creds = atomic_read(&caps->nr_total_credits);

	atomic_set(&caps->nr_total_credits, new_nr_creds);
	/*
	 * The total number of available credits may be decreased or
	 * increased with DLPAR operation. Means some windows have to be
	 * closed / reopened. Hold the vas_pseries_mutex so that the
	 * user space can not open new windows.
	 */
	if (old_nr_creds < new_nr_creds) {
		/*
		 * If the existing target credits is less than the new
		 * target, reopen windows if they are closed due to
		 * the previous DLPAR (core removal).
		 */
		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
					   false);
	} else {
		/*
		 * # active windows is more than new LPAR available
		 * credits. So close the excessive windows.
		 * On pseries, each window will have 1 credit.
		 */
		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
		if (nr_active_wins > new_nr_creds)
			rc = reconfig_close_windows(vcaps,
					nr_active_wins - new_nr_creds,
					false);
	}

	mutex_unlock(&vas_pseries_mutex);
	return rc;
}

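/*
 * Called for DLPAR CPU add/removal events: query the new default GZIP
 * capabilities from the hypervisor and reconfigure the windows based
 * on the new target credits.
 */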
int pseries_vas_dlpar_cpu(void)
{
	int new_nr_creds, rc;

	/*
	 * NX-GZIP is not enabled. Nothing to do for DLPAR event
	 */
	if (!copypaste_feat)
		return 0;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
				      vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
				      (u64)virt_to_phys(&hv_cop_caps));
	if (!rc) {
		new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
					      new_nr_creds);
	}

	if (rc)
		pr_err("Failed reconfig VAS capabilities with DLPAR\n");

	return rc;
}

/*
 * The total number of default credits available (target_credits)
 * in the LPAR depends on the number of cores configured. It varies
 * based on whether processors are in shared mode or dedicated mode.
 * Get notified when the CPU configuration is changed with a DLPAR
 * operation so that the new target_credits (VAS default capabilities)
 * can be read and the existing windows usage updated if needed.
 */
static int pseries_vas_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *dn = rd->dn;
	const __be32 *intserv = NULL;
	int len;

	/*
	 * For a shared CPU partition, the hypervisor assigns total credits
	 * based on entitled core capacity. So updating VAS windows will
	 * be called from lparcfg_write().
	 */
	if (is_shared_processor())
		return NOTIFY_OK;

	if ((action == OF_RECONFIG_ATTACH_NODE) ||
		(action == OF_RECONFIG_DETACH_NODE))
		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
					  &len);
	/*
	 * Processor config is not changed
	 */
	if (!intserv)
		return NOTIFY_OK;

	return pseries_vas_dlpar_cpu();
}

static struct notifier_block pseries_vas_nb = {
	.notifier_call = pseries_vas_notifier,
};

/*
 * For LPM, all windows have to be closed on the source partition
 * before migration and reopened on the destination partition
 * after migration. So close windows during suspend and
 * reopen them during resume.
 */
int vas_migration_handler(int action)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds, new_nr_creds = 0;
	struct vas_caps *vcaps;
	int i, rc = 0;

	pr_info("VAS migration event %d\n", action);

	/*
	 * NX-GZIP is not enabled. Nothing to do for migration.
	 */
	if (!copypaste_feat)
		return rc;

	if (action == VAS_SUSPEND)
		migration_in_progress = true;
	else
		migration_in_progress = false;

	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
		vcaps = &vascaps[i];
		caps = &vcaps->caps;
		old_nr_creds = atomic_read(&caps->nr_total_credits);

		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
					      vcaps->feat,
					      (u64)virt_to_phys(&hv_cop_caps));
		if (!rc) {
			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
			/*
			 * Should not happen. But in case it does, print
			 * messages, close all windows in the list during
			 * suspend and reopen windows based on the new
			 * lpar_creds on the destination system.
			 */
			if (old_nr_creds != new_nr_creds) {
				pr_err("Target credits mismatch with the hypervisor\n");
				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
					action, old_nr_creds, new_nr_creds);
				pr_err("Used creds: %d, Active creds: %d\n",
					atomic_read(&caps->nr_used_credits),
					vcaps->nr_open_windows - vcaps->nr_close_wins);
			}
		} else {
			pr_err("state(%d): Get VAS capabilities failed with %d\n",
				action, rc);
			/*
			 * We cannot stop migration with the current LPM
			 * implementation. So continue closing all windows
			 * in the list (during suspend) and return without
			 * opening windows (during resume) if the VAS
			 * capabilities HCALL failed.
			 */
			if (action == VAS_RESUME)
				goto out;
		}

		switch (action) {
		case VAS_SUSPEND:
			mutex_lock(&vas_pseries_mutex);
			rc = reconfig_close_windows(vcaps,
					vcaps->nr_open_windows, true);
			/*
			 * Windows are included in the list after a
			 * successful open. So wait for closing these
			 * in-progress open windows in vas_allocate_window()
			 * which will be done if migration_in_progress is
			 * set.
			 */
			while (vcaps->nr_open_wins_progress) {
				mutex_unlock(&vas_pseries_mutex);
				msleep(10);
				mutex_lock(&vas_pseries_mutex);
			}
			mutex_unlock(&vas_pseries_mutex);
			break;
		case VAS_RESUME:
			mutex_lock(&vas_pseries_mutex);
			atomic_set(&caps->nr_total_credits, new_nr_creds);
			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
			mutex_unlock(&vas_pseries_mutex);
			break;
		default:
			/* should not happen */
			pr_err("Invalid migration action %d\n", action);
			rc = -EINVAL;
			goto out;
		}

		/*
		 * Ignore errors during suspend and return for resume.
		 */
		if (rc && (action == VAS_RESUME))
			goto out;
	}

	pr_info("VAS migration event (%d) successful\n", action);

out:
	return rc;
}

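/*
 * Query the overall VAS capabilities at init, then the per-feature
 * (GZIP QoS and GZIP default) capabilities, and register for DLPAR
 * CPU reconfig notifications if user space COPY/PASTE is available.
 */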
static int __init pseries_vas_init(void)
{
	struct hv_vas_all_caps *hv_caps;
	int rc = 0;

	/*
	 * Linux supports user space COPY/PASTE only with Radix
	 */
	if (!radix_enabled()) {
		copypaste_feat = false;
		pr_err("API is supported only with radix page tables\n");
		return -ENOTSUPP;
	}

	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
	if (!hv_caps)
		return -ENOMEM;
	/*
	 * Get VAS overall capabilities by passing 0 for the feature type.
	 */
	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
				      (u64)virt_to_phys(hv_caps));
	if (rc)
		goto out;

	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);

	sysfs_pseries_vas_init(&caps_all);

	/*
	 * QoS capabilities available
	 */
	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);

		if (rc)
			goto out;
	}
	/*
	 * Default capabilities available
	 */
	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);

	if (!rc && copypaste_feat) {
		if (firmware_has_feature(FW_FEATURE_LPAR))
			of_reconfig_notifier_register(&pseries_vas_nb);

		pr_info("GZIP feature is available\n");
	} else {
		/*
		 * Should not happen, but possible only when the get
		 * default capabilities HCALL failed. So disable the
		 * copy paste feature.
		 */
		copypaste_feat = false;
	}

out:
	kfree(hv_caps);
	return rc;
}
machine_device_initcall(pseries, pseries_vas_init);