// SPDX-License-Identifier: GPL-2.0
/*
 * Xen hypercall batching.
 *
 * Xen allows multiple hypercalls to be issued at once, using the
 * multicall interface. This allows the cost of trapping into the
 * hypervisor to be amortized over several calls.
 *
 * This file implements a simple interface for multicalls. There's a
 * per-cpu buffer of outstanding multicalls. When you want to queue a
 * multicall for issuing, you can allocate a multicall slot for the
 * call and its arguments, along with storage for any data the
 * arguments point to (for passing pointers to structures, etc).
 * When the multicall is actually issued, all the space for the
 * commands and allocated memory is freed for reuse.
 *
 * Multicalls are flushed whenever any of the buffers get full, or
 * when explicitly requested. There's no way to get per-multicall
 * return results back to the caller; failing multicalls trigger a
 * WARN and a diagnostic dump of the failed calls.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
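
/*
 * A minimal usage sketch, assuming the xen_mc_batch()/xen_mc_issue()
 * helpers from xen-ops.h and the MULTI_* encoding wrappers from
 * <asm/xen/hypercall.h>:
 *
 *	struct multicall_space mcs;
 *
 *	xen_mc_batch();
 *	mcs = __xen_mc_entry(0);
 *	MULTI_fpu_taskswitch(mcs.mc, 1);
 *	xen_mc_issue(XEN_LAZY_CPU);
 *
 * xen_mc_batch() disables interrupts for the duration, __xen_mc_entry()
 * reserves a slot (here with no extra argument space), and
 * xen_mc_issue() flushes immediately unless a matching lazy mode is
 * active.
 */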
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/debugfs.h>
#include <linux/jump_label.h>
#include <linux/printk.h>

#include <asm/xen/hypercall.h>

#include "xen-ops.h"

/* Maximum number of multicall entries batched per CPU. */
#define MC_BATCH	32

/* Bytes of per-CPU scratch space for hypercall argument data. */
#define MC_ARGS		(MC_BATCH * 16)

struct mc_buffer {
	unsigned mcidx, argidx, cbidx;	/* fill levels of the arrays below */
	struct multicall_entry entries[MC_BATCH];
	unsigned char args[MC_ARGS];
	struct callback {
		void (*fn)(void *);
		void *data;
	} callbacks[MC_BATCH];
};

/* Per-entry debug records, kept only while the mc_debug key is enabled. */
struct mc_debug_data {
	struct multicall_entry entries[MC_BATCH];
	void *caller[MC_BATCH];
	size_t argsz[MC_BATCH];
	unsigned long *args[MC_BATCH];
};

static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
static struct mc_debug_data mc_debug_data_early __initdata;
static struct mc_debug_data __percpu *mc_debug_data_ptr;
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);

static struct static_key mc_debug __ro_after_init;
static bool mc_debug_enabled __initdata;

static struct mc_debug_data * __ref get_mc_debug(void)
{
	if (!mc_debug_data_ptr)
		return &mc_debug_data_early;

	return this_cpu_ptr(mc_debug_data_ptr);
}

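/*
 * Passing "xen_mc_debug" on the kernel command line flips the mc_debug
 * static key so that every queued multicall also records its caller and
 * argument data, which xen_mc_flush() can then dump on failure.
 */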
static int __init xen_parse_mc_debug(char *arg)
{
	mc_debug_enabled = true;
	static_key_slow_inc(&mc_debug);

	return 0;
}
early_param("xen_mc_debug", xen_parse_mc_debug);

static int __init mc_debug_enable(void)
{
	unsigned long flags;
	struct mc_debug_data __percpu *mcdb;

	if (!mc_debug_enabled)
		return 0;

	mcdb = alloc_percpu(struct mc_debug_data);
	if (!mcdb) {
		pr_err("xen_mc_debug inactive\n");
		static_key_slow_dec(&mc_debug);
		return -ENOMEM;
	}

	/* Be careful when switching to percpu debug data. */
	local_irq_save(flags);
	xen_mc_flush();
	mc_debug_data_ptr = mcdb;
	local_irq_restore(flags);

	pr_info("xen_mc_debug active\n");

	return 0;
}
early_initcall(mc_debug_enable);

/* Number of parameters of hypercalls used via multicalls. */
static const uint8_t hpcpars[] = {
	[__HYPERVISOR_mmu_update]	 = 4,
	[__HYPERVISOR_stack_switch]	 = 2,
	[__HYPERVISOR_fpu_taskswitch]	 = 1,
	[__HYPERVISOR_update_descriptor] = 2,
	[__HYPERVISOR_update_va_mapping] = 3,
	[__HYPERVISOR_mmuext_op]	 = 4,
};

static void print_debug_data(struct mc_buffer *b, struct mc_debug_data *mcdb,
			     int idx)
{
	unsigned int arg;
	unsigned int opidx = mcdb->entries[idx].op & 0xff;
	unsigned int pars = 0;

	pr_err(" call %2d: op=%lu result=%ld caller=%pS ", idx + 1,
	       mcdb->entries[idx].op, b->entries[idx].result,
	       mcdb->caller[idx]);
	if (opidx < ARRAY_SIZE(hpcpars))
		pars = hpcpars[opidx];
	if (pars) {
		pr_cont("pars=");
		for (arg = 0; arg < pars; arg++)
			pr_cont("%lx ", mcdb->entries[idx].args[arg]);
	}
	if (mcdb->argsz[idx]) {
		pr_cont("args=");
		for (arg = 0; arg < mcdb->argsz[idx] / 8; arg++)
			pr_cont("%lx ", mcdb->args[idx][arg]);
	}
	pr_cont("\n");
}

void xen_mc_flush(void)
{
	struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
	struct multicall_entry *mc;
	struct mc_debug_data *mcdb = NULL;
	int ret = 0;
	unsigned long flags;
	int i;

	BUG_ON(preemptible());

	/*
	 * Disable interrupts in case someone comes in and queues
	 * something in the middle.
	 */
	local_irq_save(flags);

	trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);

	if (static_key_false(&mc_debug)) {
		mcdb = get_mc_debug();
		memcpy(mcdb->entries, b->entries,
		       b->mcidx * sizeof(struct multicall_entry));
	}

	switch (b->mcidx) {
	case 0:
		/* no-op */
		BUG_ON(b->argidx != 0);
		break;

	case 1:
		/*
		 * Singleton multicall - bypass multicall machinery
		 * and just do the call directly.
		 */
		mc = &b->entries[0];

		mc->result = xen_single_call(mc->op, mc->args[0], mc->args[1],
					     mc->args[2], mc->args[3],
					     mc->args[4]);
		ret = mc->result < 0;
		break;

	default:
		if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
			BUG();
		for (i = 0; i < b->mcidx; i++)
			if (b->entries[i].result < 0)
				ret++;
	}

	if (WARN_ON(ret)) {
		pr_err("%d of %d multicall(s) failed: cpu %d\n",
		       ret, b->mcidx, smp_processor_id());
		for (i = 0; i < b->mcidx; i++) {
			if (static_key_false(&mc_debug)) {
				print_debug_data(b, mcdb, i);
			} else if (b->entries[i].result < 0) {
				pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
				       i + 1,
				       b->entries[i].op,
				       b->entries[i].args[0],
				       b->entries[i].result);
			}
		}
	}

	b->mcidx = 0;
	b->argidx = 0;

	/* Run (and then drop) any callbacks queued for this batch. */
	for (i = 0; i < b->cbidx; i++) {
		struct callback *cb = &b->callbacks[i];

		(*cb->fn)(cb->data);
	}
	b->cbidx = 0;

	local_irq_restore(flags);
}

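/*
 * The companion xen_mc_batch()/xen_mc_issue() helpers live in
 * xen-ops.h: xen_mc_batch() saves and disables interrupts (stashing the
 * flags in xen_mc_irq_flags above), and xen_mc_issue() calls
 * xen_mc_flush() unless a matching lazy mode is active, then restores
 * the saved flags.
 */
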
struct multicall_space __xen_mc_entry(size_t args)
{
	struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
	struct multicall_space ret;
	unsigned argidx = roundup(b->argidx, sizeof(u64));

	trace_xen_mc_entry_alloc(args);

	BUG_ON(preemptible());
	BUG_ON(b->argidx >= MC_ARGS);

	/* Flush first if either the entry or the argument buffer is full. */
	if (unlikely(b->mcidx == MC_BATCH ||
		     (argidx + args) >= MC_ARGS)) {
		trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ?
					  XEN_MC_FL_BATCH : XEN_MC_FL_ARGS);
		xen_mc_flush();
		argidx = roundup(b->argidx, sizeof(u64));
	}

	ret.mc = &b->entries[b->mcidx];
	if (static_key_false(&mc_debug)) {
		struct mc_debug_data *mcdb = get_mc_debug();

		mcdb->caller[b->mcidx] = __builtin_return_address(0);
		mcdb->argsz[b->mcidx] = args;
		mcdb->args[b->mcidx] = (unsigned long *)(&b->args[argidx]);
	}
	b->mcidx++;
	ret.args = &b->args[argidx];
	b->argidx = argidx + args;

	BUG_ON(b->argidx >= MC_ARGS);
	return ret;
}

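/*
 * A hypothetical caller sketch for __xen_mc_entry(): reserve an entry
 * plus argument space, fill in the argument data, and leave issuing to
 * the next flush (the mmu_update shape below is illustrative only):
 *
 *	struct multicall_space mcs = __xen_mc_entry(sizeof(*u));
 *	struct mmu_update *u = mcs.args;
 *
 *	u->ptr = ...;
 *	u->val = ...;
 *	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
 */
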
struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
{
	struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
	struct multicall_space ret = { NULL, NULL };

	BUG_ON(preemptible());
	BUG_ON(b->argidx >= MC_ARGS);

	/* Only the most recently queued entry may be extended. */
	if (unlikely(b->mcidx == 0 ||
		     b->entries[b->mcidx - 1].op != op)) {
		trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP);
		goto out;
	}

	if (unlikely((b->argidx + size) >= MC_ARGS)) {
		trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE);
		goto out;
	}

	ret.mc = &b->entries[b->mcidx - 1];
	ret.args = &b->args[b->argidx];
	b->argidx += size;

	BUG_ON(b->argidx >= MC_ARGS);

	trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK);
out:
	return ret;
}

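/*
 * Callers typically try to extend first and fall back to a fresh slot
 * when that fails, roughly (illustrative sketch):
 *
 *	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
 *	if (mcs.mc == NULL)
 *		mcs = __xen_mc_entry(sizeof(*u));
 */
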
/* Queue fn(data) to be run by the next xen_mc_flush(). */
void xen_mc_callback(void (*fn)(void *), void *data)
{
	struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
	struct callback *cb;

	if (b->cbidx == MC_BATCH) {
		trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK);
		xen_mc_flush();
	}

	trace_xen_mc_callback(fn, data);

	cb = &b->callbacks[b->cbidx++];
	cb->fn = fn;
	cb->data = data;
}
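
/*
 * Example (names are hypothetical): defer releasing a scratch buffer
 * until the hypercalls referencing it have actually been issued:
 *
 *	xen_mc_callback(free_scratch_page, page);
 */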