/*
 * Freescale Hypervisor Management Driver
 *
 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
 * Author: Timur Tabi <timur@freescale.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2.  This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 *
 * The Freescale hypervisor management driver provides several services to
 * drivers and applications related to the Freescale hypervisor:
 *
 * 1. An ioctl interface for querying and managing partitions.
 *
 * 2. A file interface for reading incoming doorbells.
 *
 * 3. An interrupt handler for shutting down the partition upon receiving the
 *    shutdown doorbell from a manager partition.
 *
 * 4. A kernel interface for receiving callbacks when a managed partition
 *    shuts down.
 */
24 | |
25 | #include <linux/kernel.h> |
26 | #include <linux/module.h> |
27 | #include <linux/init.h> |
28 | #include <linux/types.h> |
29 | #include <linux/err.h> |
30 | #include <linux/fs.h> |
31 | #include <linux/miscdevice.h> |
32 | #include <linux/mm.h> |
33 | #include <linux/pagemap.h> |
34 | #include <linux/slab.h> |
35 | #include <linux/poll.h> |
36 | #include <linux/of.h> |
37 | #include <linux/of_irq.h> |
38 | #include <linux/reboot.h> |
39 | #include <linux/uaccess.h> |
40 | #include <linux/notifier.h> |
41 | #include <linux/interrupt.h> |
42 | |
43 | #include <linux/io.h> |
44 | #include <asm/fsl_hcalls.h> |
45 | |
46 | #include <linux/fsl_hypervisor.h> |
47 | |
/*
 * Notifier chain run by fsl_hv_state_change_thread(); clients join and leave
 * it through fsl_hv_failover_register() and fsl_hv_failover_unregister().
 */
static BLOCKING_NOTIFIER_HEAD(failover_subscribers);
49 | |
50 | /* |
51 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART |
52 | * |
53 | * Restart a running partition |
54 | */ |
55 | static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p) |
56 | { |
57 | struct fsl_hv_ioctl_restart param; |
58 | |
59 | /* Get the parameters from the user */ |
60 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_restart))) |
61 | return -EFAULT; |
62 | |
63 | param.ret = fh_partition_restart(param.partition); |
64 | |
65 | if (copy_to_user(to: &p->ret, from: ¶m.ret, n: sizeof(__u32))) |
66 | return -EFAULT; |
67 | |
68 | return 0; |
69 | } |
70 | |
71 | /* |
72 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS |
73 | * |
74 | * Query the status of a partition |
75 | */ |
76 | static long ioctl_status(struct fsl_hv_ioctl_status __user *p) |
77 | { |
78 | struct fsl_hv_ioctl_status param; |
79 | u32 status; |
80 | |
81 | /* Get the parameters from the user */ |
82 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_status))) |
83 | return -EFAULT; |
84 | |
85 | param.ret = fh_partition_get_status(param.partition, &status); |
86 | if (!param.ret) |
87 | param.status = status; |
88 | |
89 | if (copy_to_user(to: p, from: ¶m, n: sizeof(struct fsl_hv_ioctl_status))) |
90 | return -EFAULT; |
91 | |
92 | return 0; |
93 | } |
94 | |
95 | /* |
96 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_START |
97 | * |
98 | * Start a stopped partition. |
99 | */ |
100 | static long ioctl_start(struct fsl_hv_ioctl_start __user *p) |
101 | { |
102 | struct fsl_hv_ioctl_start param; |
103 | |
104 | /* Get the parameters from the user */ |
105 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_start))) |
106 | return -EFAULT; |
107 | |
108 | param.ret = fh_partition_start(param.partition, param.entry_point, |
109 | param.load); |
110 | |
111 | if (copy_to_user(to: &p->ret, from: ¶m.ret, n: sizeof(__u32))) |
112 | return -EFAULT; |
113 | |
114 | return 0; |
115 | } |
116 | |
117 | /* |
118 | * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP |
119 | * |
120 | * Stop a running partition |
121 | */ |
122 | static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p) |
123 | { |
124 | struct fsl_hv_ioctl_stop param; |
125 | |
126 | /* Get the parameters from the user */ |
127 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_stop))) |
128 | return -EFAULT; |
129 | |
130 | param.ret = fh_partition_stop(param.partition); |
131 | |
132 | if (copy_to_user(to: &p->ret, from: ¶m.ret, n: sizeof(__u32))) |
133 | return -EFAULT; |
134 | |
135 | return 0; |
136 | } |
137 | |
138 | /* |
139 | * Ioctl interface for FSL_HV_IOCTL_MEMCPY |
140 | * |
141 | * The FH_MEMCPY hypercall takes an array of address/address/size structures |
142 | * to represent the data being copied. As a convenience to the user, this |
143 | * ioctl takes a user-create buffer and a pointer to a guest physically |
144 | * contiguous buffer in the remote partition, and creates the |
145 | * address/address/size array for the hypercall. |
146 | */ |
147 | static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) |
148 | { |
149 | struct fsl_hv_ioctl_memcpy param; |
150 | |
151 | struct page **pages = NULL; |
152 | void *sg_list_unaligned = NULL; |
153 | struct fh_sg_list *sg_list = NULL; |
154 | |
155 | unsigned int num_pages; |
156 | unsigned long lb_offset; /* Offset within a page of the local buffer */ |
157 | |
158 | unsigned int i; |
159 | long ret = 0; |
160 | int num_pinned = 0; /* return value from get_user_pages_fast() */ |
161 | phys_addr_t remote_paddr; /* The next address in the remote buffer */ |
162 | uint32_t count; /* The number of bytes left to copy */ |
163 | |
164 | /* Get the parameters from the user */ |
165 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_memcpy))) |
166 | return -EFAULT; |
167 | |
168 | /* |
169 | * One partition must be local, the other must be remote. In other |
170 | * words, if source and target are both -1, or are both not -1, then |
171 | * return an error. |
172 | */ |
173 | if ((param.source == -1) == (param.target == -1)) |
174 | return -EINVAL; |
175 | |
176 | /* |
177 | * The array of pages returned by get_user_pages_fast() covers only |
178 | * page-aligned memory. Since the user buffer is probably not |
179 | * page-aligned, we need to handle the discrepancy. |
180 | * |
181 | * We calculate the offset within a page of the S/G list, and make |
182 | * adjustments accordingly. This will result in a page list that looks |
183 | * like this: |
184 | * |
185 | * ---- <-- first page starts before the buffer |
186 | * | | |
187 | * |////|-> ---- |
188 | * |////| | | |
189 | * ---- | | |
190 | * | | |
191 | * ---- | | |
192 | * |////| | | |
193 | * |////| | | |
194 | * |////| | | |
195 | * ---- | | |
196 | * | | |
197 | * ---- | | |
198 | * |////| | | |
199 | * |////| | | |
200 | * |////| | | |
201 | * ---- | | |
202 | * | | |
203 | * ---- | | |
204 | * |////| | | |
205 | * |////|-> ---- |
206 | * | | <-- last page ends after the buffer |
207 | * ---- |
208 | * |
209 | * The distance between the start of the first page and the start of the |
210 | * buffer is lb_offset. The hashed (///) areas are the parts of the |
211 | * page list that contain the actual buffer. |
212 | * |
213 | * The advantage of this approach is that the number of pages is |
214 | * equal to the number of entries in the S/G list that we give to the |
215 | * hypervisor. |
216 | */ |
217 | lb_offset = param.local_vaddr & (PAGE_SIZE - 1); |
218 | if (param.count == 0 || |
219 | param.count > U64_MAX - lb_offset - PAGE_SIZE + 1) |
220 | return -EINVAL; |
221 | num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
222 | |
223 | /* Allocate the buffers we need */ |
224 | |
225 | /* |
226 | * 'pages' is an array of struct page pointers that's initialized by |
227 | * get_user_pages_fast(). |
228 | */ |
229 | pages = kcalloc(n: num_pages, size: sizeof(struct page *), GFP_KERNEL); |
230 | if (!pages) { |
231 | pr_debug("fsl-hv: could not allocate page list\n"); |
232 | return -ENOMEM; |
233 | } |
234 | |
235 | /* |
236 | * sg_list is the list of fh_sg_list objects that we pass to the |
237 | * hypervisor. |
238 | */ |
239 | sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) + |
240 | sizeof(struct fh_sg_list) - 1, GFP_KERNEL); |
241 | if (!sg_list_unaligned) { |
242 | pr_debug("fsl-hv: could not allocate S/G list\n"); |
243 | ret = -ENOMEM; |
244 | goto free_pages; |
245 | } |
246 | sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list)); |
247 | |
248 | /* Get the physical addresses of the source buffer */ |
249 | num_pinned = get_user_pages_fast(start: param.local_vaddr - lb_offset, |
250 | nr_pages: num_pages, gup_flags: param.source != -1 ? FOLL_WRITE : 0, pages); |
251 | |
252 | if (num_pinned != num_pages) { |
253 | pr_debug("fsl-hv: could not lock source buffer\n"); |
254 | ret = (num_pinned < 0) ? num_pinned : -EFAULT; |
255 | goto exit; |
256 | } |
257 | |
258 | /* |
259 | * Build the fh_sg_list[] array. The first page is special |
260 | * because it's misaligned. |
261 | */ |
262 | if (param.source == -1) { |
263 | sg_list[0].source = page_to_phys(pages[0]) + lb_offset; |
264 | sg_list[0].target = param.remote_paddr; |
265 | } else { |
266 | sg_list[0].source = param.remote_paddr; |
267 | sg_list[0].target = page_to_phys(pages[0]) + lb_offset; |
268 | } |
269 | sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset); |
270 | |
271 | remote_paddr = param.remote_paddr + sg_list[0].size; |
272 | count = param.count - sg_list[0].size; |
273 | |
274 | for (i = 1; i < num_pages; i++) { |
275 | if (param.source == -1) { |
276 | /* local to remote */ |
277 | sg_list[i].source = page_to_phys(pages[i]); |
278 | sg_list[i].target = remote_paddr; |
279 | } else { |
280 | /* remote to local */ |
281 | sg_list[i].source = remote_paddr; |
282 | sg_list[i].target = page_to_phys(pages[i]); |
283 | } |
284 | sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE); |
285 | |
286 | remote_paddr += sg_list[i].size; |
287 | count -= sg_list[i].size; |
288 | } |
289 | |
290 | param.ret = fh_partition_memcpy(param.source, param.target, |
291 | virt_to_phys(address: sg_list), num_pages); |
292 | |
293 | exit: |
294 | if (pages && (num_pinned > 0)) { |
295 | for (i = 0; i < num_pinned; i++) |
296 | put_page(page: pages[i]); |
297 | } |
298 | |
299 | kfree(objp: sg_list_unaligned); |
300 | free_pages: |
301 | kfree(objp: pages); |
302 | |
303 | if (!ret) |
304 | if (copy_to_user(to: &p->ret, from: ¶m.ret, n: sizeof(__u32))) |
305 | return -EFAULT; |
306 | |
307 | return ret; |
308 | } |
309 | |
310 | /* |
311 | * Ioctl interface for FSL_HV_IOCTL_DOORBELL |
312 | * |
313 | * Ring a doorbell |
314 | */ |
315 | static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p) |
316 | { |
317 | struct fsl_hv_ioctl_doorbell param; |
318 | |
319 | /* Get the parameters from the user. */ |
320 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_doorbell))) |
321 | return -EFAULT; |
322 | |
323 | param.ret = ev_doorbell_send(param.doorbell); |
324 | |
325 | if (copy_to_user(to: &p->ret, from: ¶m.ret, n: sizeof(__u32))) |
326 | return -EFAULT; |
327 | |
328 | return 0; |
329 | } |
330 | |
331 | static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) |
332 | { |
333 | struct fsl_hv_ioctl_prop param; |
334 | char __user *upath, *upropname; |
335 | void __user *upropval; |
336 | char *path, *propname; |
337 | void *propval; |
338 | int ret = 0; |
339 | |
340 | /* Get the parameters from the user. */ |
341 | if (copy_from_user(to: ¶m, from: p, n: sizeof(struct fsl_hv_ioctl_prop))) |
342 | return -EFAULT; |
343 | |
344 | upath = (char __user *)(uintptr_t)param.path; |
345 | upropname = (char __user *)(uintptr_t)param.propname; |
346 | upropval = (void __user *)(uintptr_t)param.propval; |
347 | |
348 | path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN); |
349 | if (IS_ERR(ptr: path)) |
350 | return PTR_ERR(ptr: path); |
351 | |
352 | propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN); |
353 | if (IS_ERR(ptr: propname)) { |
354 | ret = PTR_ERR(ptr: propname); |
355 | goto err_free_path; |
356 | } |
357 | |
358 | if (param.proplen > FH_DTPROP_MAX_PROPLEN) { |
359 | ret = -EINVAL; |
360 | goto err_free_propname; |
361 | } |
362 | |
363 | propval = kmalloc(size: param.proplen, GFP_KERNEL); |
364 | if (!propval) { |
365 | ret = -ENOMEM; |
366 | goto err_free_propname; |
367 | } |
368 | |
369 | if (set) { |
370 | if (copy_from_user(to: propval, from: upropval, n: param.proplen)) { |
371 | ret = -EFAULT; |
372 | goto err_free_propval; |
373 | } |
374 | |
375 | param.ret = fh_partition_set_dtprop(param.handle, |
376 | virt_to_phys(address: path), |
377 | virt_to_phys(address: propname), |
378 | virt_to_phys(address: propval), |
379 | param.proplen); |
380 | } else { |
381 | param.ret = fh_partition_get_dtprop(param.handle, |
382 | virt_to_phys(address: path), |
383 | virt_to_phys(address: propname), |
384 | virt_to_phys(address: propval), |
385 | ¶m.proplen); |
386 | |
387 | if (param.ret == 0) { |
388 | if (copy_to_user(to: upropval, from: propval, n: param.proplen) || |
389 | put_user(param.proplen, &p->proplen)) { |
390 | ret = -EFAULT; |
391 | goto err_free_propval; |
392 | } |
393 | } |
394 | } |
395 | |
396 | if (put_user(param.ret, &p->ret)) |
397 | ret = -EFAULT; |
398 | |
399 | err_free_propval: |
400 | kfree(objp: propval); |
401 | err_free_propname: |
402 | kfree(objp: propname); |
403 | err_free_path: |
404 | kfree(objp: path); |
405 | |
406 | return ret; |
407 | } |
408 | |
409 | /* |
410 | * Ioctl main entry point |
411 | */ |
412 | static long fsl_hv_ioctl(struct file *file, unsigned int cmd, |
413 | unsigned long argaddr) |
414 | { |
415 | void __user *arg = (void __user *)argaddr; |
416 | long ret; |
417 | |
418 | switch (cmd) { |
419 | case FSL_HV_IOCTL_PARTITION_RESTART: |
420 | ret = ioctl_restart(p: arg); |
421 | break; |
422 | case FSL_HV_IOCTL_PARTITION_GET_STATUS: |
423 | ret = ioctl_status(p: arg); |
424 | break; |
425 | case FSL_HV_IOCTL_PARTITION_START: |
426 | ret = ioctl_start(p: arg); |
427 | break; |
428 | case FSL_HV_IOCTL_PARTITION_STOP: |
429 | ret = ioctl_stop(p: arg); |
430 | break; |
431 | case FSL_HV_IOCTL_MEMCPY: |
432 | ret = ioctl_memcpy(p: arg); |
433 | break; |
434 | case FSL_HV_IOCTL_DOORBELL: |
435 | ret = ioctl_doorbell(p: arg); |
436 | break; |
437 | case FSL_HV_IOCTL_GETPROP: |
438 | ret = ioctl_dtprop(p: arg, set: 0); |
439 | break; |
440 | case FSL_HV_IOCTL_SETPROP: |
441 | ret = ioctl_dtprop(p: arg, set: 1); |
442 | break; |
443 | default: |
444 | pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n", |
445 | _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), |
446 | _IOC_SIZE(cmd)); |
447 | return -ENOTTY; |
448 | } |
449 | |
450 | return ret; |
451 | } |
452 | |
453 | /* Linked list of processes that have us open */ |
454 | static struct list_head db_list; |
455 | |
456 | /* spinlock for db_list */ |
457 | static DEFINE_SPINLOCK(db_list_lock); |
458 | |
459 | /* The size of the doorbell event queue. This must be a power of two. */ |
460 | #define QSIZE 16 |
461 | |
462 | /* Returns the next head/tail pointer, wrapping around the queue if necessary */ |
463 | #define nextp(x) (((x) + 1) & (QSIZE - 1)) |
464 | |
465 | /* Per-open data structure */ |
466 | struct doorbell_queue { |
467 | struct list_head list; |
468 | spinlock_t lock; |
469 | wait_queue_head_t wait; |
470 | unsigned int head; |
471 | unsigned int tail; |
472 | uint32_t q[QSIZE]; |
473 | }; |
474 | |
475 | /* Linked list of ISRs that we registered */ |
476 | struct list_head isr_list; |
477 | |
478 | /* Per-ISR data structure */ |
479 | struct doorbell_isr { |
480 | struct list_head list; |
481 | unsigned int irq; |
482 | uint32_t doorbell; /* The doorbell handle */ |
483 | uint32_t partition; /* The partition handle, if used */ |
484 | }; |
485 | |
486 | /* |
487 | * Add a doorbell to all of the doorbell queues |
488 | */ |
489 | static void fsl_hv_queue_doorbell(uint32_t doorbell) |
490 | { |
491 | struct doorbell_queue *dbq; |
492 | unsigned long flags; |
493 | |
494 | /* Prevent another core from modifying db_list */ |
495 | spin_lock_irqsave(&db_list_lock, flags); |
496 | |
497 | list_for_each_entry(dbq, &db_list, list) { |
498 | if (dbq->head != nextp(dbq->tail)) { |
499 | dbq->q[dbq->tail] = doorbell; |
500 | /* |
501 | * This memory barrier eliminates the need to grab |
502 | * the spinlock for dbq. |
503 | */ |
504 | smp_wmb(); |
505 | dbq->tail = nextp(dbq->tail); |
506 | wake_up_interruptible(&dbq->wait); |
507 | } |
508 | } |
509 | |
510 | spin_unlock_irqrestore(lock: &db_list_lock, flags); |
511 | } |
512 | |
513 | /* |
514 | * Interrupt handler for all doorbells |
515 | * |
516 | * We use the same interrupt handler for all doorbells. Whenever a doorbell |
517 | * is rung, and we receive an interrupt, we just put the handle for that |
518 | * doorbell (passed to us as *data) into all of the queues. |
519 | */ |
520 | static irqreturn_t fsl_hv_isr(int irq, void *data) |
521 | { |
522 | fsl_hv_queue_doorbell(doorbell: (uintptr_t) data); |
523 | |
524 | return IRQ_HANDLED; |
525 | } |
526 | |
527 | /* |
528 | * State change thread function |
529 | * |
530 | * The state change notification arrives in an interrupt, but we can't call |
531 | * blocking_notifier_call_chain() in an interrupt handler. We could call |
532 | * atomic_notifier_call_chain(), but that would require the clients' call-back |
533 | * function to run in interrupt context. Since we don't want to impose that |
534 | * restriction on the clients, we use a threaded IRQ to process the |
535 | * notification in kernel context. |
536 | */ |
537 | static irqreturn_t fsl_hv_state_change_thread(int irq, void *data) |
538 | { |
539 | struct doorbell_isr *dbisr = data; |
540 | |
541 | blocking_notifier_call_chain(nh: &failover_subscribers, val: dbisr->partition, |
542 | NULL); |
543 | |
544 | return IRQ_HANDLED; |
545 | } |
546 | |
547 | /* |
548 | * Interrupt handler for state-change doorbells |
549 | */ |
550 | static irqreturn_t fsl_hv_state_change_isr(int irq, void *data) |
551 | { |
552 | unsigned int status; |
553 | struct doorbell_isr *dbisr = data; |
554 | int ret; |
555 | |
556 | /* It's still a doorbell, so add it to all the queues. */ |
557 | fsl_hv_queue_doorbell(doorbell: dbisr->doorbell); |
558 | |
559 | /* Determine the new state, and if it's stopped, notify the clients. */ |
560 | ret = fh_partition_get_status(dbisr->partition, &status); |
561 | if (!ret && (status == FH_PARTITION_STOPPED)) |
562 | return IRQ_WAKE_THREAD; |
563 | |
564 | return IRQ_HANDLED; |
565 | } |
566 | |
567 | /* |
568 | * Returns a bitmask indicating whether a read will block |
569 | */ |
570 | static __poll_t fsl_hv_poll(struct file *filp, struct poll_table_struct *p) |
571 | { |
572 | struct doorbell_queue *dbq = filp->private_data; |
573 | unsigned long flags; |
574 | __poll_t mask; |
575 | |
576 | spin_lock_irqsave(&dbq->lock, flags); |
577 | |
578 | poll_wait(filp, wait_address: &dbq->wait, p); |
579 | mask = (dbq->head == dbq->tail) ? 0 : (EPOLLIN | EPOLLRDNORM); |
580 | |
581 | spin_unlock_irqrestore(lock: &dbq->lock, flags); |
582 | |
583 | return mask; |
584 | } |
585 | |
586 | /* |
587 | * Return the handles for any incoming doorbells |
588 | * |
589 | * If there are doorbell handles in the queue for this open instance, then |
590 | * return them to the caller as an array of 32-bit integers. Otherwise, |
591 | * block until there is at least one handle to return. |
592 | */ |
593 | static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len, |
594 | loff_t *off) |
595 | { |
596 | struct doorbell_queue *dbq = filp->private_data; |
597 | uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */ |
598 | unsigned long flags; |
599 | ssize_t count = 0; |
600 | |
601 | /* Make sure we stop when the user buffer is full. */ |
602 | while (len >= sizeof(uint32_t)) { |
603 | uint32_t dbell; /* Local copy of doorbell queue data */ |
604 | |
605 | spin_lock_irqsave(&dbq->lock, flags); |
606 | |
607 | /* |
608 | * If the queue is empty, then either we're done or we need |
609 | * to block. If the application specified O_NONBLOCK, then |
610 | * we return the appropriate error code. |
611 | */ |
612 | if (dbq->head == dbq->tail) { |
613 | spin_unlock_irqrestore(lock: &dbq->lock, flags); |
614 | if (count) |
615 | break; |
616 | if (filp->f_flags & O_NONBLOCK) |
617 | return -EAGAIN; |
618 | if (wait_event_interruptible(dbq->wait, |
619 | dbq->head != dbq->tail)) |
620 | return -ERESTARTSYS; |
621 | continue; |
622 | } |
623 | |
624 | /* |
625 | * Even though we have an smp_wmb() in the ISR, the core |
626 | * might speculatively execute the "dbell = ..." below while |
627 | * it's evaluating the if-statement above. In that case, the |
628 | * value put into dbell could be stale if the core accepts the |
629 | * speculation. To prevent that, we need a read memory barrier |
630 | * here as well. |
631 | */ |
632 | smp_rmb(); |
633 | |
634 | /* Copy the data to a temporary local buffer, because |
635 | * we can't call copy_to_user() from inside a spinlock |
636 | */ |
637 | dbell = dbq->q[dbq->head]; |
638 | dbq->head = nextp(dbq->head); |
639 | |
640 | spin_unlock_irqrestore(lock: &dbq->lock, flags); |
641 | |
642 | if (put_user(dbell, p)) |
643 | return -EFAULT; |
644 | p++; |
645 | count += sizeof(uint32_t); |
646 | len -= sizeof(uint32_t); |
647 | } |
648 | |
649 | return count; |
650 | } |
651 | |
652 | /* |
653 | * Open the driver and prepare for reading doorbells. |
654 | * |
655 | * Every time an application opens the driver, we create a doorbell queue |
656 | * for that file handle. This queue is used for any incoming doorbells. |
657 | */ |
658 | static int fsl_hv_open(struct inode *inode, struct file *filp) |
659 | { |
660 | struct doorbell_queue *dbq; |
661 | unsigned long flags; |
662 | |
663 | dbq = kzalloc(size: sizeof(struct doorbell_queue), GFP_KERNEL); |
664 | if (!dbq) { |
665 | pr_err("fsl-hv: out of memory\n"); |
666 | return -ENOMEM; |
667 | } |
668 | |
669 | spin_lock_init(&dbq->lock); |
670 | init_waitqueue_head(&dbq->wait); |
671 | |
672 | spin_lock_irqsave(&db_list_lock, flags); |
673 | list_add(new: &dbq->list, head: &db_list); |
674 | spin_unlock_irqrestore(lock: &db_list_lock, flags); |
675 | |
676 | filp->private_data = dbq; |
677 | |
678 | return 0; |
679 | } |
680 | |
681 | /* |
682 | * Close the driver |
683 | */ |
684 | static int fsl_hv_close(struct inode *inode, struct file *filp) |
685 | { |
686 | struct doorbell_queue *dbq = filp->private_data; |
687 | unsigned long flags; |
688 | |
689 | spin_lock_irqsave(&db_list_lock, flags); |
690 | list_del(entry: &dbq->list); |
691 | spin_unlock_irqrestore(lock: &db_list_lock, flags); |
692 | |
693 | kfree(objp: dbq); |
694 | |
695 | return 0; |
696 | } |
697 | |
698 | static const struct file_operations fsl_hv_fops = { |
699 | .owner = THIS_MODULE, |
700 | .open = fsl_hv_open, |
701 | .release = fsl_hv_close, |
702 | .poll = fsl_hv_poll, |
703 | .read = fsl_hv_read, |
704 | .unlocked_ioctl = fsl_hv_ioctl, |
705 | .compat_ioctl = compat_ptr_ioctl, |
706 | }; |
707 | |
708 | static struct miscdevice fsl_hv_misc_dev = { |
709 | MISC_DYNAMIC_MINOR, |
710 | "fsl-hv", |
711 | &fsl_hv_fops |
712 | }; |
713 | |
714 | static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data) |
715 | { |
716 | orderly_poweroff(force: false); |
717 | |
718 | return IRQ_HANDLED; |
719 | } |
720 | |
721 | /* |
722 | * Returns the handle of the parent of the given node |
723 | * |
724 | * The handle is the value of the 'hv-handle' property |
725 | */ |
726 | static int get_parent_handle(struct device_node *np) |
727 | { |
728 | struct device_node *parent; |
729 | const uint32_t *prop; |
730 | uint32_t handle; |
731 | int len; |
732 | |
733 | parent = of_get_parent(node: np); |
734 | if (!parent) |
735 | /* It's not really possible for this to fail */ |
736 | return -ENODEV; |
737 | |
738 | /* |
739 | * The proper name for the handle property is "hv-handle", but some |
740 | * older versions of the hypervisor used "reg". |
741 | */ |
742 | prop = of_get_property(node: parent, name: "hv-handle", lenp: &len); |
743 | if (!prop) |
744 | prop = of_get_property(node: parent, name: "reg", lenp: &len); |
745 | |
746 | if (!prop || (len != sizeof(uint32_t))) { |
747 | /* This can happen only if the node is malformed */ |
748 | of_node_put(node: parent); |
749 | return -ENODEV; |
750 | } |
751 | |
752 | handle = be32_to_cpup(p: prop); |
753 | of_node_put(node: parent); |
754 | |
755 | return handle; |
756 | } |
757 | |
758 | /* |
759 | * Register a callback for failover events |
760 | * |
761 | * This function is called by device drivers to register their callback |
762 | * functions for fail-over events. |
763 | */ |
764 | int fsl_hv_failover_register(struct notifier_block *nb) |
765 | { |
766 | return blocking_notifier_chain_register(nh: &failover_subscribers, nb); |
767 | } |
768 | EXPORT_SYMBOL(fsl_hv_failover_register); |
769 | |
770 | /* |
771 | * Unregister a callback for failover events |
772 | */ |
773 | int fsl_hv_failover_unregister(struct notifier_block *nb) |
774 | { |
775 | return blocking_notifier_chain_unregister(nh: &failover_subscribers, nb); |
776 | } |
777 | EXPORT_SYMBOL(fsl_hv_failover_unregister); |
778 | |
/*
 * Report whether the device tree describes a Freescale hypervisor
 *
 * The test is purely a device-tree lookup: there must be a "/hypervisor"
 * node, and that node must carry a property named "fsl,hv-version".
 *
 * Returns non-zero if both are present, zero otherwise.
 */
static int has_fsl_hypervisor(void)
{
	struct device_node *hv = of_find_node_by_path("/hypervisor");
	int found;

	if (!hv)
		return 0;

	found = of_property_present(hv, "fsl,hv-version");
	of_node_put(hv);

	return found;
}
805 | |
806 | /* |
807 | * Freescale hypervisor management driver init |
808 | * |
809 | * This function is called when this module is loaded. |
810 | * |
811 | * Register ourselves as a miscellaneous driver. This will register the |
812 | * fops structure and create the right sysfs entries for udev. |
813 | */ |
814 | static int __init fsl_hypervisor_init(void) |
815 | { |
816 | struct device_node *np; |
817 | struct doorbell_isr *dbisr, *n; |
818 | int ret; |
819 | |
820 | pr_info("Freescale hypervisor management driver\n"); |
821 | |
822 | if (!has_fsl_hypervisor()) { |
823 | pr_info("fsl-hv: no hypervisor found\n"); |
824 | return -ENODEV; |
825 | } |
826 | |
827 | ret = misc_register(misc: &fsl_hv_misc_dev); |
828 | if (ret) { |
829 | pr_err("fsl-hv: cannot register device\n"); |
830 | return ret; |
831 | } |
832 | |
833 | INIT_LIST_HEAD(list: &db_list); |
834 | INIT_LIST_HEAD(list: &isr_list); |
835 | |
836 | for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") { |
837 | unsigned int irq; |
838 | const uint32_t *handle; |
839 | |
840 | handle = of_get_property(node: np, name: "interrupts", NULL); |
841 | irq = irq_of_parse_and_map(node: np, index: 0); |
842 | if (!handle || !irq) { |
843 | pr_err("fsl-hv: no 'interrupts' property in %pOF node\n", |
844 | np); |
845 | continue; |
846 | } |
847 | |
848 | dbisr = kzalloc(size: sizeof(*dbisr), GFP_KERNEL); |
849 | if (!dbisr) |
850 | goto out_of_memory; |
851 | |
852 | dbisr->irq = irq; |
853 | dbisr->doorbell = be32_to_cpup(p: handle); |
854 | |
855 | if (of_device_is_compatible(device: np, "fsl,hv-shutdown-doorbell")) { |
856 | /* The shutdown doorbell gets its own ISR */ |
857 | ret = request_irq(irq, handler: fsl_hv_shutdown_isr, flags: 0, |
858 | name: np->name, NULL); |
859 | } else if (of_device_is_compatible(device: np, |
860 | "fsl,hv-state-change-doorbell")) { |
861 | /* |
862 | * The state change doorbell triggers a notification if |
863 | * the state of the managed partition changes to |
864 | * "stopped". We need a separate interrupt handler for |
865 | * that, and we also need to know the handle of the |
866 | * target partition, not just the handle of the |
867 | * doorbell. |
868 | */ |
869 | dbisr->partition = ret = get_parent_handle(np); |
870 | if (ret < 0) { |
871 | pr_err("fsl-hv: node %pOF has missing or " |
872 | "malformed parent\n", np); |
873 | kfree(objp: dbisr); |
874 | continue; |
875 | } |
876 | ret = request_threaded_irq(irq, handler: fsl_hv_state_change_isr, |
877 | thread_fn: fsl_hv_state_change_thread, |
878 | flags: 0, name: np->name, dev: dbisr); |
879 | } else |
880 | ret = request_irq(irq, handler: fsl_hv_isr, flags: 0, name: np->name, dev: dbisr); |
881 | |
882 | if (ret < 0) { |
883 | pr_err("fsl-hv: could not request irq %u for node %pOF\n", |
884 | irq, np); |
885 | kfree(objp: dbisr); |
886 | continue; |
887 | } |
888 | |
889 | list_add(new: &dbisr->list, head: &isr_list); |
890 | |
891 | pr_info("fsl-hv: registered handler for doorbell %u\n", |
892 | dbisr->doorbell); |
893 | } |
894 | |
895 | return 0; |
896 | |
897 | out_of_memory: |
898 | list_for_each_entry_safe(dbisr, n, &isr_list, list) { |
899 | free_irq(dbisr->irq, dbisr); |
900 | list_del(entry: &dbisr->list); |
901 | kfree(objp: dbisr); |
902 | } |
903 | |
904 | misc_deregister(misc: &fsl_hv_misc_dev); |
905 | |
906 | return -ENOMEM; |
907 | } |
908 | |
909 | /* |
910 | * Freescale hypervisor management driver termination |
911 | * |
912 | * This function is called when this driver is unloaded. |
913 | */ |
914 | static void __exit fsl_hypervisor_exit(void) |
915 | { |
916 | struct doorbell_isr *dbisr, *n; |
917 | |
918 | list_for_each_entry_safe(dbisr, n, &isr_list, list) { |
919 | free_irq(dbisr->irq, dbisr); |
920 | list_del(entry: &dbisr->list); |
921 | kfree(objp: dbisr); |
922 | } |
923 | |
924 | misc_deregister(misc: &fsl_hv_misc_dev); |
925 | } |
926 | |
/* Entry points run when the module is loaded and unloaded */
module_init(fsl_hypervisor_init);
module_exit(fsl_hypervisor_exit);

/* Module metadata */
MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
MODULE_DESCRIPTION("Freescale hypervisor management driver");
MODULE_LICENSE("GPL v2");
933 |
Definitions
- failover_subscribers
- ioctl_restart
- ioctl_status
- ioctl_start
- ioctl_stop
- ioctl_memcpy
- ioctl_doorbell
- ioctl_dtprop
- fsl_hv_ioctl
- db_list
- db_list_lock
- doorbell_queue
- isr_list
- doorbell_isr
- fsl_hv_queue_doorbell
- fsl_hv_isr
- fsl_hv_state_change_thread
- fsl_hv_state_change_isr
- fsl_hv_poll
- fsl_hv_read
- fsl_hv_open
- fsl_hv_close
- fsl_hv_fops
- fsl_hv_misc_dev
- fsl_hv_shutdown_isr
- get_parent_handle
- fsl_hv_failover_register
- fsl_hv_failover_unregister
- has_fsl_hypervisor
- fsl_hypervisor_init
Improve your Profiling and Debugging skills
Find out more