// SPDX-License-Identifier: GPL-2.0
/*
 * ACRN_HSM: Handle I/O requests
 *
 * Copyright (C) 2020 Intel Corporation. All rights reserved.
 *
 * Authors:
 *	Jason Chen CJ <jason.cj.chen@intel.com>
 *	Fengwei Yin <fengwei.yin@intel.com>
 */

#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>

#include <asm/acrn.h>

#include "acrn_drv.h"

static void ioreq_pause(void);
static void ioreq_resume(void);

static void ioreq_dispatcher(struct work_struct *work);
static struct workqueue_struct *ioreq_wq;
static DECLARE_WORK(ioreq_work, ioreq_dispatcher);

static inline bool has_pending_request(struct acrn_ioreq_client *client)
{
	return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
}

static inline bool is_destroying(struct acrn_ioreq_client *client)
{
	return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
}

static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu,
				  struct acrn_io_request *acrn_req)
{
	bool polling_mode;
	int ret = 0;

	polling_mode = acrn_req->completion_polling;
	/* Add barrier() to make sure the writes are done before completion */
	smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE);

	/*
	 * To fulfill the requirement of real-time in several industry
	 * scenarios, like automotive, ACRN can run under the partition mode,
	 * in which User VMs and Service VM are bound to dedicated CPU cores.
	 * Polling mode of handling the I/O request is introduced to achieve a
	 * faster I/O request handling. In polling mode, the hypervisor polls
	 * I/O request's completion. Once an I/O request is marked as
	 * ACRN_IOREQ_STATE_COMPLETE, hypervisor resumes from the polling point
	 * to continue the I/O request flow. Thus, the completion notification
	 * from HSM of I/O request is not needed. Please note,
	 * completion_polling needs to be read before the I/O request being
	 * marked as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the
	 * hypervisor.
	 */
	if (!polling_mode) {
		ret = hcall_notify_req_finish(vm->vmid, vcpu);
		if (ret < 0)
			dev_err(acrn_dev.this_device,
				"Notify I/O request finished failed!\n");
	}

	return ret;
}

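/*
 * Complete an I/O request on behalf of @client. The pending bit of @vcpu is
 * cleared first; if @acrn_req is NULL, the request slot of @vcpu in the
 * shared I/O request buffer is used.
 */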
static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client,
				       u16 vcpu,
				       struct acrn_io_request *acrn_req)
{
	int ret;

	if (vcpu >= client->vm->vcpu_num)
		return -EINVAL;

	clear_bit(vcpu, client->ioreqs_map);
	if (!acrn_req) {
		acrn_req = (struct acrn_io_request *)client->vm->ioreq_buf;
		acrn_req += vcpu;
	}

	ret = ioreq_complete_request(client->vm, vcpu, acrn_req);

	return ret;
}

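/* Complete an I/O request of the default client of the given VM, if any. */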
int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu)
{
	int ret = 0;

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (vm->default_client)
		ret = acrn_ioreq_complete_request(vm->default_client,
						  vcpu, NULL);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	return ret;
}

/**
 * acrn_ioreq_range_add() - Add an iorange monitored by an ioreq client
 * @client: The ioreq client
 * @type: Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
 * @start: Start address of iorange
 * @end: End address of iorange
 *
 * Return: 0 on success, <0 on error
 */
int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
			 u32 type, u64 start, u64 end)
{
	struct acrn_ioreq_range *range;

	if (end < start) {
		dev_err(acrn_dev.this_device,
			"Invalid IO range [0x%llx,0x%llx]\n", start, end);
		return -EINVAL;
	}

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;

	range->type = type;
	range->start = start;
	range->end = end;

	write_lock_bh(&client->range_lock);
	list_add(&range->list, &client->range_list);
	write_unlock_bh(&client->range_lock);

	return 0;
}

/**
 * acrn_ioreq_range_del() - Del an iorange monitored by an ioreq client
 * @client: The ioreq client
 * @type: Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
 * @start: Start address of iorange
 * @end: End address of iorange
 */
void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
			  u32 type, u64 start, u64 end)
{
	struct acrn_ioreq_range *range;

	write_lock_bh(&client->range_lock);
	list_for_each_entry(range, &client->range_list, list) {
		if (type == range->type &&
		    start == range->start &&
		    end == range->end) {
			list_del(&range->list);
			kfree(range);
			break;
		}
	}
	write_unlock_bh(&client->range_lock);
}

/*
 * ioreq_task() is the execution entity of the handler thread of an I/O
 * client. The handler callback of the I/O client is called within the
 * handler thread.
 */
static int ioreq_task(void *data)
{
	struct acrn_ioreq_client *client = data;
	struct acrn_io_request *req;
	unsigned long *ioreqs_map;
	int vcpu, ret;

	/*
	 * Lockless access to ioreqs_map is safe, because
	 * 1) set_bit() and clear_bit() are atomic operations.
	 * 2) I/O requests arrive serialized. The access flow of ioreqs_map is:
	 *	set_bit() - in ioreq_work handler
	 *	Handler callback handles corresponding I/O request
	 *	clear_bit() - in handler thread (including ACRN userspace)
	 *	Mark corresponding I/O request completed
	 *	Loop again if a new I/O request occurs
	 */
	ioreqs_map = client->ioreqs_map;
	while (!kthread_should_stop()) {
		acrn_ioreq_client_wait(client);
		while (has_pending_request(client)) {
			vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num);
			req = client->vm->ioreq_buf->req_slot + vcpu;
			ret = client->handler(client, req);
			if (ret < 0) {
				dev_err(acrn_dev.this_device,
					"IO handle failure: %d\n", ret);
				break;
			}
			acrn_ioreq_complete_request(client, vcpu, req);
		}
	}

	return 0;
}

/*
 * For the non-default I/O clients, give them a chance to complete the
 * current I/O requests if there are any. For the default I/O client, it is
 * safe to clear all pending I/O requests because the clearing request comes
 * from ACRN userspace.
 */
void acrn_ioreq_request_clear(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client;
	bool has_pending = false;
	unsigned long vcpu;
	int retry = 10;

	/*
	 * IO requests of this VM will be completed directly in
	 * acrn_ioreq_dispatch if ACRN_VM_FLAG_CLEARING_IOREQ flag is set.
	 */
	set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);

	/*
	 * acrn_ioreq_request_clear is only called in the VM reset case. Simply
	 * wait 100ms in total for the IO requests' completion.
	 */
	do {
		spin_lock_bh(&vm->ioreq_clients_lock);
		list_for_each_entry(client, &vm->ioreq_clients, list) {
			has_pending = has_pending_request(client);
			if (has_pending)
				break;
		}
		spin_unlock_bh(&vm->ioreq_clients_lock);

		if (has_pending)
			schedule_timeout_interruptible(HZ / 100);
	} while (has_pending && --retry > 0);
	if (retry == 0)
		dev_warn(acrn_dev.this_device,
			 "%s cannot flush pending request!\n", client->name);

	/* Clear all ioreqs belonging to the default client */
	spin_lock_bh(&vm->ioreq_clients_lock);
	client = vm->default_client;
	if (client) {
		for_each_set_bit(vcpu, client->ioreqs_map, ACRN_IO_REQUEST_MAX)
			acrn_ioreq_complete_request(client, vcpu, NULL);
	}
	spin_unlock_bh(&vm->ioreq_clients_lock);

	/* Clear ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */
	clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
}

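/*
 * Wait until the client has pending I/O requests or is being torn down.
 * Returns -ENODEV if the default client is being destroyed, 0 otherwise.
 */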
int acrn_ioreq_client_wait(struct acrn_ioreq_client *client)
{
	if (client->is_default) {
		/*
		 * In the default client, a user space thread waits on the
		 * waitqueue. The is_destroying() check is used to notify user
		 * space that the client is going to be destroyed.
		 */
		wait_event_interruptible(client->wq,
					 has_pending_request(client) ||
					 is_destroying(client));
		if (is_destroying(client))
			return -ENODEV;
	} else {
		wait_event_interruptible(client->wq,
					 has_pending_request(client) ||
					 kthread_should_stop());
	}

	return 0;
}

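/*
 * PIO 0xCF8 is the PCI configuration address port and 0xCFC..0xCFF is the
 * configuration data port of the legacy CF8/CFC access mechanism.
 */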
static bool is_cfg_addr(struct acrn_io_request *req)
{
	return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
		(req->reqs.pio_request.address == 0xcf8));
}

static bool is_cfg_data(struct acrn_io_request *req)
{
	return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
		((req->reqs.pio_request.address >= 0xcfc) &&
		 (req->reqs.pio_request.address < (0xcfc + 4))));
}

/* The low 8 bits of the supported pci_reg address */
#define PCI_LOWREG_MASK  0xFC
/* The high 4 bits of the supported pci_reg address */
#define PCI_HIGHREG_MASK 0xF00
/* Max number of supported functions */
#define PCI_FUNCMAX	7
/* Max number of supported slots */
#define PCI_SLOTMAX	31
/* Max number of supported buses */
#define PCI_BUSMAX	255
#define CONF1_ENABLE	0x80000000UL
/*
 * A PCI configuration space access via PIO 0xCF8 and 0xCFC normally consists
 * of the following two steps:
 *   1) write the address into port 0xCF8
 *   2) access the data in/from port 0xCFC
 * This function combines such paired PCI configuration space I/O requests
 * into one ACRN_IOREQ_TYPE_PCICFG type I/O request and continues the
 * processing.
 */
static bool handle_cf8cfc(struct acrn_vm *vm,
			  struct acrn_io_request *req, u16 vcpu)
{
	int offset, pci_cfg_addr, pci_reg;
	bool is_handled = false;

	if (is_cfg_addr(req)) {
		WARN_ON(req->reqs.pio_request.size != 4);
		if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_WRITE)
			vm->pci_conf_addr = req->reqs.pio_request.value;
		else
			req->reqs.pio_request.value = vm->pci_conf_addr;
		is_handled = true;
	} else if (is_cfg_data(req)) {
		if (!(vm->pci_conf_addr & CONF1_ENABLE)) {
			if (req->reqs.pio_request.direction ==
			    ACRN_IOREQ_DIR_READ)
				req->reqs.pio_request.value = 0xffffffff;
			is_handled = true;
		} else {
			offset = req->reqs.pio_request.address - 0xcfc;

			req->type = ACRN_IOREQ_TYPE_PCICFG;
			pci_cfg_addr = vm->pci_conf_addr;
			req->reqs.pci_request.bus =
					(pci_cfg_addr >> 16) & PCI_BUSMAX;
			req->reqs.pci_request.dev =
					(pci_cfg_addr >> 11) & PCI_SLOTMAX;
			req->reqs.pci_request.func =
					(pci_cfg_addr >> 8) & PCI_FUNCMAX;
			pci_reg = (pci_cfg_addr & PCI_LOWREG_MASK) +
				  ((pci_cfg_addr >> 16) & PCI_HIGHREG_MASK);
			req->reqs.pci_request.reg = pci_reg + offset;
		}
	}

	if (is_handled)
		ioreq_complete_request(vm, vcpu, req);

	return is_handled;
}

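/* Check whether an MMIO or port I/O request falls entirely within @range. */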
static bool acrn_in_range(struct acrn_ioreq_range *range,
			  struct acrn_io_request *req)
{
	bool ret = false;

	if (range->type == req->type) {
		switch (req->type) {
		case ACRN_IOREQ_TYPE_MMIO:
			if (req->reqs.mmio_request.address >= range->start &&
			    (req->reqs.mmio_request.address +
			     req->reqs.mmio_request.size - 1) <= range->end)
				ret = true;
			break;
		case ACRN_IOREQ_TYPE_PORTIO:
			if (req->reqs.pio_request.address >= range->start &&
			    (req->reqs.pio_request.address +
			     req->reqs.pio_request.size - 1) <= range->end)
				ret = true;
			break;
		default:
			break;
		}
	}

	return ret;
}

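/*
 * Find the ioreq client which registered a range covering this I/O request.
 * Falls back to the default client when no registered range matches.
 */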
static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm,
						   struct acrn_io_request *req)
{
	struct acrn_ioreq_client *client, *found = NULL;
	struct acrn_ioreq_range *range;

	lockdep_assert_held(&vm->ioreq_clients_lock);

	list_for_each_entry(client, &vm->ioreq_clients, list) {
		read_lock_bh(&client->range_lock);
		list_for_each_entry(range, &client->range_list, list) {
			if (acrn_in_range(range, req)) {
				found = client;
				break;
			}
		}
		read_unlock_bh(&client->range_lock);
		if (found)
			break;
	}
	return found ? found : vm->default_client;
}

/**
 * acrn_ioreq_client_create() - Create an ioreq client
 * @vm: The VM that this client belongs to
 * @handler: The ioreq_handler of the ioreq client. acrn_hsm will create a
 *	     kernel thread and call the handler to handle I/O requests.
 * @priv: Private data for the handler
 * @is_default: If it is the default client
 * @name: The name of the ioreq client
 *
 * Return: acrn_ioreq_client pointer on success, NULL on error
 */
struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
						   ioreq_handler_t handler,
						   void *priv, bool is_default,
						   const char *name)
{
	struct acrn_ioreq_client *client;

	if (!handler && !is_default) {
		dev_dbg(acrn_dev.this_device,
			"Cannot create non-default client w/o handler!\n");
		return NULL;
	}
	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return NULL;

	client->handler = handler;
	client->vm = vm;
	client->priv = priv;
	client->is_default = is_default;
	if (name)
		strncpy(client->name, name, sizeof(client->name) - 1);
	rwlock_init(&client->range_lock);
	INIT_LIST_HEAD(&client->range_list);
	init_waitqueue_head(&client->wq);

	if (client->handler) {
		client->thread = kthread_run(ioreq_task, client, "VM%u-%s",
					     client->vm->vmid, client->name);
		if (IS_ERR(client->thread)) {
			kfree(client);
			return NULL;
		}
	}

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (is_default)
		vm->default_client = client;
	else
		list_add(&client->list, &vm->ioreq_clients);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name);
	return client;
}

/**
 * acrn_ioreq_client_destroy() - Destroy an ioreq client
 * @client: The ioreq client
 */
void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client)
{
	struct acrn_ioreq_range *range, *next;
	struct acrn_vm *vm = client->vm;

	dev_dbg(acrn_dev.this_device,
		"Destroy ioreq client %s.\n", client->name);
	ioreq_pause();
	set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
	if (client->is_default)
		wake_up_interruptible(&client->wq);
	else
		kthread_stop(client->thread);

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (client->is_default)
		vm->default_client = NULL;
	else
		list_del(&client->list);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	write_lock_bh(&client->range_lock);
	list_for_each_entry_safe(range, next, &client->range_list, list) {
		list_del(&range->list);
		kfree(range);
	}
	write_unlock_bh(&client->range_lock);
	kfree(client);

	ioreq_resume();
}

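/*
 * Dispatch the pending I/O requests of a VM: walk every vCPU request slot in
 * the shared buffer, complete requests directly while the VM is clearing
 * I/O requests or when they are CF8/CFC accesses handled here, and hand the
 * rest to the matching ioreq client by setting its pending bit and waking it.
 */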
static int acrn_ioreq_dispatch(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client;
	struct acrn_io_request *req;
	int i;

	for (i = 0; i < vm->vcpu_num; i++) {
		req = vm->ioreq_buf->req_slot + i;

		/* barrier the read of processed of acrn_io_request */
		if (smp_load_acquire(&req->processed) ==
				     ACRN_IOREQ_STATE_PENDING) {
			/* Complete the IO request directly in clearing stage */
			if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) {
				ioreq_complete_request(vm, i, req);
				continue;
			}
			if (handle_cf8cfc(vm, req, i))
				continue;

			spin_lock_bh(&vm->ioreq_clients_lock);
			client = find_ioreq_client(vm, req);
			if (!client) {
				dev_err(acrn_dev.this_device,
					"Failed to find ioreq client!\n");
				spin_unlock_bh(&vm->ioreq_clients_lock);
				return -EINVAL;
			}
			if (!client->is_default)
				req->kernel_handled = 1;
			else
				req->kernel_handled = 0;
			/*
			 * Add barrier() to make sure the writes are done
			 * before setting ACRN_IOREQ_STATE_PROCESSING
			 */
			smp_store_release(&req->processed,
					  ACRN_IOREQ_STATE_PROCESSING);
			set_bit(i, client->ioreqs_map);
			wake_up_interruptible(&client->wq);
			spin_unlock_bh(&vm->ioreq_clients_lock);
		}
	}

	return 0;
}

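/* Work handler: dispatch I/O requests for the VMs on acrn_vm_list. */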
static void ioreq_dispatcher(struct work_struct *work)
{
	struct acrn_vm *vm;

	read_lock(&acrn_vm_list_lock);
	list_for_each_entry(vm, &acrn_vm_list, list) {
		if (!vm->ioreq_buf)
			break;
		acrn_ioreq_dispatch(vm);
	}
	read_unlock(&acrn_vm_list_lock);
}

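/*
 * Notification handler registered via acrn_setup_intr_handler(). It defers
 * the actual dispatching to the ordered workqueue.
 */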
static void ioreq_intr_handler(void)
{
	queue_work(ioreq_wq, &ioreq_work);
}

static void ioreq_pause(void)
{
	/* Flush and unarm the handler to ensure no I/O requests pending */
	acrn_remove_intr_handler();
	drain_workqueue(ioreq_wq);
}

static void ioreq_resume(void)
{
	/* Schedule after enabling in case other clients miss the interrupt */
	acrn_setup_intr_handler(ioreq_intr_handler);
	queue_work(ioreq_wq, &ioreq_work);
}

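/* Register the notification handler and create the I/O request workqueue. */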
int acrn_ioreq_intr_setup(void)
{
	acrn_setup_intr_handler(ioreq_intr_handler);
	ioreq_wq = alloc_ordered_workqueue("ioreq_wq",
					   WQ_HIGHPRI | WQ_MEM_RECLAIM);
	if (!ioreq_wq) {
		dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n");
		acrn_remove_intr_handler();
		return -ENOMEM;
	}
	return 0;
}

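/* Tear down the I/O request workqueue and the notification handler. */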
void acrn_ioreq_intr_remove(void)
{
	if (ioreq_wq)
		destroy_workqueue(ioreq_wq);
	acrn_remove_intr_handler();
}

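/*
 * Initialize the I/O request buffer of a VM: pin the page at the userspace
 * address @buf_vma and pass its physical address to the hypervisor.
 */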
int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma)
{
	struct acrn_ioreq_buffer *set_buffer;
	struct page *page;
	int ret;

	if (vm->ioreq_buf)
		return -EEXIST;

	set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL);
	if (!set_buffer)
		return -ENOMEM;

	ret = pin_user_pages_fast(buf_vma, 1,
				  FOLL_WRITE | FOLL_LONGTERM, &page);
	if (unlikely(ret != 1) || !page) {
		dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n");
		ret = -EFAULT;
		goto free_buf;
	}

	vm->ioreq_buf = page_address(page);
	vm->ioreq_page = page;
	set_buffer->ioreq_buf = page_to_phys(page);
	ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer));
	if (ret < 0) {
		dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n");
		unpin_user_page(page);
		vm->ioreq_buf = NULL;
		goto free_buf;
	}

	dev_dbg(acrn_dev.this_device,
		"Init ioreq buffer %pK!\n", vm->ioreq_buf);
	ret = 0;
free_buf:
	kfree(set_buffer);
	return ret;
}

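/*
 * Tear down the I/O request infrastructure of a VM: destroy all of its
 * ioreq clients and unpin the shared I/O request buffer page.
 */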
void acrn_ioreq_deinit(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client, *next;

	dev_dbg(acrn_dev.this_device,
		"Deinit ioreq buffer %pK!\n", vm->ioreq_buf);
	/* Destroy all clients belonging to this VM */
	list_for_each_entry_safe(client, next, &vm->ioreq_clients, list)
		acrn_ioreq_client_destroy(client);
	if (vm->default_client)
		acrn_ioreq_client_destroy(vm->default_client);

	if (vm->ioreq_buf && vm->ioreq_page) {
		unpin_user_page(vm->ioreq_page);
		vm->ioreq_buf = NULL;
	}
}
