1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/slab.h> |
26 | #include <linux/list.h> |
27 | #include "kfd_device_queue_manager.h" |
28 | #include "kfd_priv.h" |
29 | #include "kfd_kernel_queue.h" |
30 | #include "amdgpu_amdkfd.h" |
31 | |
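/*
 * Look up the process queue node that owns the given queue ID. Both user
 * queues (pqn->q) and kernel queues such as the DIQ (pqn->kq) are matched.
 * Returns NULL if the process has no queue with that ID.
 */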
32 | static inline struct process_queue_node *get_queue_by_qid( |
33 | struct process_queue_manager *pqm, unsigned int qid) |
34 | { |
35 | struct process_queue_node *pqn; |
36 | |
37 | list_for_each_entry(pqn, &pqm->queues, process_queue_list) { |
38 | if ((pqn->q && pqn->q->properties.queue_id == qid) || |
39 | (pqn->kq && pqn->kq->queue->properties.queue_id == qid)) |
40 | return pqn; |
41 | } |
42 | |
43 | return NULL; |
44 | } |
45 | |
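/*
 * Reserve a specific queue slot, used when restoring a queue from a CRIU
 * checkpoint so that the original queue ID is kept. Fails with -ENOSPC if
 * the requested slot is already in use.
 */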
46 | static int assign_queue_slot_by_qid(struct process_queue_manager *pqm, |
47 | unsigned int qid) |
48 | { |
49 | if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) |
50 | return -EINVAL; |
51 | |
52 | if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) { |
		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
54 | return -ENOSPC; |
55 | } |
56 | |
57 | return 0; |
58 | } |
59 | |
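/* Find and reserve the lowest free queue slot for this process. */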
60 | static int find_available_queue_slot(struct process_queue_manager *pqm, |
61 | unsigned int *qid) |
62 | { |
63 | unsigned long found; |
64 | |
65 | found = find_first_zero_bit(pqm->queue_slot_bitmap, |
66 | KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); |
67 | |
	pr_debug("The new slot id %lu\n", found);
69 | |
70 | if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { |
		pr_info("Cannot open more queues for process with pasid 0x%x\n",
			pqm->process->pasid);
73 | return -ENOMEM; |
74 | } |
75 | |
76 | set_bit(found, pqm->queue_slot_bitmap); |
77 | *qid = found; |
78 | |
79 | return 0; |
80 | } |
81 | |
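/*
 * Remove all queues of the process from one device. The already_dequeued
 * flag makes this safe to call more than once during process teardown.
 */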
82 | void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) |
83 | { |
84 | struct kfd_dev *dev = pdd->dev; |
85 | |
86 | if (pdd->already_dequeued) |
87 | return; |
88 | |
89 | dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); |
90 | pdd->already_dequeued = true; |
91 | } |
92 | |
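/*
 * Attach the device's global wave sync (GWS) resource to a user queue, or
 * detach it when gws is NULL. Only one queue per process may hold GWS, and
 * the queue is updated through the device queue manager afterwards.
 */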
93 | int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, |
94 | void *gws) |
95 | { |
96 | struct kfd_dev *dev = NULL; |
97 | struct process_queue_node *pqn; |
98 | struct kfd_process_device *pdd; |
99 | struct kgd_mem *mem = NULL; |
100 | int ret; |
101 | |
102 | pqn = get_queue_by_qid(pqm, qid); |
103 | if (!pqn) { |
		pr_err("Queue id does not match any known queue\n");
105 | return -EINVAL; |
106 | } |
107 | |
108 | if (pqn->q) |
109 | dev = pqn->q->device; |
110 | if (WARN_ON(!dev)) |
111 | return -ENODEV; |
112 | |
113 | pdd = kfd_get_process_device_data(dev, pqm->process); |
114 | if (!pdd) { |
		pr_err("Process device data doesn't exist\n");
116 | return -EINVAL; |
117 | } |
118 | |
	/* Only one queue per process can have GWS assigned */
120 | if (gws && pdd->qpd.num_gws) |
121 | return -EBUSY; |
122 | |
123 | if (!gws && pdd->qpd.num_gws == 0) |
124 | return -EINVAL; |
125 | |
126 | if (gws) |
127 | ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, |
128 | gws, &mem); |
129 | else |
130 | ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, |
131 | pqn->q->gws); |
132 | if (unlikely(ret)) |
133 | return ret; |
134 | |
135 | pqn->q->gws = mem; |
136 | pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; |
137 | |
138 | return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, |
139 | pqn->q, NULL); |
140 | } |
141 | |
142 | void kfd_process_dequeue_from_all_devices(struct kfd_process *p) |
143 | { |
144 | int i; |
145 | |
146 | for (i = 0; i < p->n_pdds; i++) |
147 | kfd_process_dequeue_from_device(p->pdds[i]); |
148 | } |
149 | |
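/* Initialize the per-process queue manager and its queue slot bitmap. */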
150 | int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) |
151 | { |
152 | INIT_LIST_HEAD(&pqm->queues); |
153 | pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, |
154 | GFP_KERNEL); |
155 | if (!pqm->queue_slot_bitmap) |
156 | return -ENOMEM; |
157 | pqm->process = p; |
158 | |
159 | return 0; |
160 | } |
161 | |
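/*
 * Tear down the per-process queue manager: drop any GWS attachment, remove
 * procfs entries, free all queue nodes and release the slot bitmap.
 */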
162 | void pqm_uninit(struct process_queue_manager *pqm) |
163 | { |
164 | struct process_queue_node *pqn, *next; |
165 | |
166 | list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { |
167 | if (pqn->q && pqn->q->gws) |
168 | amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, |
169 | pqn->q->gws); |
170 | kfd_procfs_del_queue(pqn->q); |
171 | uninit_queue(pqn->q); |
172 | list_del(&pqn->process_queue_list); |
173 | kfree(pqn); |
174 | } |
175 | |
176 | bitmap_free(pqm->queue_slot_bitmap); |
177 | pqm->queue_slot_bitmap = NULL; |
178 | } |
179 | |
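/*
 * Allocate and initialize a user-mode queue object. When the MES scheduler
 * is enabled, a gang context buffer is allocated for the queue as well.
 */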
180 | static int init_user_queue(struct process_queue_manager *pqm, |
181 | struct kfd_dev *dev, struct queue **q, |
182 | struct queue_properties *q_properties, |
183 | struct file *f, struct amdgpu_bo *wptr_bo, |
184 | unsigned int qid) |
185 | { |
186 | int retval; |
187 | |
	/* Doorbell initialized in user space */
189 | q_properties->doorbell_ptr = NULL; |
190 | |
	/* let DQM handle it */
192 | q_properties->vmid = 0; |
193 | q_properties->queue_id = qid; |
194 | |
195 | retval = init_queue(q, q_properties); |
196 | if (retval != 0) |
197 | return retval; |
198 | |
199 | (*q)->device = dev; |
200 | (*q)->process = pqm->process; |
201 | |
202 | if (dev->shared_resources.enable_mes) { |
203 | retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, |
204 | AMDGPU_MES_GANG_CTX_SIZE, |
205 | &(*q)->gang_ctx_bo, |
206 | &(*q)->gang_ctx_gpu_addr, |
207 | &(*q)->gang_ctx_cpu_ptr, |
208 | false); |
209 | if (retval) { |
			pr_err("failed to allocate gang context bo\n");
211 | goto cleanup; |
212 | } |
213 | memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); |
214 | (*q)->wptr_bo = wptr_bo; |
215 | } |
216 | |
	pr_debug("PQM After init queue\n");
218 | return 0; |
219 | |
220 | cleanup: |
221 | if (dev->shared_resources.enable_mes) |
222 | uninit_queue(*q); |
223 | return retval; |
224 | } |
225 | |
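/*
 * Create a queue for the process on the given device. This covers user
 * compute and SDMA queues as well as the kernel DIQ. A queue slot is
 * reserved (or a specific slot when restoring from a CRIU checkpoint) and
 * the process is registered with the device queue manager when its first
 * queue is created.
 */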
226 | int pqm_create_queue(struct process_queue_manager *pqm, |
227 | struct kfd_dev *dev, |
228 | struct file *f, |
229 | struct queue_properties *properties, |
230 | unsigned int *qid, |
231 | struct amdgpu_bo *wptr_bo, |
232 | const struct kfd_criu_queue_priv_data *q_data, |
233 | const void *restore_mqd, |
234 | const void *restore_ctl_stack, |
235 | uint32_t *p_doorbell_offset_in_process) |
236 | { |
237 | int retval; |
238 | struct kfd_process_device *pdd; |
239 | struct queue *q; |
240 | struct process_queue_node *pqn; |
241 | struct kernel_queue *kq; |
242 | enum kfd_queue_type type = properties->type; |
243 | unsigned int max_queues = 127; /* HWS limit */ |
244 | |
245 | q = NULL; |
246 | kq = NULL; |
247 | |
248 | pdd = kfd_get_process_device_data(dev, pqm->process); |
249 | if (!pdd) { |
		pr_err("Process device data doesn't exist\n");
251 | return -1; |
252 | } |
253 | |
	/*
	 * For a debug process, verify that it is within the static queues
	 * limit, currently set to half of the total available HQD slots.
	 * If we are just about to create a DIQ, the is_debug flag is not set
	 * yet, hence we also check the queue type.
	 */
260 | if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) |
261 | max_queues = dev->device_info.max_no_of_hqd/2; |
262 | |
263 | if (pdd->qpd.queue_count >= max_queues) |
264 | return -ENOSPC; |
265 | |
266 | if (q_data) { |
267 | retval = assign_queue_slot_by_qid(pqm, q_data->q_id); |
268 | *qid = q_data->q_id; |
269 | } else |
270 | retval = find_available_queue_slot(pqm, qid); |
271 | |
272 | if (retval != 0) |
273 | return retval; |
274 | |
275 | if (list_empty(&pdd->qpd.queues_list) && |
276 | list_empty(&pdd->qpd.priv_queue_list)) |
277 | dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); |
278 | |
279 | pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); |
280 | if (!pqn) { |
281 | retval = -ENOMEM; |
282 | goto err_allocate_pqn; |
283 | } |
284 | |
285 | switch (type) { |
286 | case KFD_QUEUE_TYPE_SDMA: |
287 | case KFD_QUEUE_TYPE_SDMA_XGMI: |
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether an SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
294 | retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); |
295 | if (retval != 0) |
296 | goto err_create_queue; |
297 | pqn->q = q; |
298 | pqn->kq = NULL; |
299 | retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data, |
300 | restore_mqd, restore_ctl_stack); |
301 | print_queue(q); |
302 | break; |
303 | |
304 | case KFD_QUEUE_TYPE_COMPUTE: |
		/* check for oversubscription */
306 | if ((dev->dqm->sched_policy == |
307 | KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && |
308 | ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || |
309 | (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) { |
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
311 | retval = -EPERM; |
312 | goto err_create_queue; |
313 | } |
314 | |
315 | retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); |
316 | if (retval != 0) |
317 | goto err_create_queue; |
318 | pqn->q = q; |
319 | pqn->kq = NULL; |
320 | retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data, |
321 | restore_mqd, restore_ctl_stack); |
322 | print_queue(q); |
323 | break; |
324 | case KFD_QUEUE_TYPE_DIQ: |
325 | kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); |
326 | if (!kq) { |
327 | retval = -ENOMEM; |
328 | goto err_create_queue; |
329 | } |
330 | kq->queue->properties.queue_id = *qid; |
331 | pqn->kq = kq; |
332 | pqn->q = NULL; |
333 | retval = dev->dqm->ops.create_kernel_queue(dev->dqm, |
334 | kq, &pdd->qpd); |
335 | break; |
336 | default: |
		WARN(1, "Invalid queue type %d", type);
338 | retval = -EINVAL; |
339 | } |
340 | |
341 | if (retval != 0) { |
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
			pqm->process->pasid, type, retval);
344 | goto err_create_queue; |
345 | } |
346 | |
347 | if (q && p_doorbell_offset_in_process) |
348 | /* Return the doorbell offset within the doorbell page |
349 | * to the caller so it can be passed up to user mode |
350 | * (in bytes). |
351 | * There are always 1024 doorbells per process, so in case |
352 | * of 8-byte doorbells, there are two doorbell pages per |
353 | * process. |
354 | */ |
355 | *p_doorbell_offset_in_process = |
356 | (q->properties.doorbell_off * sizeof(uint32_t)) & |
357 | (kfd_doorbell_process_slice(dev) - 1); |
358 | |
	pr_debug("PQM After DQM create queue\n");
360 | |
361 | list_add(&pqn->process_queue_list, &pqm->queues); |
362 | |
363 | if (q) { |
364 | pr_debug("PQM done creating queue\n" ); |
365 | kfd_procfs_add_queue(q); |
366 | print_queue_properties(&q->properties); |
367 | } |
368 | |
369 | return retval; |
370 | |
371 | err_create_queue: |
372 | uninit_queue(q); |
373 | if (kq) |
374 | kernel_queue_uninit(kq, false); |
375 | kfree(pqn); |
376 | err_allocate_pqn: |
	/* If the queues list is empty, unregister the process from the device */
378 | clear_bit(*qid, pqm->queue_slot_bitmap); |
379 | if (list_empty(&pdd->qpd.queues_list) && |
380 | list_empty(&pdd->qpd.priv_queue_list)) |
381 | dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); |
382 | return retval; |
383 | } |
384 | |
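/*
 * Destroy the queue identified by qid: release its GWS attachment, MES
 * gang context and wptr buffers, procfs entry and queue slot. The process
 * is unregistered from the device queue manager once its last queue is
 * gone.
 */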
385 | int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) |
386 | { |
387 | struct process_queue_node *pqn; |
388 | struct kfd_process_device *pdd; |
389 | struct device_queue_manager *dqm; |
390 | struct kfd_dev *dev; |
391 | int retval; |
392 | |
393 | dqm = NULL; |
394 | |
395 | retval = 0; |
396 | |
397 | pqn = get_queue_by_qid(pqm, qid); |
398 | if (!pqn) { |
		pr_err("Queue id does not match any known queue\n");
400 | return -EINVAL; |
401 | } |
402 | |
403 | dev = NULL; |
404 | if (pqn->kq) |
405 | dev = pqn->kq->dev; |
406 | if (pqn->q) |
407 | dev = pqn->q->device; |
408 | if (WARN_ON(!dev)) |
409 | return -ENODEV; |
410 | |
411 | pdd = kfd_get_process_device_data(dev, pqm->process); |
412 | if (!pdd) { |
		pr_err("Process device data doesn't exist\n");
414 | return -1; |
415 | } |
416 | |
417 | if (pqn->kq) { |
418 | /* destroy kernel queue (DIQ) */ |
419 | dqm = pqn->kq->dev->dqm; |
420 | dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); |
421 | kernel_queue_uninit(pqn->kq, false); |
422 | } |
423 | |
424 | if (pqn->q) { |
425 | kfd_procfs_del_queue(pqn->q); |
426 | dqm = pqn->q->device->dqm; |
427 | retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); |
428 | if (retval) { |
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
				pqm->process->pasid,
				pqn->q->properties.queue_id, retval);
432 | if (retval != -ETIME) |
433 | goto err_destroy_queue; |
434 | } |
435 | |
436 | if (pqn->q->gws) { |
437 | amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, |
438 | pqn->q->gws); |
439 | pdd->qpd.num_gws = 0; |
440 | } |
441 | |
442 | if (dev->shared_resources.enable_mes) { |
443 | amdgpu_amdkfd_free_gtt_mem(dev->adev, |
444 | pqn->q->gang_ctx_bo); |
445 | if (pqn->q->wptr_bo) |
446 | amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); |
447 | |
448 | } |
449 | uninit_queue(pqn->q); |
450 | } |
451 | |
452 | list_del(&pqn->process_queue_list); |
453 | kfree(pqn); |
454 | clear_bit(qid, pqm->queue_slot_bitmap); |
455 | |
456 | if (list_empty(&pdd->qpd.queues_list) && |
457 | list_empty(&pdd->qpd.priv_queue_list)) |
458 | dqm->ops.unregister_process(dqm, &pdd->qpd); |
459 | |
460 | err_destroy_queue: |
461 | return retval; |
462 | } |
463 | |
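/* Update ring address, size, percentage and priority of a user queue. */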
464 | int pqm_update_queue_properties(struct process_queue_manager *pqm, |
465 | unsigned int qid, struct queue_properties *p) |
466 | { |
467 | int retval; |
468 | struct process_queue_node *pqn; |
469 | |
470 | pqn = get_queue_by_qid(pqm, qid); |
471 | if (!pqn) { |
		pr_debug("No queue %d exists for update operation\n", qid);
473 | return -EFAULT; |
474 | } |
475 | |
476 | pqn->q->properties.queue_address = p->queue_address; |
477 | pqn->q->properties.queue_size = p->queue_size; |
478 | pqn->q->properties.queue_percent = p->queue_percent; |
479 | pqn->q->properties.priority = p->priority; |
480 | |
481 | retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, |
482 | pqn->q, NULL); |
483 | if (retval != 0) |
484 | return retval; |
485 | |
486 | return 0; |
487 | } |
488 | |
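/*
 * Update the MQD of a user queue, e.g. to apply a new CU mask. On GFX10
 * and later, where two CUs share a WGP, the mask must enable CUs in
 * adjacent pairs.
 */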
489 | int pqm_update_mqd(struct process_queue_manager *pqm, |
490 | unsigned int qid, struct mqd_update_info *minfo) |
491 | { |
492 | int retval; |
493 | struct process_queue_node *pqn; |
494 | |
495 | pqn = get_queue_by_qid(pqm, qid); |
496 | if (!pqn) { |
		pr_debug("No queue %d exists for update operation\n", qid);
498 | return -EFAULT; |
499 | } |
500 | |
501 | /* ASICs that have WGPs must enforce pairwise enabled mask checks. */ |
502 | if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr && |
503 | KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) { |
504 | int i; |
505 | |
506 | for (i = 0; i < minfo->cu_mask.count; i += 2) { |
507 | uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3; |
508 | |
509 | if (cu_pair && cu_pair != 0x3) { |
				pr_debug("CUs must be adjacent pairwise enabled.\n");
511 | return -EINVAL; |
512 | } |
513 | } |
514 | } |
515 | |
516 | retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, |
517 | pqn->q, minfo); |
518 | if (retval != 0) |
519 | return retval; |
520 | |
521 | return 0; |
522 | } |
523 | |
524 | struct kernel_queue *pqm_get_kernel_queue( |
525 | struct process_queue_manager *pqm, |
526 | unsigned int qid) |
527 | { |
528 | struct process_queue_node *pqn; |
529 | |
530 | pqn = get_queue_by_qid(pqm, qid); |
531 | if (pqn && pqn->kq) |
532 | return pqn->kq; |
533 | |
534 | return NULL; |
535 | } |
536 | |
537 | struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, |
538 | unsigned int qid) |
539 | { |
540 | struct process_queue_node *pqn; |
541 | |
542 | pqn = get_queue_by_qid(pqm, qid); |
543 | return pqn ? pqn->q : NULL; |
544 | } |
545 | |
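/*
 * Copy the saved control stack of a user queue to a user-space buffer and
 * report how much of the control stack and save area is in use.
 */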
546 | int pqm_get_wave_state(struct process_queue_manager *pqm, |
547 | unsigned int qid, |
548 | void __user *ctl_stack, |
549 | u32 *ctl_stack_used_size, |
550 | u32 *save_area_used_size) |
551 | { |
552 | struct process_queue_node *pqn; |
553 | |
554 | pqn = get_queue_by_qid(pqm, qid); |
555 | if (!pqn) { |
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
558 | return -EFAULT; |
559 | } |
560 | |
561 | return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm, |
562 | pqn->q, |
563 | ctl_stack, |
564 | ctl_stack_used_size, |
565 | save_area_used_size); |
566 | } |
567 | |
568 | static int get_queue_data_sizes(struct kfd_process_device *pdd, |
569 | struct queue *q, |
570 | uint32_t *mqd_size, |
571 | uint32_t *ctl_stack_size) |
572 | { |
573 | int ret; |
574 | |
575 | ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm, |
576 | q->properties.queue_id, |
577 | mqd_size, |
578 | ctl_stack_size); |
579 | if (ret) |
		pr_err("Failed to get queue dump info (%d)\n", ret);
581 | |
582 | return ret; |
583 | } |
584 | |
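/*
 * Count the process's checkpointable queues and compute the total private
 * data size (per-queue priv data struct plus MQD and control stack) needed
 * to checkpoint them.
 */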
585 | int kfd_process_get_queue_info(struct kfd_process *p, |
586 | uint32_t *num_queues, |
587 | uint64_t *priv_data_sizes) |
588 | { |
	uint32_t extra_data_sizes = 0;
590 | struct queue *q; |
591 | int i; |
592 | int ret; |
593 | |
594 | *num_queues = 0; |
595 | |
596 | /* Run over all PDDs of the process */ |
597 | for (i = 0; i < p->n_pdds; i++) { |
598 | struct kfd_process_device *pdd = p->pdds[i]; |
599 | |
600 | list_for_each_entry(q, &pdd->qpd.queues_list, list) { |
601 | if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || |
602 | q->properties.type == KFD_QUEUE_TYPE_SDMA || |
603 | q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { |
604 | uint32_t mqd_size, ctl_stack_size; |
605 | |
606 | *num_queues = *num_queues + 1; |
607 | |
608 | ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size); |
609 | if (ret) |
610 | return ret; |
611 | |
612 | extra_data_sizes += mqd_size + ctl_stack_size; |
613 | } else { |
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
615 | return -EOPNOTSUPP; |
616 | } |
617 | } |
618 | } |
619 | *priv_data_sizes = extra_data_sizes + |
620 | (*num_queues * sizeof(struct kfd_criu_queue_priv_data)); |
621 | |
622 | return 0; |
623 | } |
624 | |
625 | static int pqm_checkpoint_mqd(struct process_queue_manager *pqm, |
626 | unsigned int qid, |
627 | void *mqd, |
628 | void *ctl_stack) |
629 | { |
630 | struct process_queue_node *pqn; |
631 | |
632 | pqn = get_queue_by_qid(pqm, qid); |
633 | if (!pqn) { |
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
635 | return -EFAULT; |
636 | } |
637 | |
638 | if (!pqn->q->device->dqm->ops.checkpoint_mqd) { |
		pr_err("amdkfd: queue dumping not supported on this device\n");
640 | return -EOPNOTSUPP; |
641 | } |
642 | |
643 | return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm, |
644 | pqn->q, mqd, ctl_stack); |
645 | } |
646 | |
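/*
 * Fill the CRIU private data record for one queue: queue properties are
 * copied from the queue, followed by a snapshot of its MQD and control
 * stack.
 */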
647 | static int criu_checkpoint_queue(struct kfd_process_device *pdd, |
648 | struct queue *q, |
649 | struct kfd_criu_queue_priv_data *q_data) |
650 | { |
651 | uint8_t *mqd, *ctl_stack; |
652 | int ret; |
653 | |
654 | mqd = (void *)(q_data + 1); |
655 | ctl_stack = mqd + q_data->mqd_size; |
656 | |
657 | q_data->gpu_id = pdd->user_gpu_id; |
658 | q_data->type = q->properties.type; |
659 | q_data->format = q->properties.format; |
660 | q_data->q_id = q->properties.queue_id; |
661 | q_data->q_address = q->properties.queue_address; |
662 | q_data->q_size = q->properties.queue_size; |
663 | q_data->priority = q->properties.priority; |
664 | q_data->q_percent = q->properties.queue_percent; |
665 | q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr; |
666 | q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr; |
667 | q_data->doorbell_id = q->doorbell_id; |
668 | |
669 | q_data->sdma_id = q->sdma_id; |
670 | |
671 | q_data->eop_ring_buffer_address = |
672 | q->properties.eop_ring_buffer_address; |
673 | |
674 | q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size; |
675 | |
676 | q_data->ctx_save_restore_area_address = |
677 | q->properties.ctx_save_restore_area_address; |
678 | |
679 | q_data->ctx_save_restore_area_size = |
680 | q->properties.ctx_save_restore_area_size; |
681 | |
682 | q_data->gws = !!q->gws; |
683 | |
684 | ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack); |
685 | if (ret) { |
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
687 | return ret; |
688 | } |
689 | |
	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
691 | return ret; |
692 | } |
693 | |
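/*
 * Checkpoint all queues of one process device and copy their private data
 * records to the user buffer, advancing the offset and queue index as it
 * goes.
 */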
694 | static int criu_checkpoint_queues_device(struct kfd_process_device *pdd, |
695 | uint8_t __user *user_priv, |
696 | unsigned int *q_index, |
697 | uint64_t *queues_priv_data_offset) |
698 | { |
699 | unsigned int q_private_data_size = 0; |
700 | uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */ |
701 | struct queue *q; |
702 | int ret = 0; |
703 | |
704 | list_for_each_entry(q, &pdd->qpd.queues_list, list) { |
705 | struct kfd_criu_queue_priv_data *q_data; |
706 | uint64_t q_data_size; |
707 | uint32_t mqd_size; |
708 | uint32_t ctl_stack_size; |
709 | |
710 | if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE && |
711 | q->properties.type != KFD_QUEUE_TYPE_SDMA && |
712 | q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) { |
713 | |
			pr_err("Unsupported queue type (%d)\n", q->properties.type);
715 | ret = -EOPNOTSUPP; |
716 | break; |
717 | } |
718 | |
719 | ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size); |
720 | if (ret) |
721 | break; |
722 | |
723 | q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size; |
724 | |
725 | /* Increase local buffer space if needed */ |
726 | if (q_private_data_size < q_data_size) { |
727 | kfree(q_private_data); |
728 | |
729 | q_private_data = kzalloc(q_data_size, GFP_KERNEL); |
730 | if (!q_private_data) { |
731 | ret = -ENOMEM; |
732 | break; |
733 | } |
734 | q_private_data_size = q_data_size; |
735 | } |
736 | |
737 | q_data = (struct kfd_criu_queue_priv_data *)q_private_data; |
738 | |
739 | /* data stored in this order: priv_data, mqd, ctl_stack */ |
740 | q_data->mqd_size = mqd_size; |
741 | q_data->ctl_stack_size = ctl_stack_size; |
742 | |
743 | ret = criu_checkpoint_queue(pdd, q, q_data); |
744 | if (ret) |
745 | break; |
746 | |
747 | q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE; |
748 | |
749 | ret = copy_to_user(user_priv + *queues_priv_data_offset, |
750 | q_data, q_data_size); |
751 | if (ret) { |
752 | ret = -EFAULT; |
753 | break; |
754 | } |
755 | *queues_priv_data_offset += q_data_size; |
756 | *q_index = *q_index + 1; |
757 | } |
758 | |
759 | kfree(q_private_data); |
760 | |
761 | return ret; |
762 | } |
763 | |
764 | int kfd_criu_checkpoint_queues(struct kfd_process *p, |
765 | uint8_t __user *user_priv_data, |
766 | uint64_t *priv_data_offset) |
767 | { |
768 | int ret = 0, pdd_index, q_index = 0; |
769 | |
770 | for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { |
771 | struct kfd_process_device *pdd = p->pdds[pdd_index]; |
772 | |
773 | /* |
774 | * criu_checkpoint_queues_device will copy data to user and update q_index and |
775 | * queues_priv_data_offset |
776 | */ |
777 | ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index, |
778 | priv_data_offset); |
779 | |
780 | if (ret) |
781 | break; |
782 | } |
783 | |
784 | return ret; |
785 | } |
786 | |
787 | static void set_queue_properties_from_criu(struct queue_properties *qp, |
788 | struct kfd_criu_queue_priv_data *q_data) |
789 | { |
790 | qp->is_interop = false; |
791 | qp->queue_percent = q_data->q_percent; |
792 | qp->priority = q_data->priority; |
793 | qp->queue_address = q_data->q_address; |
794 | qp->queue_size = q_data->q_size; |
795 | qp->read_ptr = (uint32_t *) q_data->read_ptr_addr; |
796 | qp->write_ptr = (uint32_t *) q_data->write_ptr_addr; |
797 | qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address; |
798 | qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; |
799 | qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; |
800 | qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size; |
801 | qp->ctl_stack_size = q_data->ctl_stack_size; |
802 | qp->type = q_data->type; |
803 | qp->format = q_data->format; |
804 | } |
805 | |
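/*
 * Restore one queue from CRIU private data: read the per-queue record and
 * the trailing MQD and control stack from the user buffer, then recreate
 * the queue with its original queue ID and GWS attachment.
 */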
806 | int kfd_criu_restore_queue(struct kfd_process *p, |
807 | uint8_t __user *user_priv_ptr, |
808 | uint64_t *priv_data_offset, |
809 | uint64_t max_priv_data_size) |
810 | { |
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
815 | struct queue_properties qp; |
816 | unsigned int queue_id; |
817 | int ret = 0; |
818 | |
819 | if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size) |
820 | return -EINVAL; |
821 | |
822 | q_data = kmalloc(sizeof(*q_data), GFP_KERNEL); |
823 | if (!q_data) |
824 | return -ENOMEM; |
825 | |
826 | ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data)); |
827 | if (ret) { |
828 | ret = -EFAULT; |
829 | goto exit; |
830 | } |
831 | |
832 | *priv_data_offset += sizeof(*q_data); |
833 | q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size; |
834 | |
835 | if (*priv_data_offset + q_extra_data_size > max_priv_data_size) { |
836 | ret = -EINVAL; |
837 | goto exit; |
838 | } |
839 | |
840 | q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL); |
841 | if (!q_extra_data) { |
842 | ret = -ENOMEM; |
843 | goto exit; |
844 | } |
845 | |
846 | ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size); |
847 | if (ret) { |
848 | ret = -EFAULT; |
849 | goto exit; |
850 | } |
851 | |
852 | *priv_data_offset += q_extra_data_size; |
853 | |
854 | pdd = kfd_process_device_data_by_id(p, q_data->gpu_id); |
855 | if (!pdd) { |
		pr_err("Failed to get pdd\n");
857 | ret = -EINVAL; |
858 | goto exit; |
859 | } |
860 | /* data stored in this order: mqd, ctl_stack */ |
861 | mqd = q_extra_data; |
862 | ctl_stack = mqd + q_data->mqd_size; |
863 | |
864 | memset(&qp, 0, sizeof(qp)); |
865 | set_queue_properties_from_criu(&qp, q_data); |
866 | |
867 | print_queue_properties(&qp); |
868 | |
869 | ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, |
870 | NULL); |
871 | if (ret) { |
		pr_err("Failed to create new queue err:%d\n", ret);
873 | goto exit; |
874 | } |
875 | |
876 | if (q_data->gws) |
877 | ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws); |
878 | |
879 | exit: |
880 | if (ret) |
		pr_err("Failed to restore queue (%d)\n", ret);
882 | else |
		pr_debug("Queue id %d was restored successfully\n", queue_id);
884 | |
	kfree(q_extra_data);
	kfree(q_data);
886 | |
887 | return ret; |
888 | } |
889 | |
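/* Report the MQD and control stack sizes needed to checkpoint a queue. */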
890 | int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm, |
891 | unsigned int qid, |
892 | uint32_t *mqd_size, |
893 | uint32_t *ctl_stack_size) |
894 | { |
895 | struct process_queue_node *pqn; |
896 | |
897 | pqn = get_queue_by_qid(pqm, qid); |
898 | if (!pqn) { |
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
900 | return -EFAULT; |
901 | } |
902 | |
903 | if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) { |
		pr_err("amdkfd: queue dumping not supported on this device\n");
905 | return -EOPNOTSUPP; |
906 | } |
907 | |
908 | pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm, |
909 | pqn->q, mqd_size, |
910 | ctl_stack_size); |
911 | return 0; |
912 | } |
913 | |
914 | #if defined(CONFIG_DEBUG_FS) |
915 | |
916 | int pqm_debugfs_mqds(struct seq_file *m, void *data) |
917 | { |
918 | struct process_queue_manager *pqm = data; |
919 | struct process_queue_node *pqn; |
920 | struct queue *q; |
921 | enum KFD_MQD_TYPE mqd_type; |
922 | struct mqd_manager *mqd_mgr; |
923 | int r = 0; |
924 | |
925 | list_for_each_entry(pqn, &pqm->queues, process_queue_list) { |
926 | if (pqn->q) { |
927 | q = pqn->q; |
928 | switch (q->properties.type) { |
929 | case KFD_QUEUE_TYPE_SDMA: |
930 | case KFD_QUEUE_TYPE_SDMA_XGMI: |
				seq_printf(m, " SDMA queue on device %x\n",
					q->device->id);
933 | mqd_type = KFD_MQD_TYPE_SDMA; |
934 | break; |
935 | case KFD_QUEUE_TYPE_COMPUTE: |
				seq_printf(m, " Compute queue on device %x\n",
					q->device->id);
938 | mqd_type = KFD_MQD_TYPE_CP; |
939 | break; |
940 | default: |
				seq_printf(m,
					" Bad user queue type %d on device %x\n",
					q->properties.type, q->device->id);
944 | continue; |
945 | } |
946 | mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; |
947 | } else if (pqn->kq) { |
948 | q = pqn->kq->queue; |
949 | mqd_mgr = pqn->kq->mqd_mgr; |
950 | switch (q->properties.type) { |
951 | case KFD_QUEUE_TYPE_DIQ: |
				seq_printf(m, " DIQ on device %x\n",
					pqn->kq->dev->id);
954 | break; |
955 | default: |
				seq_printf(m,
					" Bad kernel queue type %d on device %x\n",
					q->properties.type,
					pqn->kq->dev->id);
960 | continue; |
961 | } |
962 | } else { |
			seq_printf(m,
				" Weird: Queue node with neither kernel nor user queue\n");
965 | continue; |
966 | } |
967 | |
968 | r = mqd_mgr->debugfs_show_mqd(m, q->mqd); |
969 | if (r != 0) |
970 | break; |
971 | } |
972 | |
973 | return r; |
974 | } |
975 | |
976 | #endif |
977 | |