1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/slab.h> |
26 | #include <linux/list.h> |
27 | #include "kfd_device_queue_manager.h" |
28 | #include "kfd_priv.h" |
29 | #include "kfd_kernel_queue.h" |
30 | #include "amdgpu_amdkfd.h" |
31 | |
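/*
 * Look up the process queue node for a given queue id. Both user queues
 * (pqn->q) and kernel queues such as the DIQ (pqn->kq) are matched.
 */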
32 | static inline struct process_queue_node *get_queue_by_qid( |
33 | struct process_queue_manager *pqm, unsigned int qid) |
34 | { |
35 | struct process_queue_node *pqn; |
36 | |
37 | list_for_each_entry(pqn, &pqm->queues, process_queue_list) { |
38 | if ((pqn->q && pqn->q->properties.queue_id == qid) || |
39 | (pqn->kq && pqn->kq->queue->properties.queue_id == qid)) |
40 | return pqn; |
41 | } |
42 | |
43 | return NULL; |
44 | } |
45 | |
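/*
 * Reserve a specific slot in the per-process queue bitmap. This is used on
 * the CRIU restore path, where a restored queue must keep its original id.
 */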
46 | static int assign_queue_slot_by_qid(struct process_queue_manager *pqm, |
47 | unsigned int qid) |
48 | { |
49 | if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) |
50 | return -EINVAL; |
51 | |
52 | if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) { |
53 | pr_err("Cannot create new queue because requested qid(%u) is in use\n" , qid); |
54 | return -ENOSPC; |
55 | } |
56 | |
57 | return 0; |
58 | } |
59 | |
60 | static int find_available_queue_slot(struct process_queue_manager *pqm, |
61 | unsigned int *qid) |
62 | { |
63 | unsigned long found; |
64 | |
	found = find_first_zero_bit(pqm->queue_slot_bitmap,
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	pr_debug("The new slot id %lu\n", found);
69 | |
70 | if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { |
71 | pr_info("Cannot open more queues for process with pasid 0x%x\n" , |
72 | pqm->process->pasid); |
73 | return -ENOMEM; |
74 | } |
75 | |
	set_bit(found, pqm->queue_slot_bitmap);
77 | *qid = found; |
78 | |
79 | return 0; |
80 | } |
81 | |
82 | void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) |
83 | { |
84 | struct kfd_node *dev = pdd->dev; |
85 | |
86 | if (pdd->already_dequeued) |
87 | return; |
88 | |
89 | dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); |
90 | if (dev->kfd->shared_resources.enable_mes) |
		amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
92 | pdd->already_dequeued = true; |
93 | } |
94 | |
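/*
 * Attach GWS (global wave sync) resources to a user queue, or release them
 * when @gws is NULL. Only one queue per process may hold a GWS allocation.
 */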
95 | int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, |
96 | void *gws) |
97 | { |
98 | struct mqd_update_info minfo = {0}; |
99 | struct kfd_node *dev = NULL; |
100 | struct process_queue_node *pqn; |
101 | struct kfd_process_device *pdd; |
102 | struct kgd_mem *mem = NULL; |
103 | int ret; |
104 | |
105 | pqn = get_queue_by_qid(pqm, qid); |
106 | if (!pqn) { |
107 | pr_err("Queue id does not match any known queue\n" ); |
108 | return -EINVAL; |
109 | } |
110 | |
111 | if (pqn->q) |
112 | dev = pqn->q->device; |
113 | if (WARN_ON(!dev)) |
114 | return -ENODEV; |
115 | |
	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
119 | return -EINVAL; |
120 | } |
121 | |
	/* Only one queue per process may have GWS assigned */
123 | if (gws && pdd->qpd.num_gws) |
124 | return -EBUSY; |
125 | |
126 | if (!gws && pdd->qpd.num_gws == 0) |
127 | return -EINVAL; |
128 | |
129 | if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) { |
130 | if (gws) |
			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
							       gws, &mem);
		else
			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
								    pqn->q->gws);
136 | if (unlikely(ret)) |
137 | return ret; |
138 | pqn->q->gws = mem; |
139 | } else { |
140 | /* |
141 | * Intentionally set GWS to a non-NULL value |
142 | * for devices that do not use GWS for global wave |
143 | * synchronization but require the formality |
144 | * of setting GWS for cooperative groups. |
145 | */ |
		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
147 | } |
148 | |
149 | pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; |
150 | minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0; |
151 | |
152 | return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, |
153 | pqn->q, &minfo); |
154 | } |
155 | |
156 | void kfd_process_dequeue_from_all_devices(struct kfd_process *p) |
157 | { |
158 | int i; |
159 | |
160 | for (i = 0; i < p->n_pdds; i++) |
		kfd_process_dequeue_from_device(p->pdds[i]);
162 | } |
163 | |
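/*
 * Set up the per-process queue manager: an empty queue list plus a bitmap
 * that tracks which of the KFD_MAX_NUM_OF_QUEUES_PER_PROCESS ids are in use.
 */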
164 | int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) |
165 | { |
	INIT_LIST_HEAD(&pqm->queues);
167 | pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, |
168 | GFP_KERNEL); |
169 | if (!pqm->queue_slot_bitmap) |
170 | return -ENOMEM; |
171 | pqm->process = p; |
172 | |
173 | return 0; |
174 | } |
175 | |
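/*
 * Release per-queue resources that outlive the queue itself: the GWS
 * allocation, and on MES-enabled devices the gang context and wptr BOs.
 */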
176 | static void pqm_clean_queue_resource(struct process_queue_manager *pqm, |
177 | struct process_queue_node *pqn) |
178 | { |
179 | struct kfd_node *dev; |
180 | struct kfd_process_device *pdd; |
181 | |
182 | dev = pqn->q->device; |
183 | |
	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
187 | return; |
188 | } |
189 | |
190 | if (pqn->q->gws) { |
191 | if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && |
192 | !dev->kfd->shared_resources.enable_mes) |
			amdgpu_amdkfd_remove_gws_from_process(
				pqm->process->kgd_process_info, pqn->q->gws);
195 | pdd->qpd.num_gws = 0; |
196 | } |
197 | |
198 | if (dev->kfd->shared_resources.enable_mes) { |
		amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
		if (pqn->q->wptr_bo)
			amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
202 | } |
203 | } |
204 | |
205 | void pqm_uninit(struct process_queue_manager *pqm) |
206 | { |
207 | struct process_queue_node *pqn, *next; |
208 | |
209 | list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { |
210 | if (pqn->q) |
211 | pqm_clean_queue_resource(pqm, pqn); |
212 | |
		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
217 | } |
218 | |
219 | bitmap_free(bitmap: pqm->queue_slot_bitmap); |
220 | pqm->queue_slot_bitmap = NULL; |
221 | } |
222 | |
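/*
 * Common initialization for user-mode queues: the doorbell comes from user
 * space, the VMID is assigned by the DQM, and on MES-enabled devices a gang
 * context buffer is allocated in GTT.
 */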
223 | static int init_user_queue(struct process_queue_manager *pqm, |
224 | struct kfd_node *dev, struct queue **q, |
225 | struct queue_properties *q_properties, |
226 | struct file *f, struct amdgpu_bo *wptr_bo, |
227 | unsigned int qid) |
228 | { |
229 | int retval; |
230 | |
	/* Doorbell initialized in user space */
232 | q_properties->doorbell_ptr = NULL; |
233 | q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW); |
234 | |
	/* let DQM handle it */
236 | q_properties->vmid = 0; |
237 | q_properties->queue_id = qid; |
238 | |
	retval = init_queue(q, q_properties);
240 | if (retval != 0) |
241 | return retval; |
242 | |
243 | (*q)->device = dev; |
244 | (*q)->process = pqm->process; |
245 | |
246 | if (dev->kfd->shared_resources.enable_mes) { |
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						AMDGPU_MES_GANG_CTX_SIZE,
						&(*q)->gang_ctx_bo,
						&(*q)->gang_ctx_gpu_addr,
						&(*q)->gang_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
255 | goto cleanup; |
256 | } |
257 | memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); |
258 | (*q)->wptr_bo = wptr_bo; |
259 | } |
260 | |
261 | pr_debug("PQM After init queue" ); |
262 | return 0; |
263 | |
264 | cleanup: |
	uninit_queue(*q);
266 | *q = NULL; |
267 | return retval; |
268 | } |
269 | |
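/*
 * Create a queue of the requested type and register it with the device queue
 * manager. When @q_data is non-NULL the queue is being restored from a CRIU
 * checkpoint and reuses its saved id, MQD and control stack.
 */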
270 | int pqm_create_queue(struct process_queue_manager *pqm, |
271 | struct kfd_node *dev, |
272 | struct file *f, |
273 | struct queue_properties *properties, |
274 | unsigned int *qid, |
275 | struct amdgpu_bo *wptr_bo, |
276 | const struct kfd_criu_queue_priv_data *q_data, |
277 | const void *restore_mqd, |
278 | const void *restore_ctl_stack, |
279 | uint32_t *p_doorbell_offset_in_process) |
280 | { |
281 | int retval; |
282 | struct kfd_process_device *pdd; |
283 | struct queue *q; |
284 | struct process_queue_node *pqn; |
285 | struct kernel_queue *kq; |
286 | enum kfd_queue_type type = properties->type; |
287 | unsigned int max_queues = 127; /* HWS limit */ |
288 | |
289 | /* |
290 | * On GFX 9.4.3, increase the number of queues that |
291 | * can be created to 255. No HWS limit on GFX 9.4.3. |
292 | */ |
293 | if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) |
294 | max_queues = 255; |
295 | |
296 | q = NULL; |
297 | kq = NULL; |
298 | |
	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
302 | return -1; |
303 | } |
304 | |
305 | /* |
306 | * for debug process, verify that it is within the static queues limit |
307 | * currently limit is set to half of the total avail HQD slots |
308 | * If we are just about to create DIQ, the is_debug flag is not set yet |
309 | * Hence we also check the type as well |
310 | */ |
311 | if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) |
312 | max_queues = dev->kfd->device_info.max_no_of_hqd/2; |
313 | |
314 | if (pdd->qpd.queue_count >= max_queues) |
315 | return -ENOSPC; |
316 | |
317 | if (q_data) { |
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
319 | *qid = q_data->q_id; |
320 | } else |
321 | retval = find_available_queue_slot(pqm, qid); |
322 | |
323 | if (retval != 0) |
324 | return retval; |
325 | |
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
328 | dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); |
329 | |
	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
331 | if (!pqn) { |
332 | retval = -ENOMEM; |
333 | goto err_allocate_pqn; |
334 | } |
335 | |
336 | switch (type) { |
337 | case KFD_QUEUE_TYPE_SDMA: |
338 | case KFD_QUEUE_TYPE_SDMA_XGMI: |
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether an SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
346 | if (retval != 0) |
347 | goto err_create_queue; |
348 | pqn->q = q; |
349 | pqn->kq = NULL; |
350 | retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data, |
351 | restore_mqd, restore_ctl_stack); |
352 | print_queue(q); |
353 | break; |
354 | |
355 | case KFD_QUEUE_TYPE_COMPUTE: |
		/* check if there is oversubscription */
357 | if ((dev->dqm->sched_policy == |
358 | KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && |
359 | ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || |
		    (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
362 | retval = -EPERM; |
363 | goto err_create_queue; |
364 | } |
365 | |
		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
367 | if (retval != 0) |
368 | goto err_create_queue; |
369 | pqn->q = q; |
370 | pqn->kq = NULL; |
371 | retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data, |
372 | restore_mqd, restore_ctl_stack); |
373 | print_queue(q); |
374 | break; |
375 | case KFD_QUEUE_TYPE_DIQ: |
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
377 | if (!kq) { |
378 | retval = -ENOMEM; |
379 | goto err_create_queue; |
380 | } |
381 | kq->queue->properties.queue_id = *qid; |
382 | pqn->kq = kq; |
383 | pqn->q = NULL; |
384 | retval = kfd_process_drain_interrupts(pdd); |
385 | if (retval) |
386 | break; |
387 | |
388 | retval = dev->dqm->ops.create_kernel_queue(dev->dqm, |
389 | kq, &pdd->qpd); |
390 | break; |
391 | default: |
392 | WARN(1, "Invalid queue type %d" , type); |
393 | retval = -EINVAL; |
394 | } |
395 | |
396 | if (retval != 0) { |
397 | pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n" , |
398 | pqm->process->pasid, type, retval); |
399 | goto err_create_queue; |
400 | } |
401 | |
402 | if (q && p_doorbell_offset_in_process) { |
403 | /* Return the doorbell offset within the doorbell page |
404 | * to the caller so it can be passed up to user mode |
405 | * (in bytes). |
406 | * relative doorbell index = Absolute doorbell index - |
407 | * absolute index of first doorbell in the page. |
408 | */ |
		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
								       pdd->qpd.proc_doorbells,
								       0,
								       pdd->dev->kfd->device_info.doorbell_size);
413 | |
414 | *p_doorbell_offset_in_process = (q->properties.doorbell_off |
415 | - first_db_index) * sizeof(uint32_t); |
416 | } |
417 | |
418 | pr_debug("PQM After DQM create queue\n" ); |
419 | |
	list_add(&pqn->process_queue_list, &pqm->queues);
421 | |
422 | if (q) { |
423 | pr_debug("PQM done creating queue\n" ); |
424 | kfd_procfs_add_queue(q); |
425 | print_queue_properties(q: &q->properties); |
426 | } |
427 | |
428 | return retval; |
429 | |
430 | err_create_queue: |
431 | uninit_queue(q); |
432 | if (kq) |
		kernel_queue_uninit(kq, false);
	kfree(pqn);
435 | err_allocate_pqn: |
	/* if the queues list is empty, unregister the process from the device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
440 | dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); |
441 | return retval; |
442 | } |
443 | |
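/*
 * Destroy the queue identified by @qid and release its slot in the bitmap.
 * A -ETIME from the DQM (hang during unmap) still tears the queue down.
 */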
444 | int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) |
445 | { |
446 | struct process_queue_node *pqn; |
447 | struct kfd_process_device *pdd; |
448 | struct device_queue_manager *dqm; |
449 | struct kfd_node *dev; |
450 | int retval; |
451 | |
452 | dqm = NULL; |
453 | |
454 | retval = 0; |
455 | |
456 | pqn = get_queue_by_qid(pqm, qid); |
457 | if (!pqn) { |
458 | pr_err("Queue id does not match any known queue\n" ); |
459 | return -EINVAL; |
460 | } |
461 | |
462 | dev = NULL; |
463 | if (pqn->kq) |
464 | dev = pqn->kq->dev; |
465 | if (pqn->q) |
466 | dev = pqn->q->device; |
467 | if (WARN_ON(!dev)) |
468 | return -ENODEV; |
469 | |
	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
473 | return -1; |
474 | } |
475 | |
476 | if (pqn->kq) { |
477 | /* destroy kernel queue (DIQ) */ |
478 | dqm = pqn->kq->dev->dqm; |
479 | dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); |
		kernel_queue_uninit(pqn->kq, false);
481 | } |
482 | |
483 | if (pqn->q) { |
		kfd_procfs_del_queue(pqn->q);
485 | dqm = pqn->q->device->dqm; |
486 | retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); |
487 | if (retval) { |
488 | pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n" , |
489 | pqm->process->pasid, |
490 | pqn->q->properties.queue_id, retval); |
491 | if (retval != -ETIME) |
492 | goto err_destroy_queue; |
493 | } |
494 | |
495 | pqm_clean_queue_resource(pqm, pqn); |
		uninit_queue(pqn->q);
497 | } |
498 | |
	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);
502 | |
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
505 | dqm->ops.unregister_process(dqm, &pdd->qpd); |
506 | |
507 | err_destroy_queue: |
508 | return retval; |
509 | } |
510 | |
511 | int pqm_update_queue_properties(struct process_queue_manager *pqm, |
512 | unsigned int qid, struct queue_properties *p) |
513 | { |
514 | int retval; |
515 | struct process_queue_node *pqn; |
516 | |
517 | pqn = get_queue_by_qid(pqm, qid); |
518 | if (!pqn) { |
519 | pr_debug("No queue %d exists for update operation\n" , qid); |
520 | return -EFAULT; |
521 | } |
522 | |
523 | pqn->q->properties.queue_address = p->queue_address; |
524 | pqn->q->properties.queue_size = p->queue_size; |
525 | pqn->q->properties.queue_percent = p->queue_percent; |
526 | pqn->q->properties.priority = p->priority; |
527 | pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc; |
528 | |
529 | retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, |
530 | pqn->q, NULL); |
531 | if (retval != 0) |
532 | return retval; |
533 | |
534 | return 0; |
535 | } |
536 | |
537 | int pqm_update_mqd(struct process_queue_manager *pqm, |
538 | unsigned int qid, struct mqd_update_info *minfo) |
539 | { |
540 | int retval; |
541 | struct process_queue_node *pqn; |
542 | |
543 | pqn = get_queue_by_qid(pqm, qid); |
544 | if (!pqn) { |
545 | pr_debug("No queue %d exists for update operation\n" , qid); |
546 | return -EFAULT; |
547 | } |
548 | |
549 | /* CUs are masked for debugger requirements so deny user mask */ |
550 | if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr) |
551 | return -EBUSY; |
552 | |
553 | /* ASICs that have WGPs must enforce pairwise enabled mask checks. */ |
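	/*
	 * On these ASICs two adjacent CU bits map to one WGP, so each bit
	 * pair in the mask must be 0b00 or 0b11; a half-enabled pair such
	 * as 0b01 is rejected below.
	 */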
554 | if (minfo && minfo->cu_mask.ptr && |
555 | KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) { |
556 | int i; |
557 | |
558 | for (i = 0; i < minfo->cu_mask.count; i += 2) { |
559 | uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3; |
560 | |
561 | if (cu_pair && cu_pair != 0x3) { |
562 | pr_debug("CUs must be adjacent pairwise enabled.\n" ); |
563 | return -EINVAL; |
564 | } |
565 | } |
566 | } |
567 | |
568 | retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, |
569 | pqn->q, minfo); |
570 | if (retval != 0) |
571 | return retval; |
572 | |
573 | if (minfo && minfo->cu_mask.ptr) |
574 | pqn->q->properties.is_user_cu_masked = true; |
575 | |
576 | return 0; |
577 | } |
578 | |
579 | struct kernel_queue *pqm_get_kernel_queue( |
580 | struct process_queue_manager *pqm, |
581 | unsigned int qid) |
582 | { |
583 | struct process_queue_node *pqn; |
584 | |
585 | pqn = get_queue_by_qid(pqm, qid); |
586 | if (pqn && pqn->kq) |
587 | return pqn->kq; |
588 | |
589 | return NULL; |
590 | } |
591 | |
592 | struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, |
593 | unsigned int qid) |
594 | { |
595 | struct process_queue_node *pqn; |
596 | |
597 | pqn = get_queue_by_qid(pqm, qid); |
598 | return pqn ? pqn->q : NULL; |
599 | } |
600 | |
601 | int pqm_get_wave_state(struct process_queue_manager *pqm, |
602 | unsigned int qid, |
603 | void __user *ctl_stack, |
604 | u32 *ctl_stack_used_size, |
605 | u32 *save_area_used_size) |
606 | { |
607 | struct process_queue_node *pqn; |
608 | |
609 | pqn = get_queue_by_qid(pqm, qid); |
610 | if (!pqn) { |
611 | pr_debug("amdkfd: No queue %d exists for operation\n" , |
612 | qid); |
613 | return -EFAULT; |
614 | } |
615 | |
616 | return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm, |
617 | pqn->q, |
618 | ctl_stack, |
619 | ctl_stack_used_size, |
620 | save_area_used_size); |
621 | } |
622 | |
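/*
 * Copy queue snapshot entries to user space. On return, *num_qss_entries
 * holds the total number of user queues, which may exceed the number of
 * entries actually copied, so callers can detect a too-small buffer.
 */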
623 | int pqm_get_queue_snapshot(struct process_queue_manager *pqm, |
624 | uint64_t exception_clear_mask, |
625 | void __user *buf, |
626 | int *num_qss_entries, |
627 | uint32_t *entry_size) |
628 | { |
629 | struct process_queue_node *pqn; |
630 | struct kfd_queue_snapshot_entry src; |
631 | uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries; |
632 | int r = 0; |
633 | |
634 | *num_qss_entries = 0; |
635 | if (!(*entry_size)) |
636 | return -EINVAL; |
637 | |
638 | *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry)); |
639 | mutex_lock(&pqm->process->event_mutex); |
640 | |
641 | memset(&src, 0, sizeof(src)); |
642 | |
643 | list_for_each_entry(pqn, &pqm->queues, process_queue_list) { |
644 | if (!pqn->q) |
645 | continue; |
646 | |
647 | if (*num_qss_entries < tmp_qss_entries) { |
			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);

			if (copy_to_user(buf, &src, *entry_size)) {
651 | r = -EFAULT; |
652 | break; |
653 | } |
654 | buf += tmp_entry_size; |
655 | } |
656 | *num_qss_entries += 1; |
657 | } |
658 | |
	mutex_unlock(&pqm->process->event_mutex);
660 | return r; |
661 | } |
662 | |
663 | static int get_queue_data_sizes(struct kfd_process_device *pdd, |
664 | struct queue *q, |
665 | uint32_t *mqd_size, |
666 | uint32_t *ctl_stack_size) |
667 | { |
668 | int ret; |
669 | |
	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
					    q->properties.queue_id,
					    mqd_size,
					    ctl_stack_size);
	if (ret)
		pr_err("Failed to get queue dump info (%d)\n", ret);
676 | |
677 | return ret; |
678 | } |
679 | |
680 | int kfd_process_get_queue_info(struct kfd_process *p, |
681 | uint32_t *num_queues, |
682 | uint64_t *priv_data_sizes) |
683 | { |
	uint32_t extra_data_sizes = 0;
685 | struct queue *q; |
686 | int i; |
687 | int ret; |
688 | |
689 | *num_queues = 0; |
690 | |
691 | /* Run over all PDDs of the process */ |
692 | for (i = 0; i < p->n_pdds; i++) { |
693 | struct kfd_process_device *pdd = p->pdds[i]; |
694 | |
695 | list_for_each_entry(q, &pdd->qpd.queues_list, list) { |
696 | if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || |
697 | q->properties.type == KFD_QUEUE_TYPE_SDMA || |
698 | q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { |
699 | uint32_t mqd_size, ctl_stack_size; |
700 | |
701 | *num_queues = *num_queues + 1; |
702 | |
				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
704 | if (ret) |
705 | return ret; |
706 | |
707 | extra_data_sizes += mqd_size + ctl_stack_size; |
708 | } else { |
709 | pr_err("Unsupported queue type (%d)\n" , q->properties.type); |
710 | return -EOPNOTSUPP; |
711 | } |
712 | } |
713 | } |
714 | *priv_data_sizes = extra_data_sizes + |
715 | (*num_queues * sizeof(struct kfd_criu_queue_priv_data)); |
716 | |
717 | return 0; |
718 | } |
719 | |
720 | static int pqm_checkpoint_mqd(struct process_queue_manager *pqm, |
721 | unsigned int qid, |
722 | void *mqd, |
723 | void *ctl_stack) |
724 | { |
725 | struct process_queue_node *pqn; |
726 | |
727 | pqn = get_queue_by_qid(pqm, qid); |
728 | if (!pqn) { |
729 | pr_debug("amdkfd: No queue %d exists for operation\n" , qid); |
730 | return -EFAULT; |
731 | } |
732 | |
733 | if (!pqn->q->device->dqm->ops.checkpoint_mqd) { |
734 | pr_err("amdkfd: queue dumping not supported on this device\n" ); |
735 | return -EOPNOTSUPP; |
736 | } |
737 | |
738 | return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm, |
739 | pqn->q, mqd, ctl_stack); |
740 | } |
741 | |
742 | static int criu_checkpoint_queue(struct kfd_process_device *pdd, |
743 | struct queue *q, |
744 | struct kfd_criu_queue_priv_data *q_data) |
745 | { |
746 | uint8_t *mqd, *ctl_stack; |
747 | int ret; |
748 | |
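	/*
	 * The checkpoint blob for a queue is laid out contiguously:
	 * [kfd_criu_queue_priv_data | mqd | ctl_stack]
	 */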
749 | mqd = (void *)(q_data + 1); |
750 | ctl_stack = mqd + q_data->mqd_size; |
751 | |
752 | q_data->gpu_id = pdd->user_gpu_id; |
753 | q_data->type = q->properties.type; |
754 | q_data->format = q->properties.format; |
755 | q_data->q_id = q->properties.queue_id; |
756 | q_data->q_address = q->properties.queue_address; |
757 | q_data->q_size = q->properties.queue_size; |
758 | q_data->priority = q->properties.priority; |
759 | q_data->q_percent = q->properties.queue_percent; |
760 | q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr; |
761 | q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr; |
762 | q_data->doorbell_id = q->doorbell_id; |
763 | |
764 | q_data->sdma_id = q->sdma_id; |
765 | |
766 | q_data->eop_ring_buffer_address = |
767 | q->properties.eop_ring_buffer_address; |
768 | |
769 | q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size; |
770 | |
771 | q_data->ctx_save_restore_area_address = |
772 | q->properties.ctx_save_restore_area_address; |
773 | |
774 | q_data->ctx_save_restore_area_size = |
775 | q->properties.ctx_save_restore_area_size; |
776 | |
777 | q_data->gws = !!q->gws; |
778 | |
	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
782 | return ret; |
783 | } |
784 | |
785 | pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n" , q_data->gpu_id, q_data->q_id); |
786 | return ret; |
787 | } |
788 | |
789 | static int criu_checkpoint_queues_device(struct kfd_process_device *pdd, |
790 | uint8_t __user *user_priv, |
791 | unsigned int *q_index, |
792 | uint64_t *queues_priv_data_offset) |
793 | { |
794 | unsigned int q_private_data_size = 0; |
795 | uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */ |
796 | struct queue *q; |
797 | int ret = 0; |
798 | |
799 | list_for_each_entry(q, &pdd->qpd.queues_list, list) { |
800 | struct kfd_criu_queue_priv_data *q_data; |
801 | uint64_t q_data_size; |
802 | uint32_t mqd_size; |
803 | uint32_t ctl_stack_size; |
804 | |
805 | if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE && |
806 | q->properties.type != KFD_QUEUE_TYPE_SDMA && |
807 | q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) { |
808 | |
809 | pr_err("Unsupported queue type (%d)\n" , q->properties.type); |
810 | ret = -EOPNOTSUPP; |
811 | break; |
812 | } |
813 | |
		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
815 | if (ret) |
816 | break; |
817 | |
818 | q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size; |
819 | |
820 | /* Increase local buffer space if needed */ |
821 | if (q_private_data_size < q_data_size) { |
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
825 | if (!q_private_data) { |
826 | ret = -ENOMEM; |
827 | break; |
828 | } |
829 | q_private_data_size = q_data_size; |
830 | } |
831 | |
832 | q_data = (struct kfd_criu_queue_priv_data *)q_private_data; |
833 | |
834 | /* data stored in this order: priv_data, mqd, ctl_stack */ |
835 | q_data->mqd_size = mqd_size; |
836 | q_data->ctl_stack_size = ctl_stack_size; |
837 | |
838 | ret = criu_checkpoint_queue(pdd, q, q_data); |
839 | if (ret) |
840 | break; |
841 | |
842 | q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE; |
843 | |
		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				   q_data, q_data_size);
846 | if (ret) { |
847 | ret = -EFAULT; |
848 | break; |
849 | } |
850 | *queues_priv_data_offset += q_data_size; |
851 | *q_index = *q_index + 1; |
852 | } |
853 | |
	kfree(q_private_data);
855 | |
856 | return ret; |
857 | } |
858 | |
859 | int kfd_criu_checkpoint_queues(struct kfd_process *p, |
860 | uint8_t __user *user_priv_data, |
861 | uint64_t *priv_data_offset) |
862 | { |
863 | int ret = 0, pdd_index, q_index = 0; |
864 | |
865 | for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { |
866 | struct kfd_process_device *pdd = p->pdds[pdd_index]; |
867 | |
868 | /* |
869 | * criu_checkpoint_queues_device will copy data to user and update q_index and |
870 | * queues_priv_data_offset |
871 | */ |
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
						    priv_data_offset);
874 | |
875 | if (ret) |
876 | break; |
877 | } |
878 | |
879 | return ret; |
880 | } |
881 | |
882 | static void set_queue_properties_from_criu(struct queue_properties *qp, |
883 | struct kfd_criu_queue_priv_data *q_data) |
884 | { |
885 | qp->is_interop = false; |
886 | qp->queue_percent = q_data->q_percent; |
887 | qp->priority = q_data->priority; |
888 | qp->queue_address = q_data->q_address; |
889 | qp->queue_size = q_data->q_size; |
890 | qp->read_ptr = (uint32_t *) q_data->read_ptr_addr; |
891 | qp->write_ptr = (uint32_t *) q_data->write_ptr_addr; |
892 | qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address; |
893 | qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; |
894 | qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; |
895 | qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size; |
896 | qp->ctl_stack_size = q_data->ctl_stack_size; |
897 | qp->type = q_data->type; |
898 | qp->format = q_data->format; |
899 | } |
900 | |
901 | int kfd_criu_restore_queue(struct kfd_process *p, |
902 | uint8_t __user *user_priv_ptr, |
903 | uint64_t *priv_data_offset, |
904 | uint64_t max_priv_data_size) |
905 | { |
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
910 | struct queue_properties qp; |
911 | unsigned int queue_id; |
912 | int ret = 0; |
913 | |
914 | if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size) |
915 | return -EINVAL; |
916 | |
	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
918 | if (!q_data) |
919 | return -ENOMEM; |
920 | |
	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
922 | if (ret) { |
923 | ret = -EFAULT; |
924 | goto exit; |
925 | } |
926 | |
927 | *priv_data_offset += sizeof(*q_data); |
928 | q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size; |
929 | |
930 | if (*priv_data_offset + q_extra_data_size > max_priv_data_size) { |
931 | ret = -EINVAL; |
932 | goto exit; |
933 | } |
934 | |
	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
936 | if (!q_extra_data) { |
937 | ret = -ENOMEM; |
938 | goto exit; |
939 | } |
940 | |
	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
942 | if (ret) { |
943 | ret = -EFAULT; |
944 | goto exit; |
945 | } |
946 | |
947 | *priv_data_offset += q_extra_data_size; |
948 | |
	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
952 | ret = -EINVAL; |
953 | goto exit; |
954 | } |
955 | |
956 | /* data stored in this order: mqd, ctl_stack */ |
957 | mqd = q_extra_data; |
958 | ctl_stack = mqd + q_data->mqd_size; |
959 | |
960 | memset(&qp, 0, sizeof(qp)); |
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);
964 | |
	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
			       NULL);
967 | if (ret) { |
968 | pr_err("Failed to create new queue err:%d\n" , ret); |
969 | goto exit; |
970 | } |
971 | |
972 | if (q_data->gws) |
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
974 | |
975 | exit: |
976 | if (ret) |
977 | pr_err("Failed to restore queue (%d)\n" , ret); |
978 | else |
979 | pr_debug("Queue id %d was restored successfully\n" , queue_id); |
980 | |
	kfree(q_data);
982 | |
983 | return ret; |
984 | } |
985 | |
986 | int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm, |
987 | unsigned int qid, |
988 | uint32_t *mqd_size, |
989 | uint32_t *ctl_stack_size) |
990 | { |
991 | struct process_queue_node *pqn; |
992 | |
993 | pqn = get_queue_by_qid(pqm, qid); |
994 | if (!pqn) { |
995 | pr_debug("amdkfd: No queue %d exists for operation\n" , qid); |
996 | return -EFAULT; |
997 | } |
998 | |
999 | if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) { |
1000 | pr_err("amdkfd: queue dumping not supported on this device\n" ); |
1001 | return -EOPNOTSUPP; |
1002 | } |
1003 | |
1004 | pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm, |
1005 | pqn->q, mqd_size, |
1006 | ctl_stack_size); |
1007 | return 0; |
1008 | } |
1009 | |
1010 | #if defined(CONFIG_DEBUG_FS) |
1011 | |
1012 | int pqm_debugfs_mqds(struct seq_file *m, void *data) |
1013 | { |
1014 | struct process_queue_manager *pqm = data; |
1015 | struct process_queue_node *pqn; |
1016 | struct queue *q; |
1017 | enum KFD_MQD_TYPE mqd_type; |
1018 | struct mqd_manager *mqd_mgr; |
1019 | int r = 0, xcc, num_xccs = 1; |
1020 | void *mqd; |
1021 | uint64_t size = 0; |
1022 | |
1023 | list_for_each_entry(pqn, &pqm->queues, process_queue_list) { |
1024 | if (pqn->q) { |
1025 | q = pqn->q; |
1026 | switch (q->properties.type) { |
1027 | case KFD_QUEUE_TYPE_SDMA: |
1028 | case KFD_QUEUE_TYPE_SDMA_XGMI: |
1029 | seq_printf(m, fmt: " SDMA queue on device %x\n" , |
1030 | q->device->id); |
1031 | mqd_type = KFD_MQD_TYPE_SDMA; |
1032 | break; |
1033 | case KFD_QUEUE_TYPE_COMPUTE: |
1034 | seq_printf(m, fmt: " Compute queue on device %x\n" , |
1035 | q->device->id); |
1036 | mqd_type = KFD_MQD_TYPE_CP; |
1037 | num_xccs = NUM_XCC(q->device->xcc_mask); |
1038 | break; |
1039 | default: |
				seq_printf(m,
					   "  Bad user queue type %d on device %x\n",
1042 | q->properties.type, q->device->id); |
1043 | continue; |
1044 | } |
1045 | mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; |
1046 | size = mqd_mgr->mqd_stride(mqd_mgr, |
1047 | &q->properties); |
1048 | } else if (pqn->kq) { |
1049 | q = pqn->kq->queue; |
1050 | mqd_mgr = pqn->kq->mqd_mgr; |
1051 | switch (q->properties.type) { |
1052 | case KFD_QUEUE_TYPE_DIQ: |
1053 | seq_printf(m, fmt: " DIQ on device %x\n" , |
1054 | pqn->kq->dev->id); |
1055 | break; |
1056 | default: |
				seq_printf(m,
					   "  Bad kernel queue type %d on device %x\n",
1059 | q->properties.type, |
1060 | pqn->kq->dev->id); |
1061 | continue; |
1062 | } |
1063 | } else { |
			seq_printf(m,
				   "  Weird: Queue node with neither kernel nor user queue\n");
1066 | continue; |
1067 | } |
1068 | |
1069 | for (xcc = 0; xcc < num_xccs; xcc++) { |
1070 | mqd = q->mqd + size * xcc; |
1071 | r = mqd_mgr->debugfs_show_mqd(m, mqd); |
1072 | if (r != 0) |
1073 | break; |
1074 | } |
1075 | } |
1076 | |
1077 | return r; |
1078 | } |
1079 | |
1080 | #endif |
1081 | |