// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

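/*
 * inc_wptr() - Advance the runlist IB write pointer (counted in dwords)
 * by increment_bytes, warning if the new offset would run past the end
 * of the IB.
 */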
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}

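/*
 * pm_calc_rlib_size() - Compute the runlist IB allocation size from the
 * number of processes and active queues, and report whether the runlist
 * is over-subscribed, i.e. carries more work than the HWS can schedule
 * concurrently and therefore needs a chained runlist.
 */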
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				bool *over_subscription)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *dev = pm->dqm->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

	/* check if there is over subscription
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	*over_subscription = false;

	if (dev->max_proc_per_quantum > 1)
		max_proc_per_quantum = dev->max_proc_per_quantum;

	if ((process_count > max_proc_per_quantum) ||
	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
	    gws_queue_count > 1) {
		*over_subscription = true;
		pr_debug("Over subscribed runlist\n");
	}

	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	pr_debug("runlist ib size %d\n", *rlib_size);
}

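/*
 * pm_allocate_runlist_ib() - Size the runlist IB, allocate it from the
 * device's GTT sub-allocator, zero it, and return its CPU and GPU
 * addresses. Only one runlist IB may be allocated at a time.
 */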
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				bool *is_over_subscription)
{
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
					&pm->ib_buffer_obj);

	if (retval) {
		pr_err("Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

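/*
 * pm_create_runlist_ib() - Fill the runlist IB: one map-process packet
 * per process on @queues, each followed by map-queues packets for that
 * process's active kernel and user queues. On over-subscription a
 * chained runlist packet is appended so the HWS loops back over the IB.
 */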
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	bool is_over_subscription;

	rl_wptr = retval = processes_mapped = 0;

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	pr_debug("Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			pr_debug("Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
				alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						kq->queue,
						qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
					pm->pmf->map_queues_size,
					alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						q,
						qpd->is_debug);

			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
					pm->pmf->map_queues_size,
					alloc_size_bytes);
		}
	}

	pr_debug("Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					*rl_gpu_addr,
					alloc_size_bytes / sizeof(uint32_t),
					true);
	}
	pm->is_over_subscription = is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}

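/*
 * pm_init() - Select the per-ASIC packet writer functions and create the
 * HIQ kernel queue through which the packet manager talks to the HWS.
 */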
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

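/*
 * pm_uninit() - Destroy the packet manager's lock and HIQ. @hanging is
 * passed through to kernel_queue_uninit() so queue teardown can allow
 * for an unresponsive HWS.
 */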
void pm_uninit(struct packet_manager *pm, bool hanging)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue, hanging);
	pm->priv_queue = NULL;
}

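/*
 * pm_send_set_resources() - Submit a SET_RESOURCES packet on the HIQ,
 * describing to the HWS the scheduling resources in @res that it may
 * assign to processes.
 */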
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res)
{
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
					size / sizeof(uint32_t),
					(unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}

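/*
 * pm_send_runlist() - Build a runlist IB covering @dqm_queues and submit
 * a runlist packet on the HIQ that points the HWS at it.
 */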
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
					&rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
					rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	retval = kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}

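/*
 * pm_send_query_status() - Submit a QUERY_STATUS packet asking the HWS
 * to write @fence_value to @fence_address once it has processed the
 * request, letting the caller fence on scheduler progress.
 */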
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			uint64_t fence_value)
{
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

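/*
 * pm_update_grace_period() - Submit a set_grace_period packet updating
 * the scheduler's preemption grace period. ASICs whose packet format
 * does not define this packet report set_grace_period_size == 0, in
 * which case this is a no-op.
 */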
int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
{
	int retval = 0;
	uint32_t *buffer, size;

	size = pm->pmf->set_grace_period_size;

	mutex_lock(&pm->lock);

	if (size) {
		kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t),
			(unsigned int **)&buffer);

		if (!buffer) {
			pr_err("Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}

		retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
		if (!retval)
			retval = kq_submit_packet(pm->priv_queue);
		else
			kq_rollback_packet(pm->priv_queue);
	}

out:
	mutex_unlock(&pm->lock);
	return retval;
}

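/*
 * pm_send_unmap_queue() - Submit an UNMAP_QUEUES packet that preempts
 * (or resets, if @reset is set) the queues matching @filter and
 * @filter_param.
 */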
int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

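/*
 * pm_release_ib() - Free the runlist IB, if one is currently allocated.
 */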
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

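/*
 * pm_debugfs_runlist() - debugfs hook that hex-dumps the active runlist
 * IB, or notes that no runlist is allocated.
 */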
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, " No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}

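/*
 * pm_debugfs_hang_hws() - debugfs hook that deliberately hangs the HWS
 * by submitting a packet of 0x55 filler bytes on the HIQ; useful for
 * exercising GPU reset handling.
 */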
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	uint32_t *buffer, size;
	int r = 0;

	if (!pm->priv_queue)
		return -EAGAIN;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		buffer[0], buffer[1], buffer[2], buffer[3],
		buffer[4], buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}


#endif
