// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <linux/hmm.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");

#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */

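/*
 * Final kref release callback for a scheduler job: clean up the job,
 * account the free for aie2_hwctx_fini(), wake any waiter on the free
 * queue and drop the output fence before freeing the job.
 */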
static void aie2_job_release(struct kref *ref)
{
	struct amdxdna_sched_job *job;

	job = container_of(ref, struct amdxdna_sched_job, refcnt);
	amdxdna_sched_job_cleanup(job);
	atomic64_inc(&job->hwctx->job_free_cnt);
	wake_up(&job->hwctx->priv->job_free_wq);
	if (job->out_fence)
		dma_fence_put(job->out_fence);
	kfree(job);
}

static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}

/* The bad_job is only set by aie2_sched_job_timedout(); other callers pass NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	aie2_destroy_context(xdna->dev_handle, hwctx);
}

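/*
 * Recreate the firmware context, remap the heap buffer and, if CUs were
 * already configured, resend the CU configuration. The DRM scheduler is
 * restarted in any case so queued jobs make progress (or fail) instead of
 * hanging.
 */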
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
	int ret;

	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
		goto out;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
		goto out;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
		goto out;
	}

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
		goto out;
	}

out:
	drm_sched_start(&hwctx->priv->sched, 0);
	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
	return ret;
}

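/* Restart all stopped hardware contexts of @client, restoring their pre-stop status */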
void aie2_restart_ctx(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
		if (hwctx->status != HWCTX_STAT_STOP)
			continue;

		hwctx->status = hwctx->old_status;
		XDNA_DBG(xdna, "Resetting %s", hwctx->name);
		aie2_hwctx_restart(xdna, hwctx);
	}
	mutex_unlock(&client->hwctx_lock);
}

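/*
 * Look up the completion fence for sequence number @seq on the context's
 * syncobj timeline. Returns a fence reference the caller must drop, or
 * NULL if no matching fence is found.
 */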
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence, *out_fence = NULL;
	int ret;

	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!fence)
		return NULL;

	ret = dma_fence_chain_find_seqno(&fence, seq);
	if (ret)
		goto out;

	out_fence = dma_fence_get(dma_fence_chain_contained(fence));

out:
	dma_fence_put(fence);
	return out_fence;
}

static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
	if (!fence)
		return;

	/* Wait up to 2 seconds for fw to finish all pending requests */
	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
	dma_fence_put(fence);
}

void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * A command timeout here is unlikely, but even if it happens it does
	 * not break the system: aie2_hwctx_stop() will destroy the mailbox
	 * channel and abort all outstanding commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);
	hwctx->old_status = hwctx->status;
	hwctx->status = HWCTX_STAT_STOP;
}

void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * The resume path cannot guarantee that the mailbox channel is
	 * regenerated. If it is not, submitting a message to this mailbox
	 * channel will return an error.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	hwctx->status = hwctx->old_status;
	aie2_hwctx_restart(xdna, hwctx);
}

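/*
 * Common completion path for all response handlers: signal the job fence,
 * release the per-context job slot and drop the references taken in
 * aie2_sched_job_run().
 */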
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	up(&job->hwctx->priv->job_sem);
	job->job_done = true;
	dma_fence_put(fence);
	mmput_async(job->mm);
	aie2_job_put(job);
}

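/*
 * Response handler for a single command: translate the firmware status
 * into the command state, then complete the job.
 */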
static int
aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	u32 ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
	return ret;
}

static int
aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	u32 ret = 0;
	u32 status;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);

out:
	aie2_sched_notify(job);
	return ret;
}

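/*
 * Response handler for command list submissions. On failure, the index and
 * status of the failing command are read back and reflected in the command
 * chain payload.
 */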
static int
aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 cmd_status;
	u32 ret = 0;

	cmd_abo = job->cmd_bo;
	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
	if (cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle error, read from ringbuf on BAR */
	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

		cc->error_index = fail_cmd_idx;
		if (cc->error_index >= cc->command_count)
			cc->error_index = 0;
	}
out:
	aie2_sched_notify(job);
	return ret;
}

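/* drm_sched run_job callback: send the job to the device through the mailbox */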
static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	if (unlikely(!cmd_abo)) {
		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}

static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}

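/*
 * drm_sched timedout_job callback: tear down and recreate the firmware
 * context so the stuck command is aborted, then let the scheduler run again.
 */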
static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	mutex_lock(&xdna->dev_lock);
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};

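/*
 * Build the list of candidate start columns for this context, based on the
 * requested number of tiles and the device column alignment constraint.
 */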
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In the range [start, end], find the columns that are multiples of
	 * width. 'first' is the first such column, 'last' is the last one,
	 * and 'entries' is the total number of them.
	 */
	start = xdna->dev_info->first_col;
	end = ndev->total_col - hwctx->num_col;
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;
	last = end - end % width;
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}

static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct alloc_requests *xrs_req;
	int ret;

	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
	if (!xrs_req)
		return -ENOMEM;

	xrs_req->cdo.start_cols = hwctx->col_list;
	xrs_req->cdo.cols_len = hwctx->col_list_len;
	xrs_req->cdo.ncols = hwctx->num_col;
	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;

	xrs_req->rqos.gops = hwctx->qos.gops;
	xrs_req->rqos.fps = hwctx->qos.fps;
	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
	xrs_req->rqos.latency = hwctx->qos.latency;
	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
	xrs_req->rqos.priority = hwctx->qos.priority;

	xrs_req->rid = (uintptr_t)hwctx;

	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
	if (ret)
		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);

	kfree(xrs_req);
	return ret;
}

static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	int ret;

	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
	if (ret)
		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}

static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct drm_file *filp = hwctx->client->filp;
	struct drm_syncobj *syncobj;
	u32 hdl;
	int ret;

	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;

	ret = drm_syncobj_create(&syncobj, 0, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
		return ret;
	}
	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
	if (ret) {
		drm_syncobj_put(syncobj);
		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
		return ret;
	}
	hwctx->priv->syncobj = syncobj;
	hwctx->syncobj_hdl = hdl;

	return 0;
}

static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}

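/*
 * Initialize a hardware context: pin the client's device heap, allocate
 * DEV BOs for command chains, set up the DRM scheduler and entity, reserve
 * AIE columns, map the heap into the firmware context and create the
 * syncobj used to export completion fences.
 */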
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	const struct drm_sched_init_args args = {
		.ops = &sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = HWCTX_MAX_CMDS,
		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
		.name = hwctx->name,
		.dev = xdna->ddev.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	struct amdxdna_dev_hdl *ndev;
	int i, ret;

	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object does not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &args);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init sched entity. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto free_col_list;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}

	hwctx->status = HWCTX_STAT_INIT;
	ndev = xdna->dev_handle;
	ndev->hwctx_num++;
	init_waitqueue_head(&priv->job_free_wq);

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}

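/*
 * Tear down a hardware context: drain outstanding jobs, release the AIE
 * columns (which destroys the firmware context), then free the scheduler,
 * syncobj, command buffers and the heap reference.
 */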
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev_hdl *ndev;
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;
	ndev = xdna->dev_handle;
	ndev->hwctx_num--;

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
	drm_sched_entity_destroy(&hwctx->priv->entity);

	aie2_hwctx_wait_for_idle(hwctx);

	/* Request fw to destroy hwctx and cancel the remaining pending requests */
	aie2_release_resource(hwctx);

	/* Wait for all submitted jobs to be completed or canceled */
	wait_event(hwctx->priv->job_free_wq,
		   atomic64_read(&hwctx->job_submit_cnt) ==
		   atomic64_read(&hwctx->job_free_cnt));

	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}

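/*
 * Copy the user supplied CU configuration and send it to the firmware.
 * Only allowed once, while the context is still in the INIT state.
 */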
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->status != HWCTX_STAT_INIT) {
		XDNA_ERR(xdna, "Re-configuring CU is not supported");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto free_cus;
	}

	wmb(); /* To avoid locking in command submit when checking status */
	hwctx->status = HWCTX_STAT_READY;

	return 0;

free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}

int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return -EOPNOTSUPP;
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}

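/*
 * Re-fault the pages of any invalidated userptr mapping of @abo with
 * hmm_range_fault(), retrying until no mapping is invalid or the HMM
 * timeout expires.
 */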
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct amdxdna_umap *mapp;
	unsigned long timeout;
	struct mm_struct *mm;
	bool found;
	int ret;

	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
again:
	found = false;
	down_write(&xdna->notifier_lock);
	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
		if (mapp->invalid) {
			found = true;
			break;
		}
	}

	if (!found) {
		abo->mem.map_invalid = false;
		up_write(&xdna->notifier_lock);
		return 0;
	}
	kref_get(&mapp->refcnt);
	up_write(&xdna->notifier_lock);

	XDNA_DBG(xdna, "populate memory range %lx %lx",
		 mapp->vma->vm_start, mapp->vma->vm_end);
	mm = mapp->notifier.mm;
	if (!mmget_not_zero(mm)) {
		amdxdna_umap_put(mapp);
		return -EFAULT;
	}

	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&mapp->range);
	mmap_read_unlock(mm);
	if (ret) {
		if (time_after(jiffies, timeout)) {
			ret = -ETIME;
			goto put_mm;
		}

		if (ret == -EBUSY) {
			amdxdna_umap_put(mapp);
			goto again;
		}

		goto put_mm;
	}

	down_write(&xdna->notifier_lock);
	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
		up_write(&xdna->notifier_lock);
		amdxdna_umap_put(mapp);
		goto again;
	}
	mapp->invalid = false;
	up_write(&xdna->notifier_lock);
	amdxdna_umap_put(mapp);
	goto again;

put_mm:
	amdxdna_umap_put(mapp);
	mmput(mm);
	return ret;
}

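/*
 * Submit a job on the hardware context: take a job slot, lock and fence
 * the BOs, make sure no userptr range is invalid, then arm the scheduler
 * job and publish its fence on the context syncobj timeline at the new
 * sequence number.
 */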
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}

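/*
 * Invalidation helper for userptr BOs: wait for all fences on the BO so
 * the device has finished with the pages before they are unmapped.
 */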
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	if (!ret || ret == -ERESTARTSYS)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
}