// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
/* Copyright 2025 Arm, Ltd. */

#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_print.h>
#include <drm/ethosu_accel.h>

#include "ethosu_device.h"
#include "ethosu_drv.h"
#include "ethosu_gem.h"
#include "ethosu_job.h"

#define JOB_TIMEOUT_MS 500
static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct ethosu_job, base);
}

static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	return "ethosu";
}

static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	return "ethosu-npu";
}

static const struct dma_fence_ops ethosu_fence_ops = {
	.get_driver_name = ethosu_fence_get_driver_name,
	.get_timeline_name = ethosu_fence_get_timeline_name,
};

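/*
 * Program the NPU for one job: write each region BO's DMA address into its
 * BASEP register pair, point QBASE/QSIZE at the validated command stream
 * and kick the core into the running state.
 */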
static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
{
	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;

	for (int i = 0; i < job->region_cnt; i++) {
		struct drm_gem_dma_object *bo;
		int region = job->region_bo_num[i];

		bo = to_drm_gem_dma_obj(job->region_bo[i]);
		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
	}

	if (job->sram_size) {
		writel_relaxed(lower_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
		writel_relaxed(upper_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
			ETHOSU_SRAM_REGION, &dev->sramphys);
	}

	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);

	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);

	dev_dbg(dev->base.dev,
		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
}

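/*
 * Reserve a fence slot on every region BO and pull in its implicit
 * dependencies, treating regions the command stream writes to as write
 * accesses.
 */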
static int ethosu_acquire_object_fences(struct ethosu_job *job)
{
	int i, ret;
	struct drm_gem_object **bos = job->region_bo;
	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;

	for (i = 0; i < job->region_cnt; i++) {
		bool is_write;

		if (!bos[i])
			break;

		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		is_write = info->output_region[job->region_bo_num[i]];
		ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
							      is_write);
		if (ret)
			return ret;
	}

	return 0;
}

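/*
 * Attach the job's finished fence to every BO the command stream writes to,
 * so later users of those buffers wait for the inference to complete.
 */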
static void ethosu_attach_object_fences(struct ethosu_job *job)
{
	int i;
	struct dma_fence *fence = job->inference_done_fence;
	struct drm_gem_object **bos = job->region_bo;
	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;

	for (i = 0; i < job->region_cnt; i++)
		if (info->output_region[job->region_bo_num[i]])
			dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

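/*
 * Lock all region BO reservations, record the job's dependencies, then arm
 * and push it to the scheduler under the device's sched_lock. The extra
 * reference taken here is dropped when the scheduler frees the job.
 */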
static int ethosu_job_push(struct ethosu_job *job)
{
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	if (ret)
		return ret;

	ret = ethosu_acquire_object_fences(job);
	if (ret)
		goto out;

	ret = pm_runtime_resume_and_get(job->dev->base.dev);
	if (!ret) {
		guard(mutex)(&job->dev->sched_lock);

		drm_sched_job_arm(&job->base);
		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
		kref_get(&job->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&job->base);
		ethosu_attach_object_fences(job);
	}

out:
	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	return ret;
}

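/* Final kref release: drop the runtime PM reference, fences and BO references. */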
static void ethosu_job_cleanup(struct kref *ref)
{
	struct ethosu_job *job = container_of(ref, struct ethosu_job,
					      refcount);
	unsigned int i;

	pm_runtime_put_autosuspend(job->dev->base.dev);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	for (i = 0; i < job->region_cnt; i++)
		drm_gem_object_put(job->region_bo[i]);

	drm_gem_object_put(job->cmd_bo);

	kfree(job);
}

static void ethosu_job_put(struct ethosu_job *job)
{
	kref_put(&job->refcount, ethosu_job_cleanup);
}

static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(job);
}

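/*
 * Scheduler run_job callback: initialise the hardware fence on the device's
 * fence context and hand the job to the NPU. Only one job is in flight at a
 * time (the scheduler's credit limit is 1).
 */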
static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);
	struct ethosu_device *dev = job->dev;
	struct dma_fence *fence = job->done_fence;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
		       dev->fence_context, ++dev->emit_seqno);
	dma_fence_get(fence);

	scoped_guard(mutex, &dev->job_lock) {
		dev->in_flight_job = job;
		ethosu_job_hw_submit(dev, job);
	}

	return fence;
}

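/*
 * Threaded half of the interrupt handler: report bus or command-stream
 * parse errors to the scheduler as a fault, otherwise signal the in-flight
 * job's done fence.
 */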
static void ethosu_job_handle_irq(struct ethosu_device *dev)
{
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
		drm_sched_fault(&dev->sched);
		return;
	}

	scoped_guard(mutex, &dev->job_lock) {
		if (dev->in_flight_job) {
			dma_fence_signal(dev->in_flight_job->done_fence);
			dev->in_flight_job = NULL;
		}
	}
}

static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
{
	struct ethosu_device *dev = data;

	ethosu_job_handle_irq(dev);

	return IRQ_HANDLED;
}

static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
{
	struct ethosu_device *dev = data;
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (!(status & STATUS_IRQ_RAISED))
		return IRQ_NONE;

	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
	return IRQ_WAKE_THREAD;
}

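/*
 * Scheduler timeout handling: if QREAD is still advancing the NPU is making
 * progress and the timeout is ignored. Otherwise stop the scheduler, reset
 * the NPU via a forced runtime suspend/resume cycle and restart scheduling.
 */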
static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
{
	struct ethosu_job *job = to_ethosu_job(bad);
	struct ethosu_device *dev = job->dev;
	bool running;
	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
	u32 cmdaddr;

	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));

	if (running) {
		int ret;
		u32 reg;

		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
						 reg,
						 reg != cmdaddr,
						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);

		/* If still running and progress is being made, just return */
		if (!ret)
			return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
		running ? "running" : "stopped",
		cmdaddr, bocmds[cmdaddr / 4]);

	drm_sched_stop(&dev->sched, bad);

	scoped_guard(mutex, &dev->job_lock)
		dev->in_flight_job = NULL;

	/* Proceed with reset now. */
	pm_runtime_force_suspend(dev->base.dev);
	pm_runtime_force_resume(dev->base.dev);

	/* Restart the scheduler */
	drm_sched_start(&dev->sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}

static const struct drm_sched_backend_ops ethosu_sched_ops = {
	.run_job = ethosu_job_run,
	.timedout_job = ethosu_job_timedout,
	.free_job = ethosu_job_free,
};

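/*
 * One-time setup of the job backend: fence and job locks, the NPU interrupt
 * (hard handler acknowledges, threaded handler completes jobs) and a single
 * DRM GPU scheduler instance with a JOB_TIMEOUT_MS job timeout.
 */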
int ethosu_job_init(struct ethosu_device *edev)
{
	struct device *dev = edev->base.dev;
	struct drm_sched_init_args args = {
		.ops = &ethosu_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 1,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.name = dev_name(dev),
		.dev = dev,
	};
	int ret;

	spin_lock_init(&edev->fence_lock);
	ret = devm_mutex_init(dev, &edev->job_lock);
	if (ret)
		return ret;
	ret = devm_mutex_init(dev, &edev->sched_lock);
	if (ret)
		return ret;

	edev->irq = platform_get_irq(to_platform_device(dev), 0);
	if (edev->irq < 0)
		return edev->irq;

	ret = devm_request_threaded_irq(dev, edev->irq,
					ethosu_job_irq_handler,
					ethosu_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME,
					edev);
	if (ret) {
		dev_err(dev, "failed to request irq\n");
		return ret;
	}

	edev->fence_context = dma_fence_context_alloc(1);

	ret = drm_sched_init(&edev->sched, &args);
	if (ret) {
		dev_err(dev, "Failed to create scheduler: %d\n", ret);
		return ret;
	}

	return 0;
}

void ethosu_job_fini(struct ethosu_device *dev)
{
	drm_sched_fini(&dev->sched);
}

int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
{
	struct ethosu_device *dev = ethosu_priv->edev;
	struct drm_gpu_scheduler *sched = &dev->sched;
	int ret;

	ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
				    DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (WARN_ON(ret))
		return ret;

	return 0;
}

void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
{
	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;

	drm_sched_entity_destroy(entity);
}

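/*
 * Build one ethosu_job from a single drm_ethosu_job entry: look up the
 * command-stream BO and every referenced region BO, validate them against
 * the sizes recorded at command-stream validation time, then push the job
 * to the scheduler.
 */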
static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
				   struct drm_ethosu_job *job)
{
	struct ethosu_device *edev = to_ethosu_device(dev);
	struct ethosu_file_priv *file_priv = file->driver_priv;
	struct ethosu_job *ejob = NULL;
	struct ethosu_validated_cmdstream_info *cmd_info;
	int ret = 0;

	/* BO region 2 is reserved if SRAM is used */
	if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
		return -EINVAL;

	if (edev->npu_info.sram_size < job->sram_size)
		return -EINVAL;

	ejob = kzalloc(sizeof(*ejob), GFP_KERNEL);
	if (!ejob)
		return -ENOMEM;

	kref_init(&ejob->refcount);

	ejob->dev = edev;
	ejob->sram_size = job->sram_size;

	ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL);
	if (!ejob->done_fence) {
		ret = -ENOMEM;
		goto out_put_job;
	}

	ret = drm_sched_job_init(&ejob->base,
				 &file_priv->sched_entity,
				 1, NULL, file->client_id);
	if (ret)
		goto out_put_job;

	ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
	if (!ejob->cmd_bo) {
		ret = -ENOENT;
		goto out_cleanup_job;
	}
	cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
	if (!cmd_info) {
		ret = -EINVAL;
		goto out_cleanup_job;
	}

	for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
		struct drm_gem_object *gem;

		/* Can only omit a BO handle if the region is not used or used for SRAM */
		if (!job->region_bo_handles[i] &&
		    (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
			continue;

		if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
			dev_err(dev->dev,
				"Cmdstream BO handle %d set for unused region %d\n",
				job->region_bo_handles[i], i);
			ret = -EINVAL;
			goto out_cleanup_job;
		}

		gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
		if (!gem) {
			dev_err(dev->dev,
				"Invalid BO handle %d for region %d\n",
				job->region_bo_handles[i], i);
			ret = -ENOENT;
			goto out_cleanup_job;
		}

		ejob->region_bo[ejob->region_cnt] = gem;
		ejob->region_bo_num[ejob->region_cnt] = i;
		ejob->region_cnt++;

		if (to_ethosu_bo(gem)->info) {
			dev_err(dev->dev,
				"Cmdstream BO handle %d used for region %d\n",
				job->region_bo_handles[i], i);
			ret = -EINVAL;
			goto out_cleanup_job;
		}

		/* Verify the command stream doesn't have accesses outside the BO */
		if (cmd_info->region_size[i] > gem->size) {
			dev_err(dev->dev,
				"cmd stream region %d size greater than BO size (%llu > %zu)\n",
				i, cmd_info->region_size[i], gem->size);
			ret = -EOVERFLOW;
			goto out_cleanup_job;
		}
	}

	ret = ethosu_job_push(ejob);

out_cleanup_job:
	if (ret)
		drm_sched_job_cleanup(&ejob->base);
out_put_job:
	ethosu_job_put(ejob);

	return ret;
}

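/*
 * DRM_IOCTL_ETHOSU_SUBMIT handler: copy the user-supplied array of
 * drm_ethosu_job descriptors and submit them one by one, stopping at the
 * first failure.
 */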
int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_ethosu_submit *args = data;
	int ret = 0;
	unsigned int i = 0;

	if (args->pad) {
		drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
		return -EINVAL;
	}

	struct drm_ethosu_job __free(kvfree) *jobs =
		kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
	if (!jobs)
		return -ENOMEM;

	if (copy_from_user(jobs,
			   (void __user *)(uintptr_t)args->jobs,
			   args->job_count * sizeof(*jobs))) {
		drm_dbg(dev, "Failed to copy incoming job array\n");
		return -EFAULT;
	}

	for (i = 0; i < args->job_count; i++) {
		ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
		if (ret)
			return ret;
	}

	return 0;
}