| 1 | // SPDX-License-Identifier: GPL-2.0-only OR MIT |
| 2 | /* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */ |
| 3 | /* Copyright 2025 Arm, Ltd. */ |
| 4 | |
| 5 | #include <linux/bitfield.h> |
| 6 | #include <linux/genalloc.h> |
| 7 | #include <linux/interrupt.h> |
| 8 | #include <linux/iopoll.h> |
| 9 | #include <linux/platform_device.h> |
| 10 | #include <linux/pm_runtime.h> |
| 11 | |
| 12 | #include <drm/drm_file.h> |
| 13 | #include <drm/drm_gem.h> |
| 14 | #include <drm/drm_gem_dma_helper.h> |
| 15 | #include <drm/drm_print.h> |
| 16 | #include <drm/ethosu_accel.h> |
| 17 | |
| 18 | #include "ethosu_device.h" |
| 19 | #include "ethosu_drv.h" |
| 20 | #include "ethosu_gem.h" |
| 21 | #include "ethosu_job.h" |
| 22 | |
| 23 | #define JOB_TIMEOUT_MS 500 |
| 24 | |
/* Convert an embedded drm_sched_job back to its containing ethosu_job. */
static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct ethosu_job, base);
}
| 29 | |
/* dma_fence_ops callback: driver identity string shown in debug output. */
static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	return "ethosu";
}
| 34 | |
/* dma_fence_ops callback: timeline name — one global NPU timeline. */
static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	return "ethosu-npu";
}
| 39 | |
/* Minimal fence ops: only identity strings; default wait/release behavior. */
static const struct dma_fence_ops ethosu_fence_ops = {
	.get_driver_name = ethosu_fence_get_driver_name,
	.get_timeline_name = ethosu_fence_get_timeline_name,
};
| 44 | |
| 45 | static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job) |
| 46 | { |
| 47 | struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo); |
| 48 | struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(obj: job->cmd_bo)->info; |
| 49 | |
| 50 | for (int i = 0; i < job->region_cnt; i++) { |
| 51 | struct drm_gem_dma_object *bo; |
| 52 | int region = job->region_bo_num[i]; |
| 53 | |
| 54 | bo = to_drm_gem_dma_obj(job->region_bo[i]); |
| 55 | writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region)); |
| 56 | writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region)); |
| 57 | dev_dbg(dev->base.dev, "Region %d base addr = %pad\n" , region, &bo->dma_addr); |
| 58 | } |
| 59 | |
| 60 | if (job->sram_size) { |
| 61 | writel_relaxed(lower_32_bits(dev->sramphys), |
| 62 | dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION)); |
| 63 | writel_relaxed(upper_32_bits(dev->sramphys), |
| 64 | dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION)); |
| 65 | dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n" , |
| 66 | ETHOSU_SRAM_REGION, &dev->sramphys); |
| 67 | } |
| 68 | |
| 69 | writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE); |
| 70 | writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI); |
| 71 | writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE); |
| 72 | |
| 73 | writel(CMD_TRANSITION_TO_RUN, addr: dev->regs + NPU_REG_CMD); |
| 74 | |
| 75 | dev_dbg(dev->base.dev, |
| 76 | "Submitted cmd at %pad to core\n" , &cmd_bo->dma_addr); |
| 77 | } |
| 78 | |
| 79 | static int ethosu_acquire_object_fences(struct ethosu_job *job) |
| 80 | { |
| 81 | int i, ret; |
| 82 | struct drm_gem_object **bos = job->region_bo; |
| 83 | struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(obj: job->cmd_bo)->info; |
| 84 | |
| 85 | for (i = 0; i < job->region_cnt; i++) { |
| 86 | bool is_write; |
| 87 | |
| 88 | if (!bos[i]) |
| 89 | break; |
| 90 | |
| 91 | ret = dma_resv_reserve_fences(obj: bos[i]->resv, num_fences: 1); |
| 92 | if (ret) |
| 93 | return ret; |
| 94 | |
| 95 | is_write = info->output_region[job->region_bo_num[i]]; |
| 96 | ret = drm_sched_job_add_implicit_dependencies(job: &job->base, obj: bos[i], |
| 97 | write: is_write); |
| 98 | if (ret) |
| 99 | return ret; |
| 100 | } |
| 101 | |
| 102 | return 0; |
| 103 | } |
| 104 | |
| 105 | static void ethosu_attach_object_fences(struct ethosu_job *job) |
| 106 | { |
| 107 | int i; |
| 108 | struct dma_fence *fence = job->inference_done_fence; |
| 109 | struct drm_gem_object **bos = job->region_bo; |
| 110 | struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(obj: job->cmd_bo)->info; |
| 111 | |
| 112 | for (i = 0; i < job->region_cnt; i++) |
| 113 | if (info->output_region[job->region_bo_num[i]]) |
| 114 | dma_resv_add_fence(obj: bos[i]->resv, fence, usage: DMA_RESV_USAGE_WRITE); |
| 115 | } |
| 116 | |
| 117 | static int ethosu_job_push(struct ethosu_job *job) |
| 118 | { |
| 119 | struct ww_acquire_ctx acquire_ctx; |
| 120 | int ret; |
| 121 | |
| 122 | ret = drm_gem_lock_reservations(objs: job->region_bo, count: job->region_cnt, acquire_ctx: &acquire_ctx); |
| 123 | if (ret) |
| 124 | return ret; |
| 125 | |
| 126 | ret = ethosu_acquire_object_fences(job); |
| 127 | if (ret) |
| 128 | goto out; |
| 129 | |
| 130 | ret = pm_runtime_resume_and_get(dev: job->dev->base.dev); |
| 131 | if (!ret) { |
| 132 | guard(mutex)(T: &job->dev->sched_lock); |
| 133 | |
| 134 | drm_sched_job_arm(job: &job->base); |
| 135 | job->inference_done_fence = dma_fence_get(fence: &job->base.s_fence->finished); |
| 136 | kref_get(kref: &job->refcount); /* put by scheduler job completion */ |
| 137 | drm_sched_entity_push_job(sched_job: &job->base); |
| 138 | ethosu_attach_object_fences(job); |
| 139 | } |
| 140 | |
| 141 | out: |
| 142 | drm_gem_unlock_reservations(objs: job->region_bo, count: job->region_cnt, acquire_ctx: &acquire_ctx); |
| 143 | return ret; |
| 144 | } |
| 145 | |
| 146 | static void ethosu_job_cleanup(struct kref *ref) |
| 147 | { |
| 148 | struct ethosu_job *job = container_of(ref, struct ethosu_job, |
| 149 | refcount); |
| 150 | unsigned int i; |
| 151 | |
| 152 | pm_runtime_put_autosuspend(dev: job->dev->base.dev); |
| 153 | |
| 154 | dma_fence_put(fence: job->done_fence); |
| 155 | dma_fence_put(fence: job->inference_done_fence); |
| 156 | |
| 157 | for (i = 0; i < job->region_cnt; i++) |
| 158 | drm_gem_object_put(obj: job->region_bo[i]); |
| 159 | |
| 160 | drm_gem_object_put(obj: job->cmd_bo); |
| 161 | |
| 162 | kfree(objp: job); |
| 163 | } |
| 164 | |
| 165 | static void ethosu_job_put(struct ethosu_job *job) |
| 166 | { |
| 167 | kref_put(kref: &job->refcount, release: ethosu_job_cleanup); |
| 168 | } |
| 169 | |
/* drm_sched free_job: release scheduler state and the scheduler's job ref. */
static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(job);
}
| 177 | |
| 178 | static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job) |
| 179 | { |
| 180 | struct ethosu_job *job = to_ethosu_job(sched_job); |
| 181 | struct ethosu_device *dev = job->dev; |
| 182 | struct dma_fence *fence = job->done_fence; |
| 183 | |
| 184 | if (unlikely(job->base.s_fence->finished.error)) |
| 185 | return NULL; |
| 186 | |
| 187 | dma_fence_init(fence, ops: ðosu_fence_ops, lock: &dev->fence_lock, |
| 188 | context: dev->fence_context, seqno: ++dev->emit_seqno); |
| 189 | dma_fence_get(fence); |
| 190 | |
| 191 | scoped_guard(mutex, &dev->job_lock) { |
| 192 | dev->in_flight_job = job; |
| 193 | ethosu_job_hw_submit(dev, job); |
| 194 | } |
| 195 | |
| 196 | return fence; |
| 197 | } |
| 198 | |
| 199 | static void ethosu_job_handle_irq(struct ethosu_device *dev) |
| 200 | { |
| 201 | u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); |
| 202 | |
| 203 | if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) { |
| 204 | dev_err(dev->base.dev, "Error IRQ - %x\n" , status); |
| 205 | drm_sched_fault(sched: &dev->sched); |
| 206 | return; |
| 207 | } |
| 208 | |
| 209 | scoped_guard(mutex, &dev->job_lock) { |
| 210 | if (dev->in_flight_job) { |
| 211 | dma_fence_signal(fence: dev->in_flight_job->done_fence); |
| 212 | dev->in_flight_job = NULL; |
| 213 | } |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data) |
| 218 | { |
| 219 | struct ethosu_device *dev = data; |
| 220 | |
| 221 | ethosu_job_handle_irq(dev); |
| 222 | |
| 223 | return IRQ_HANDLED; |
| 224 | } |
| 225 | |
| 226 | static irqreturn_t ethosu_job_irq_handler(int irq, void *data) |
| 227 | { |
| 228 | struct ethosu_device *dev = data; |
| 229 | u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); |
| 230 | |
| 231 | if (!(status & STATUS_IRQ_RAISED)) |
| 232 | return IRQ_NONE; |
| 233 | |
| 234 | writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD); |
| 235 | return IRQ_WAKE_THREAD; |
| 236 | } |
| 237 | |
| 238 | static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad) |
| 239 | { |
| 240 | struct ethosu_job *job = to_ethosu_job(sched_job: bad); |
| 241 | struct ethosu_device *dev = job->dev; |
| 242 | bool running; |
| 243 | u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr; |
| 244 | u32 cmdaddr; |
| 245 | |
| 246 | cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD); |
| 247 | running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS)); |
| 248 | |
| 249 | if (running) { |
| 250 | int ret; |
| 251 | u32 reg; |
| 252 | |
| 253 | ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD, |
| 254 | reg, |
| 255 | reg != cmdaddr, |
| 256 | USEC_PER_MSEC, 100 * USEC_PER_MSEC); |
| 257 | |
| 258 | /* If still running and progress is being made, just return */ |
| 259 | if (!ret) |
| 260 | return DRM_GPU_SCHED_STAT_NO_HANG; |
| 261 | } |
| 262 | |
| 263 | dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n" , |
| 264 | running ? "running" : "stopped" , |
| 265 | cmdaddr, bocmds[cmdaddr / 4]); |
| 266 | |
| 267 | drm_sched_stop(sched: &dev->sched, bad); |
| 268 | |
| 269 | scoped_guard(mutex, &dev->job_lock) |
| 270 | dev->in_flight_job = NULL; |
| 271 | |
| 272 | /* Proceed with reset now. */ |
| 273 | pm_runtime_force_suspend(dev: dev->base.dev); |
| 274 | pm_runtime_force_resume(dev: dev->base.dev); |
| 275 | |
| 276 | /* Restart the scheduler */ |
| 277 | drm_sched_start(sched: &dev->sched, errno: 0); |
| 278 | |
| 279 | return DRM_GPU_SCHED_STAT_RESET; |
| 280 | } |
| 281 | |
/* Scheduler backend: single-queue NPU with timeout-driven reset recovery. */
static const struct drm_sched_backend_ops ethosu_sched_ops = {
	.run_job = ethosu_job_run,
	.timedout_job = ethosu_job_timedout,
	.free_job = ethosu_job_free
};
| 287 | |
| 288 | int ethosu_job_init(struct ethosu_device *edev) |
| 289 | { |
| 290 | struct device *dev = edev->base.dev; |
| 291 | struct drm_sched_init_args args = { |
| 292 | .ops = ðosu_sched_ops, |
| 293 | .num_rqs = DRM_SCHED_PRIORITY_COUNT, |
| 294 | .credit_limit = 1, |
| 295 | .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), |
| 296 | .name = dev_name(dev), |
| 297 | .dev = dev, |
| 298 | }; |
| 299 | int ret; |
| 300 | |
| 301 | spin_lock_init(&edev->fence_lock); |
| 302 | ret = devm_mutex_init(dev, &edev->job_lock); |
| 303 | if (ret) |
| 304 | return ret; |
| 305 | ret = devm_mutex_init(dev, &edev->sched_lock); |
| 306 | if (ret) |
| 307 | return ret; |
| 308 | |
| 309 | edev->irq = platform_get_irq(to_platform_device(dev), 0); |
| 310 | if (edev->irq < 0) |
| 311 | return edev->irq; |
| 312 | |
| 313 | ret = devm_request_threaded_irq(dev, irq: edev->irq, |
| 314 | handler: ethosu_job_irq_handler, |
| 315 | thread_fn: ethosu_job_irq_handler_thread, |
| 316 | IRQF_SHARED, KBUILD_MODNAME, |
| 317 | dev_id: edev); |
| 318 | if (ret) { |
| 319 | dev_err(dev, "failed to request irq\n" ); |
| 320 | return ret; |
| 321 | } |
| 322 | |
| 323 | edev->fence_context = dma_fence_context_alloc(num: 1); |
| 324 | |
| 325 | ret = drm_sched_init(sched: &edev->sched, args: &args); |
| 326 | if (ret) { |
| 327 | dev_err(dev, "Failed to create scheduler: %d\n" , ret); |
| 328 | goto err_sched; |
| 329 | } |
| 330 | |
| 331 | return 0; |
| 332 | |
| 333 | err_sched: |
| 334 | drm_sched_fini(sched: &edev->sched); |
| 335 | return ret; |
| 336 | } |
| 337 | |
| 338 | void ethosu_job_fini(struct ethosu_device *dev) |
| 339 | { |
| 340 | drm_sched_fini(sched: &dev->sched); |
| 341 | } |
| 342 | |
| 343 | int ethosu_job_open(struct ethosu_file_priv *ethosu_priv) |
| 344 | { |
| 345 | struct ethosu_device *dev = ethosu_priv->edev; |
| 346 | struct drm_gpu_scheduler *sched = &dev->sched; |
| 347 | int ret; |
| 348 | |
| 349 | ret = drm_sched_entity_init(entity: ðosu_priv->sched_entity, |
| 350 | priority: DRM_SCHED_PRIORITY_NORMAL, |
| 351 | sched_list: &sched, num_sched_list: 1, NULL); |
| 352 | return WARN_ON(ret); |
| 353 | } |
| 354 | |
| 355 | void ethosu_job_close(struct ethosu_file_priv *ethosu_priv) |
| 356 | { |
| 357 | struct drm_sched_entity *entity = ðosu_priv->sched_entity; |
| 358 | |
| 359 | drm_sched_entity_destroy(entity); |
| 360 | } |
| 361 | |
| 362 | static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file, |
| 363 | struct drm_ethosu_job *job) |
| 364 | { |
| 365 | struct ethosu_device *edev = to_ethosu_device(dev); |
| 366 | struct ethosu_file_priv *file_priv = file->driver_priv; |
| 367 | struct ethosu_job *ejob = NULL; |
| 368 | struct ethosu_validated_cmdstream_info *cmd_info; |
| 369 | int ret = 0; |
| 370 | |
| 371 | /* BO region 2 is reserved if SRAM is used */ |
| 372 | if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size) |
| 373 | return -EINVAL; |
| 374 | |
| 375 | if (edev->npu_info.sram_size < job->sram_size) |
| 376 | return -EINVAL; |
| 377 | |
| 378 | ejob = kzalloc(sizeof(*ejob), GFP_KERNEL); |
| 379 | if (!ejob) |
| 380 | return -ENOMEM; |
| 381 | |
| 382 | kref_init(kref: &ejob->refcount); |
| 383 | |
| 384 | ejob->dev = edev; |
| 385 | ejob->sram_size = job->sram_size; |
| 386 | |
| 387 | ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL); |
| 388 | if (!ejob->done_fence) { |
| 389 | ret = -ENOMEM; |
| 390 | goto out_cleanup_job; |
| 391 | } |
| 392 | |
| 393 | ret = drm_sched_job_init(job: &ejob->base, |
| 394 | entity: &file_priv->sched_entity, |
| 395 | credits: 1, NULL, drm_client_id: file->client_id); |
| 396 | if (ret) |
| 397 | goto out_put_job; |
| 398 | |
| 399 | ejob->cmd_bo = drm_gem_object_lookup(filp: file, handle: job->cmd_bo); |
| 400 | if (!ejob->cmd_bo) { |
| 401 | ret = -ENOENT; |
| 402 | goto out_cleanup_job; |
| 403 | } |
| 404 | cmd_info = to_ethosu_bo(obj: ejob->cmd_bo)->info; |
| 405 | if (!cmd_info) { |
| 406 | ret = -EINVAL; |
| 407 | goto out_cleanup_job; |
| 408 | } |
| 409 | |
| 410 | for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) { |
| 411 | struct drm_gem_object *gem; |
| 412 | |
| 413 | /* Can only omit a BO handle if the region is not used or used for SRAM */ |
| 414 | if (!job->region_bo_handles[i] && |
| 415 | (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size))) |
| 416 | continue; |
| 417 | |
| 418 | if (job->region_bo_handles[i] && !cmd_info->region_size[i]) { |
| 419 | dev_err(dev->dev, |
| 420 | "Cmdstream BO handle %d set for unused region %d\n" , |
| 421 | job->region_bo_handles[i], i); |
| 422 | ret = -EINVAL; |
| 423 | goto out_cleanup_job; |
| 424 | } |
| 425 | |
| 426 | gem = drm_gem_object_lookup(filp: file, handle: job->region_bo_handles[i]); |
| 427 | if (!gem) { |
| 428 | dev_err(dev->dev, |
| 429 | "Invalid BO handle %d for region %d\n" , |
| 430 | job->region_bo_handles[i], i); |
| 431 | ret = -ENOENT; |
| 432 | goto out_cleanup_job; |
| 433 | } |
| 434 | |
| 435 | ejob->region_bo[ejob->region_cnt] = gem; |
| 436 | ejob->region_bo_num[ejob->region_cnt] = i; |
| 437 | ejob->region_cnt++; |
| 438 | |
| 439 | if (to_ethosu_bo(obj: gem)->info) { |
| 440 | dev_err(dev->dev, |
| 441 | "Cmdstream BO handle %d used for region %d\n" , |
| 442 | job->region_bo_handles[i], i); |
| 443 | ret = -EINVAL; |
| 444 | goto out_cleanup_job; |
| 445 | } |
| 446 | |
| 447 | /* Verify the command stream doesn't have accesses outside the BO */ |
| 448 | if (cmd_info->region_size[i] > gem->size) { |
| 449 | dev_err(dev->dev, |
| 450 | "cmd stream region %d size greater than BO size (%llu > %zu)\n" , |
| 451 | i, cmd_info->region_size[i], gem->size); |
| 452 | ret = -EOVERFLOW; |
| 453 | goto out_cleanup_job; |
| 454 | } |
| 455 | } |
| 456 | ret = ethosu_job_push(job: ejob); |
| 457 | |
| 458 | out_cleanup_job: |
| 459 | if (ret) |
| 460 | drm_sched_job_cleanup(job: &ejob->base); |
| 461 | out_put_job: |
| 462 | ethosu_job_put(job: ejob); |
| 463 | |
| 464 | return ret; |
| 465 | } |
| 466 | |
| 467 | int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) |
| 468 | { |
| 469 | struct drm_ethosu_submit *args = data; |
| 470 | int ret = 0; |
| 471 | unsigned int i = 0; |
| 472 | |
| 473 | if (args->pad) { |
| 474 | drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n" ); |
| 475 | return -EINVAL; |
| 476 | } |
| 477 | |
| 478 | struct drm_ethosu_job __free(kvfree) *jobs = |
| 479 | kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); |
| 480 | if (!jobs) |
| 481 | return -ENOMEM; |
| 482 | |
| 483 | if (copy_from_user(to: jobs, |
| 484 | from: (void __user *)(uintptr_t)args->jobs, |
| 485 | n: args->job_count * sizeof(*jobs))) { |
| 486 | drm_dbg(dev, "Failed to copy incoming job array\n" ); |
| 487 | return -EFAULT; |
| 488 | } |
| 489 | |
| 490 | for (i = 0; i < args->job_count; i++) { |
| 491 | ret = ethosu_ioctl_submit_job(dev, file, job: &jobs[i]); |
| 492 | if (ret) |
| 493 | return ret; |
| 494 | } |
| 495 | |
| 496 | return 0; |
| 497 | } |
| 498 | |