// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */

#include <linux/iosys-map.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pm_runtime.h>

#include "lima_devfreq.h"
#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_gem.h"
#include "lima_trace.h"

struct lima_fence {
	struct dma_fence base;
	struct lima_sched_pipe *pipe;
};

static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;

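/*
 * The fence slab is created on first use and reference counted, so
 * repeated init/fini pairs share a single cache.
 */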
int lima_sched_slab_init(void)
{
	if (!lima_fence_slab) {
		lima_fence_slab = kmem_cache_create(
			"lima_fence", sizeof(struct lima_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!lima_fence_slab)
			return -ENOMEM;
	}

	lima_fence_slab_refcnt++;
	return 0;
}

void lima_sched_slab_fini(void)
{
	if (!--lima_fence_slab_refcnt) {
		kmem_cache_destroy(lima_fence_slab);
		lima_fence_slab = NULL;
	}
}

static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
	return container_of(fence, struct lima_fence, base);
}

static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
	return "lima";
}

static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	return f->pipe->base.name;
}

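/*
 * dma_fence objects may still be reached from RCU-protected lookups,
 * so defer freeing the fence until after an RCU grace period.
 */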
static void lima_fence_release_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct lima_fence *fence = to_lima_fence(f);

	kmem_cache_free(lima_fence_slab, fence);
}

static void lima_fence_release(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	call_rcu(&f->base.rcu, lima_fence_release_rcu);
}

static const struct dma_fence_ops lima_fence_ops = {
	.get_driver_name = lima_fence_get_driver_name,
	.get_timeline_name = lima_fence_get_timeline_name,
	.release = lima_fence_release,
};

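/*
 * Allocate a hardware fence on the pipe's fence context; seqnos
 * increase strictly per pipe.
 */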
static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
	struct lima_fence *fence;

	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->pipe = pipe;
	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
		       pipe->fence_context, ++pipe->fence_seqno);

	return fence;
}

static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
	return container_of(job, struct lima_sched_task, base);
}

static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
	return container_of(sched, struct lima_sched_pipe, base);
}

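/*
 * Take a reference on each BO and on the VM for the lifetime of the
 * task; both are dropped again in lima_sched_task_fini().
 */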
int lima_sched_task_init(struct lima_sched_task *task,
			 struct lima_sched_context *context,
			 struct lima_bo **bos, int num_bos,
			 struct lima_vm *vm)
{
	int err, i;

	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
	if (!task->bos)
		return -ENOMEM;

	for (i = 0; i < num_bos; i++)
		drm_gem_object_get(&bos[i]->base.base);

	err = drm_sched_job_init(&task->base, &context->base, vm);
	if (err) {
		kfree(task->bos);
		return err;
	}

	drm_sched_job_arm(&task->base);

	task->num_bos = num_bos;
	task->vm = lima_vm_get(vm);

	return 0;
}

void lima_sched_task_fini(struct lima_sched_task *task)
{
	int i;

	drm_sched_job_cleanup(&task->base);

	if (task->bos) {
		for (i = 0; i < task->num_bos; i++)
			drm_gem_object_put(&task->bos[i]->base.base);
		kfree(task->bos);
	}

	lima_vm_put(task->vm);
}

int lima_sched_context_init(struct lima_sched_pipe *pipe,
			    struct lima_sched_context *context,
			    atomic_t *guilty)
{
	struct drm_gpu_scheduler *sched = &pipe->base;

	return drm_sched_entity_init(&context->base, DRM_SCHED_PRIORITY_NORMAL,
				     &sched, 1, guilty);
}

void lima_sched_context_fini(struct lima_sched_pipe *pipe,
			     struct lima_sched_context *context)
{
	drm_sched_entity_destroy(&context->base);
}

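/*
 * Grab the finished fence before pushing the job: once pushed, the
 * scheduler owns the job and may complete and free it at any time.
 */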
struct dma_fence *lima_sched_context_queue_task(struct lima_sched_task *task)
{
	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

	trace_lima_task_submit(task);
	drm_sched_entity_push_job(&task->base);
	return fence;
}

static int lima_pm_busy(struct lima_device *ldev)
{
	int ret;

	/* resume GPU if it has been suspended by runtime PM */
	ret = pm_runtime_resume_and_get(ldev->dev);
	if (ret < 0)
		return ret;

	lima_devfreq_record_busy(&ldev->devfreq);
	return 0;
}

static void lima_pm_idle(struct lima_device *ldev)
{
	lima_devfreq_record_idle(&ldev->devfreq);

	/* GPU can do auto runtime suspend */
	pm_runtime_mark_last_busy(ldev->dev);
	pm_runtime_put_autosuspend(ldev->dev);
}

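/*
 * Start a task on the hardware: power up the GPU, flush the L2
 * caches, switch the MMUs to the task's VM and hand the frame to the
 * pipe's task_run() callback.
 */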
static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_device *ldev = pipe->ldev;
	struct lima_fence *fence;
	int i, err;

	/* after GPU reset */
	if (job->s_fence->finished.error < 0)
		return NULL;

	fence = lima_fence_create(pipe);
	if (!fence)
		return NULL;

	err = lima_pm_busy(ldev);
	if (err < 0) {
		dma_fence_put(&fence->base);
		return NULL;
	}

	task->fence = &fence->base;

	/* hold an extra reference for the caller, otherwise the irq
	 * handler may drop the fence before the caller uses it
	 */
	dma_fence_get(task->fence);

	pipe->current_task = task;

	/* this is needed for the MMU to work correctly, otherwise GP/PP
	 * will hang or page fault for an unknown reason after running
	 * for a while.
	 *
	 * Need to investigate:
	 * 1. is it related to the TLB
	 * 2. how much performance is affected by the L2 cache flush
	 * 3. can we flush less often, given that all GP/PP share the
	 *    same L2 cache on mali400
	 *
	 * TODO:
	 * 1. move this to task fini to save some wait time?
	 * 2. when GP/PP use different L2 caches, does PP need to wait
	 *    for the GP L2 cache flush?
	 */
	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	lima_vm_put(pipe->current_vm);
	pipe->current_vm = lima_vm_get(task->vm);

	if (pipe->bcast_mmu)
		lima_mmu_switch_vm(pipe->bcast_mmu, pipe->current_vm);
	else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_switch_vm(pipe->mmu[i], pipe->current_vm);
	}

	trace_lima_task_run(task);

	pipe->error = false;
	pipe->task_run(pipe, task);

	return task->fence;
}

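/*
 * Capture the state of a faulting task into the device's error task
 * list for later debugging. The dump is a lima_dump_task header
 * followed by a chain of chunks: the frame registers, the process
 * name, the pid, and one chunk per buffer object.
 */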
static void lima_sched_build_error_task_list(struct lima_sched_task *task)
{
	struct lima_sched_error_task *et;
	struct lima_sched_pipe *pipe = to_lima_pipe(task->base.sched);
	struct lima_ip *ip = pipe->processor[0];
	int pipe_id = ip->id == lima_ip_gp ? lima_pipe_gp : lima_pipe_pp;
	struct lima_device *dev = ip->dev;
	struct lima_sched_context *sched_ctx =
		container_of(task->base.entity,
			     struct lima_sched_context, base);
	struct lima_ctx *ctx =
		container_of(sched_ctx, struct lima_ctx, context[pipe_id]);
	struct lima_dump_task *dt;
	struct lima_dump_chunk *chunk;
	struct lima_dump_chunk_pid *pid_chunk;
	struct lima_dump_chunk_buffer *buffer_chunk;
	u32 size, task_size, mem_size;
	int i;
	struct iosys_map map;
	int ret;

	mutex_lock(&dev->error_task_list_lock);

	if (dev->dump.num_tasks >= lima_max_error_tasks) {
		dev_info(dev->dev, "fail to save task state from %s pid %d: "
			 "error task list is full\n", ctx->pname, ctx->pid);
		goto out;
	}

	/* frame chunk */
	size = sizeof(struct lima_dump_chunk) + pipe->frame_size;
	/* process name chunk */
	size += sizeof(struct lima_dump_chunk) + sizeof(ctx->pname);
	/* pid chunk */
	size += sizeof(struct lima_dump_chunk);
	/* buffer chunks */
	for (i = 0; i < task->num_bos; i++) {
		struct lima_bo *bo = task->bos[i];

		size += sizeof(struct lima_dump_chunk);
		size += bo->heap_size ? bo->heap_size : lima_bo_size(bo);
	}

	task_size = size + sizeof(struct lima_dump_task);
	mem_size = task_size + sizeof(*et);
	et = kvmalloc(mem_size, GFP_KERNEL);
	if (!et) {
		dev_err(dev->dev, "fail to alloc task dump buffer of size %x\n",
			mem_size);
		goto out;
	}

	et->data = et + 1;
	et->size = task_size;

	dt = et->data;
	memset(dt, 0, sizeof(*dt));
	dt->id = pipe_id;
	dt->size = size;

	chunk = (struct lima_dump_chunk *)(dt + 1);
	memset(chunk, 0, sizeof(*chunk));
	chunk->id = LIMA_DUMP_CHUNK_FRAME;
	chunk->size = pipe->frame_size;
	memcpy(chunk + 1, task->frame, pipe->frame_size);
	dt->num_chunks++;

	chunk = (void *)(chunk + 1) + chunk->size;
	memset(chunk, 0, sizeof(*chunk));
	chunk->id = LIMA_DUMP_CHUNK_PROCESS_NAME;
	chunk->size = sizeof(ctx->pname);
	memcpy(chunk + 1, ctx->pname, sizeof(ctx->pname));
	dt->num_chunks++;

	pid_chunk = (void *)(chunk + 1) + chunk->size;
	memset(pid_chunk, 0, sizeof(*pid_chunk));
	pid_chunk->id = LIMA_DUMP_CHUNK_PROCESS_ID;
	pid_chunk->pid = ctx->pid;
	dt->num_chunks++;

	buffer_chunk = (void *)(pid_chunk + 1) + pid_chunk->size;
	for (i = 0; i < task->num_bos; i++) {
		struct lima_bo *bo = task->bos[i];
		void *data;

		memset(buffer_chunk, 0, sizeof(*buffer_chunk));
		buffer_chunk->id = LIMA_DUMP_CHUNK_BUFFER;
		buffer_chunk->va = lima_vm_get_va(task->vm, bo);

		if (bo->heap_size) {
			buffer_chunk->size = bo->heap_size;

			data = vmap(bo->base.pages, bo->heap_size >> PAGE_SHIFT,
				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
			if (!data) {
				kvfree(et);
				goto out;
			}

			memcpy(buffer_chunk + 1, data, buffer_chunk->size);

			vunmap(data);
		} else {
			buffer_chunk->size = lima_bo_size(bo);

			ret = drm_gem_vmap_unlocked(&bo->base.base, &map);
			if (ret) {
				kvfree(et);
				goto out;
			}

			memcpy(buffer_chunk + 1, map.vaddr, buffer_chunk->size);

			drm_gem_vunmap_unlocked(&bo->base.base, &map);
		}

		buffer_chunk = (void *)(buffer_chunk + 1) + buffer_chunk->size;
		dt->num_chunks++;
	}

	list_add(&et->list, &dev->error_task_list);
	dev->dump.size += et->size;
	dev->dump.num_tasks++;

	dev_info(dev->dev, "save error task state success\n");

out:
	mutex_unlock(&dev->error_task_list_lock);
}

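/*
 * Timeout handling: stop the scheduler, blame the bad task, optionally
 * capture an error dump, reset the hardware and the MMUs, drop the
 * current VM, let the GPU idle, then resubmit the remaining jobs and
 * restart the scheduler.
 */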
static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_device *ldev = pipe->ldev;

	if (!pipe->error)
		DRM_ERROR("lima job timeout\n");

	drm_sched_stop(&pipe->base, &task->base);

	drm_sched_increase_karma(&task->base);

	if (lima_max_error_tasks)
		lima_sched_build_error_task_list(task);

	pipe->task_error(pipe);

	if (pipe->bcast_mmu)
		lima_mmu_page_fault_resume(pipe->bcast_mmu);
	else {
		int i;

		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_page_fault_resume(pipe->mmu[i]);
	}

	lima_vm_put(pipe->current_vm);
	pipe->current_vm = NULL;
	pipe->current_task = NULL;

	lima_pm_idle(ldev);

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

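/*
 * Release everything a finished job still holds: its fence reference,
 * the per-task VM mappings of the BOs, the BO and VM references, and
 * finally the task itself.
 */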
static void lima_sched_free_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_vm *vm = task->vm;
	struct lima_bo **bos = task->bos;
	int i;

	dma_fence_put(task->fence);

	for (i = 0; i < task->num_bos; i++)
		lima_vm_bo_del(vm, bos[i]);

	lima_sched_task_fini(task);
	kmem_cache_free(pipe->task_slab, task);
}

static const struct drm_sched_backend_ops lima_sched_ops = {
	.run_job = lima_sched_run_job,
	.timedout_job = lima_sched_timedout_job,
	.free_job = lima_sched_free_job,
};

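/*
 * Soft recovery after a recoverable fault: flush the L2 caches and
 * MMU TLBs, then let the pipe retry the task via its task_recover()
 * callback; if that fails, escalate to the full timeout path through
 * drm_sched_fault().
 */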
static void lima_sched_recover_work(struct work_struct *work)
{
	struct lima_sched_pipe *pipe =
		container_of(work, struct lima_sched_pipe, recover_work);
	int i;

	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	if (pipe->bcast_mmu) {
		lima_mmu_flush_tlb(pipe->bcast_mmu);
	} else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_flush_tlb(pipe->mmu[i]);
	}

	if (pipe->task_recover(pipe))
		drm_sched_fault(&pipe->base);
}

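/*
 * One scheduler instance per pipe with a single hardware submission
 * slot. The job timeout defaults to 500 ms unless overridden by the
 * lima_sched_timeout_ms module parameter.
 */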
int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
	unsigned int timeout = lima_sched_timeout_ms > 0 ?
			       lima_sched_timeout_ms : 500;

	pipe->fence_context = dma_fence_context_alloc(1);
	spin_lock_init(&pipe->fence_lock);

	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);

	return drm_sched_init(&pipe->base, &lima_sched_ops,
			      DRM_SCHED_PRIORITY_COUNT,
			      1,
			      lima_job_hang_limit,
			      msecs_to_jiffies(timeout), NULL,
			      NULL, name, pipe->ldev->dev);
}

void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
	drm_sched_fini(&pipe->base);
}

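/*
 * Completion handler called from the interrupt path: on error, either
 * schedule the recover work (for tasks marked recoverable) or report
 * a fault to the scheduler; on success, finish the task, signal its
 * fence and let the GPU idle.
 */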
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
	struct lima_sched_task *task = pipe->current_task;
	struct lima_device *ldev = pipe->ldev;

	if (pipe->error) {
		if (task && task->recoverable)
			schedule_work(&pipe->recover_work);
		else
			drm_sched_fault(&pipe->base);
	} else {
		pipe->task_fini(pipe);
		dma_fence_signal(task->fence);

		lima_pm_idle(ldev);
	}
}