1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2013 Red Hat |
4 | * Author: Rob Clark <robdclark@gmail.com> |
5 | */ |
6 | |
7 | #include "drm/drm_drv.h" |
8 | |
9 | #include "msm_gpu.h" |
10 | #include "msm_gem.h" |
11 | #include "msm_mmu.h" |
12 | #include "msm_fence.h" |
13 | #include "msm_gpu_trace.h" |
14 | #include "adreno/adreno_gpu.h" |
15 | |
16 | #include <generated/utsrelease.h> |
17 | #include <linux/string_helpers.h> |
18 | #include <linux/devcoredump.h> |
19 | #include <linux/sched/task.h> |
20 | |
21 | /* |
22 | * Power Management: |
23 | */ |
24 | |
25 | static int enable_pwrrail(struct msm_gpu *gpu) |
26 | { |
27 | struct drm_device *dev = gpu->dev; |
28 | int ret = 0; |
29 | |
30 | if (gpu->gpu_reg) { |
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
34 | return ret; |
35 | } |
36 | } |
37 | |
38 | if (gpu->gpu_cx) { |
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
42 | return ret; |
43 | } |
44 | } |
45 | |
46 | return 0; |
47 | } |
48 | |
49 | static int disable_pwrrail(struct msm_gpu *gpu) |
50 | { |
51 | if (gpu->gpu_cx) |
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
55 | return 0; |
56 | } |
57 | |
58 | static int enable_clk(struct msm_gpu *gpu) |
59 | { |
	if (gpu->core_clk && gpu->fast_rate)
		dev_pm_opp_set_rate(&gpu->pdev->dev, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
68 | } |
69 | |
70 | static int disable_clk(struct msm_gpu *gpu) |
71 | { |
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
73 | |
74 | /* |
75 | * Set the clock to a deliberately low rate. On older targets the clock |
76 | * speed had to be non zero to avoid problems. On newer targets this |
77 | * will be rounded down to zero anyway so it all works out. |
78 | */ |
79 | if (gpu->core_clk) |
		dev_pm_opp_set_rate(&gpu->pdev->dev, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);
84 | |
85 | return 0; |
86 | } |
87 | |
88 | static int enable_axi(struct msm_gpu *gpu) |
89 | { |
	return clk_prepare_enable(gpu->ebi1_clk);
91 | } |
92 | |
93 | static int disable_axi(struct msm_gpu *gpu) |
94 | { |
	clk_disable_unprepare(gpu->ebi1_clk);
96 | return 0; |
97 | } |
98 | |
99 | int msm_gpu_pm_resume(struct msm_gpu *gpu) |
100 | { |
101 | int ret; |
102 | |
103 | DBG("%s" , gpu->name); |
104 | trace_msm_gpu_resume(dummy: 0); |
105 | |
106 | ret = enable_pwrrail(gpu); |
107 | if (ret) |
108 | return ret; |
109 | |
110 | ret = enable_clk(gpu); |
111 | if (ret) |
112 | return ret; |
113 | |
114 | ret = enable_axi(gpu); |
115 | if (ret) |
116 | return ret; |
117 | |
118 | msm_devfreq_resume(gpu); |
119 | |
120 | gpu->needs_hw_init = true; |
121 | |
122 | return 0; |
123 | } |
124 | |
125 | int msm_gpu_pm_suspend(struct msm_gpu *gpu) |
126 | { |
127 | int ret; |
128 | |
129 | DBG("%s" , gpu->name); |
130 | trace_msm_gpu_suspend(dummy: 0); |
131 | |
132 | msm_devfreq_suspend(gpu); |
133 | |
134 | ret = disable_axi(gpu); |
135 | if (ret) |
136 | return ret; |
137 | |
138 | ret = disable_clk(gpu); |
139 | if (ret) |
140 | return ret; |
141 | |
142 | ret = disable_pwrrail(gpu); |
143 | if (ret) |
144 | return ret; |
145 | |
146 | gpu->suspend_count++; |
147 | |
148 | return 0; |
149 | } |
150 | |
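/*
 * Export per-context GPU usage in the common drm fdinfo format
 * (drm-engine-, drm-cycles- and drm-maxfreq- keys, see
 * Documentation/gpu/drm-usage-stats.rst), so userspace can attribute
 * GPU time to processes.
 */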
151 | void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx, |
152 | struct drm_printer *p) |
153 | { |
	drm_printf(p, "drm-engine-gpu:\t%llu ns\n", ctx->elapsed_ns);
	drm_printf(p, "drm-cycles-gpu:\t%llu\n", ctx->cycles);
	drm_printf(p, "drm-maxfreq-gpu:\t%u Hz\n", gpu->fast_rate);
157 | } |
158 | |
159 | int msm_gpu_hw_init(struct msm_gpu *gpu) |
160 | { |
161 | int ret; |
162 | |
163 | WARN_ON(!mutex_is_locked(&gpu->lock)); |
164 | |
165 | if (!gpu->needs_hw_init) |
166 | return 0; |
167 | |
	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);
173 | |
174 | return ret; |
175 | } |
176 | |
177 | #ifdef CONFIG_DEV_COREDUMP |
178 | static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, |
179 | size_t count, void *data, size_t datalen) |
180 | { |
181 | struct msm_gpu *gpu = data; |
182 | struct drm_print_iterator iter; |
183 | struct drm_printer p; |
184 | struct msm_gpu_state *state; |
185 | |
186 | state = msm_gpu_crashstate_get(gpu); |
187 | if (!state) |
188 | return 0; |
189 | |
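	/*
	 * The devcoredump core reads the dump in chunks: @offset/@count
	 * select a window into the output, drm_coredump_printer() discards
	 * anything outside of it, and the number of bytes produced for this
	 * chunk is count - iter.remain.
	 */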
190 | iter.data = buffer; |
191 | iter.offset = 0; |
192 | iter.start = offset; |
193 | iter.remain = count; |
194 | |
	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);
206 | |
207 | gpu->funcs->show(gpu, state, &p); |
208 | |
209 | msm_gpu_crashstate_put(gpu); |
210 | |
211 | return count - iter.remain; |
212 | } |
213 | |
214 | static void msm_gpu_devcoredump_free(void *data) |
215 | { |
216 | struct msm_gpu *gpu = data; |
217 | |
218 | msm_gpu_crashstate_put(gpu); |
219 | } |
220 | |
221 | static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state, |
222 | struct drm_gem_object *obj, u64 iova, bool full) |
223 | { |
224 | struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos]; |
225 | |
226 | /* Don't record write only objects */ |
227 | state_bo->size = obj->size; |
228 | state_bo->iova = iova; |
229 | |
230 | BUILD_BUG_ON(sizeof(state_bo->name) != sizeof(to_msm_bo(obj)->name)); |
231 | |
232 | memcpy(state_bo->name, to_msm_bo(obj)->name, sizeof(state_bo->name)); |
233 | |
234 | if (full) { |
235 | void *ptr; |
236 | |
		state_bo->data = kvmalloc(obj->size, GFP_KERNEL);
		if (!state_bo->data)
			goto out;

		msm_gem_lock(obj);
		ptr = msm_gem_get_vaddr_active(obj);
		msm_gem_unlock(obj);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
246 | state_bo->data = NULL; |
247 | goto out; |
248 | } |
249 | |
250 | memcpy(state_bo->data, ptr, obj->size); |
251 | msm_gem_put_vaddr(obj); |
252 | } |
253 | out: |
254 | state->nr_bos++; |
255 | } |
256 | |
257 | static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, |
258 | struct msm_gem_submit *submit, char *comm, char *cmd) |
259 | { |
260 | struct msm_gpu_state *state; |
261 | |
262 | /* Check if the target supports capturing crash state */ |
263 | if (!gpu->funcs->gpu_state_get) |
264 | return; |
265 | |
266 | /* Only save one crash state at a time */ |
267 | if (gpu->crashstate) |
268 | return; |
269 | |
270 | state = gpu->funcs->gpu_state_get(gpu); |
	if (IS_ERR_OR_NULL(state))
272 | return; |
273 | |
274 | /* Fill in the additional crash state information */ |
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);
277 | state->fault_info = gpu->fault_info; |
278 | |
279 | if (submit) { |
280 | int i; |
281 | |
		state->bos = kcalloc(submit->nr_bos,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; state->bos && i < submit->nr_bos; i++) {
			msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
						  submit->bos[i].iova,
						  should_dump(submit, i));
289 | } |
290 | } |
291 | |
292 | /* Set the active crash state to be dumped on failure */ |
293 | gpu->crashstate = state; |
294 | |
	dev_coredumpm(&gpu->pdev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
297 | } |
298 | #else |
299 | static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, |
300 | struct msm_gem_submit *submit, char *comm, char *cmd) |
301 | { |
302 | } |
303 | #endif |
304 | |
305 | /* |
306 | * Hangcheck detection for locked gpu: |
307 | */ |
308 | |
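/*
 * Find the in-flight submit with the given fence seqno on @ring, or NULL
 * if it has already been retired (ie. removed from ring->submits).
 */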
309 | static struct msm_gem_submit * |
310 | find_submit(struct msm_ringbuffer *ring, uint32_t fence) |
311 | { |
312 | struct msm_gem_submit *submit; |
313 | unsigned long flags; |
314 | |
315 | spin_lock_irqsave(&ring->submit_lock, flags); |
316 | list_for_each_entry(submit, &ring->submits, node) { |
317 | if (submit->seqno == fence) { |
			spin_unlock_irqrestore(&ring->submit_lock, flags);
319 | return submit; |
320 | } |
321 | } |
	spin_unlock_irqrestore(&ring->submit_lock, flags);
323 | |
324 | return NULL; |
325 | } |
326 | |
327 | static void retire_submits(struct msm_gpu *gpu); |
328 | |
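/*
 * Figure out the process to blame for a hang or fault: prefer the comm and
 * cmdline recorded in the ctx at submit time, falling back to the task
 * that owns the submit.  Must be called with gpu->lock held.
 */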
329 | static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char **cmd) |
330 | { |
331 | struct msm_file_private *ctx = submit->queue->ctx; |
332 | struct task_struct *task; |
333 | |
334 | WARN_ON(!mutex_is_locked(&submit->gpu->lock)); |
335 | |
336 | /* Note that kstrdup will return NULL if argument is NULL: */ |
	*comm = kstrdup(ctx->comm, GFP_KERNEL);
	*cmd = kstrdup(ctx->cmdline, GFP_KERNEL);

	task = get_pid_task(submit->pid, PIDTYPE_PID);
341 | if (!task) |
342 | return; |
343 | |
344 | if (!*comm) |
		*comm = kstrdup(task->comm, GFP_KERNEL);
346 | |
347 | if (!*cmd) |
348 | *cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); |
349 | |
	put_task_struct(task);
351 | } |
352 | |
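/*
 * GPU recovery, run from the kthread worker when the GPU is considered
 * hung: dump and capture crash state for the offending submit, advance
 * the fences past it, reset the GPU, and replay the remaining submits.
 */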
353 | static void recover_worker(struct kthread_work *work) |
354 | { |
355 | struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); |
356 | struct drm_device *dev = gpu->dev; |
357 | struct msm_drm_private *priv = dev->dev_private; |
358 | struct msm_gem_submit *submit; |
359 | struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); |
360 | char *comm = NULL, *cmd = NULL; |
361 | int i; |
362 | |
363 | mutex_lock(&gpu->lock); |
364 | |
365 | DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n" , gpu->name); |
366 | |
367 | submit = find_submit(ring: cur_ring, fence: cur_ring->memptrs->fence + 1); |
368 | |
369 | /* |
370 | * If the submit retired while we were waiting for the worker to run, |
371 | * or waiting to acquire the gpu lock, then nothing more to do. |
372 | */ |
373 | if (!submit) |
374 | goto out_unlock; |
375 | |
376 | /* Increment the fault counts */ |
377 | submit->queue->faults++; |
378 | if (submit->aspace) |
379 | submit->aspace->faults++; |
380 | |
	get_comm_cmdline(submit, &comm, &cmd);

	if (comm && cmd) {
		DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
			gpu->name, comm, cmd);

		msm_rd_dump_submit(priv->hangrd, submit,
			"offending task: %s (%s)", comm, cmd);
	} else {
		DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name);

		msm_rd_dump_submit(priv->hangrd, submit, NULL);
393 | } |
394 | |
395 | /* Record the crash state */ |
	pm_runtime_get_sync(&gpu->pdev->dev);
397 | msm_gpu_crashstate_capture(gpu, submit, comm, cmd); |
398 | |
	kfree(cmd);
	kfree(comm);
401 | |
402 | /* |
403 | * Update all the rings with the latest and greatest fence.. this |
404 | * needs to happen after msm_rd_dump_submit() to ensure that the |
405 | * bo's referenced by the offending submit are still around. |
406 | */ |
407 | for (i = 0; i < gpu->nr_rings; i++) { |
408 | struct msm_ringbuffer *ring = gpu->rb[i]; |
409 | |
410 | uint32_t fence = ring->memptrs->fence; |
411 | |
412 | /* |
413 | * For the current (faulting?) ring/submit advance the fence by |
414 | * one more to clear the faulting submit |
415 | */ |
416 | if (ring == cur_ring) |
417 | ring->memptrs->fence = ++fence; |
418 | |
		msm_update_fence(ring->fctx, fence);
420 | } |
421 | |
422 | if (msm_gpu_active(gpu)) { |
423 | /* retire completed submits, plus the one that hung: */ |
424 | retire_submits(gpu); |
425 | |
426 | gpu->funcs->recover(gpu); |
427 | |
428 | /* |
429 | * Replay all remaining submits starting with highest priority |
430 | * ring |
431 | */ |
432 | for (i = 0; i < gpu->nr_rings; i++) { |
433 | struct msm_ringbuffer *ring = gpu->rb[i]; |
434 | unsigned long flags; |
435 | |
436 | spin_lock_irqsave(&ring->submit_lock, flags); |
437 | list_for_each_entry(submit, &ring->submits, node) |
438 | gpu->funcs->submit(gpu, submit); |
			spin_unlock_irqrestore(&ring->submit_lock, flags);
440 | } |
441 | } |
442 | |
	pm_runtime_put(&gpu->pdev->dev);
444 | |
445 | out_unlock: |
	mutex_unlock(&gpu->lock);
447 | |
448 | msm_gpu_retire(gpu); |
449 | } |
450 | |
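/*
 * Deferred handling of a GPU iova fault: capture crash state (at most
 * once per submit) and then resume SMMU translation so the GPU can
 * continue.
 */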
451 | static void fault_worker(struct kthread_work *work) |
452 | { |
453 | struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work); |
454 | struct msm_gem_submit *submit; |
455 | struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); |
456 | char *comm = NULL, *cmd = NULL; |
457 | |
458 | mutex_lock(&gpu->lock); |
459 | |
	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
461 | if (submit && submit->fault_dumped) |
462 | goto resume_smmu; |
463 | |
464 | if (submit) { |
		get_comm_cmdline(submit, &comm, &cmd);
466 | |
467 | /* |
468 | * When we get GPU iova faults, we can get 1000s of them, |
469 | * but we really only want to log the first one. |
470 | */ |
471 | submit->fault_dumped = true; |
472 | } |
473 | |
474 | /* Record the crash state */ |
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);
478 | |
	kfree(cmd);
	kfree(comm);
481 | |
482 | resume_smmu: |
483 | memset(&gpu->fault_info, 0, sizeof(gpu->fault_info)); |
484 | gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); |
485 | |
	mutex_unlock(&gpu->lock);
487 | } |
488 | |
489 | static void hangcheck_timer_reset(struct msm_gpu *gpu) |
490 | { |
491 | struct msm_drm_private *priv = gpu->dev->dev_private; |
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
494 | } |
495 | |
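/*
 * If the target can report forward progress (funcs->progress), give the
 * ring up to DRM_MSM_HANGCHECK_PROGRESS_RETRIES extra hangcheck periods
 * before declaring it hung, to avoid false positives on long running
 * submits.
 */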
496 | static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
497 | { |
498 | if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES) |
499 | return false; |
500 | |
501 | if (!gpu->funcs->progress) |
502 | return false; |
503 | |
504 | if (!gpu->funcs->progress(gpu, ring)) |
505 | return false; |
506 | |
507 | ring->hangcheck_progress_retries++; |
508 | return true; |
509 | } |
510 | |
511 | static void hangcheck_handler(struct timer_list *t) |
512 | { |
513 | struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer); |
514 | struct drm_device *dev = gpu->dev; |
515 | struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); |
516 | uint32_t fence = ring->memptrs->fence; |
517 | |
518 | if (fence != ring->hangcheck_fence) { |
519 | /* some progress has been made.. ya! */ |
520 | ring->hangcheck_fence = fence; |
521 | ring->hangcheck_progress_retries = 0; |
	} else if (fence_before(fence, ring->fctx->last_fence) &&
			!made_progress(gpu, ring)) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		ring->hangcheck_progress_retries = 0;
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->fctx->last_fence);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
535 | } |
536 | |
537 | /* if still more pending work, reset the hangcheck timer: */ |
	if (fence_after(ring->fctx->last_fence, ring->hangcheck_fence))
539 | hangcheck_timer_reset(gpu); |
540 | |
541 | /* workaround for missing irq: */ |
542 | msm_gpu_retire(gpu); |
543 | } |
544 | |
545 | /* |
546 | * Performance Counters: |
547 | */ |
548 | |
549 | /* called under perf_lock */ |
550 | static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) |
551 | { |
552 | uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; |
553 | int i, n = min(ncntrs, gpu->num_perfcntrs); |
554 | |
555 | /* read current values: */ |
556 | for (i = 0; i < gpu->num_perfcntrs; i++) |
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);
558 | |
559 | /* update cntrs: */ |
560 | for (i = 0; i < n; i++) |
561 | cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; |
562 | |
563 | /* save current values: */ |
564 | for (i = 0; i < gpu->num_perfcntrs; i++) |
565 | gpu->last_cntrs[i] = current_cntrs[i]; |
566 | |
567 | return n; |
568 | } |
569 | |
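/*
 * Accumulate active vs. total time for the sw perf counters; called on
 * both submit and retire so that active<->idle transitions are sampled.
 */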
570 | static void update_sw_cntrs(struct msm_gpu *gpu) |
571 | { |
572 | ktime_t time; |
573 | uint32_t elapsed; |
574 | unsigned long flags; |
575 | |
576 | spin_lock_irqsave(&gpu->perf_lock, flags); |
577 | if (!gpu->perfcntr_active) |
578 | goto out; |
579 | |
580 | time = ktime_get(); |
581 | elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); |
582 | |
583 | gpu->totaltime += elapsed; |
584 | if (gpu->last_sample.active) |
585 | gpu->activetime += elapsed; |
586 | |
587 | gpu->last_sample.active = msm_gpu_active(gpu); |
588 | gpu->last_sample.time = time; |
589 | |
590 | out: |
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
592 | } |
593 | |
594 | void msm_gpu_perfcntr_start(struct msm_gpu *gpu) |
595 | { |
596 | unsigned long flags; |
597 | |
	pm_runtime_get_sync(&gpu->pdev->dev);
599 | |
600 | spin_lock_irqsave(&gpu->perf_lock, flags); |
601 | /* we could dynamically enable/disable perfcntr registers too.. */ |
602 | gpu->last_sample.active = msm_gpu_active(gpu); |
603 | gpu->last_sample.time = ktime_get(); |
604 | gpu->activetime = gpu->totaltime = 0; |
605 | gpu->perfcntr_active = true; |
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
608 | } |
609 | |
610 | void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) |
611 | { |
612 | gpu->perfcntr_active = false; |
	pm_runtime_put_sync(&gpu->pdev->dev);
614 | } |
615 | |
616 | /* returns -errno or # of cntrs sampled */ |
617 | int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, |
618 | uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) |
619 | { |
620 | unsigned long flags; |
621 | int ret; |
622 | |
623 | spin_lock_irqsave(&gpu->perf_lock, flags); |
624 | |
625 | if (!gpu->perfcntr_active) { |
626 | ret = -EINVAL; |
627 | goto out; |
628 | } |
629 | |
630 | *activetime = gpu->activetime; |
631 | *totaltime = gpu->totaltime; |
632 | |
633 | gpu->activetime = gpu->totaltime = 0; |
634 | |
635 | ret = update_hw_cntrs(gpu, ncntrs, cntrs); |
636 | |
637 | out: |
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
639 | |
640 | return ret; |
641 | } |
642 | |
643 | /* |
644 | * Cmdstream submission/retirement: |
645 | */ |
646 | |
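/*
 * Retire a single submit: account elapsed time and CP cycles (for fdinfo
 * and tracing), drop it from the ring's submit list, and update
 * devfreq/runtime-PM state on the active->idle transition.
 */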
647 | static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, |
648 | struct msm_gem_submit *submit) |
649 | { |
650 | int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; |
651 | volatile struct msm_gpu_submit_stats *stats; |
652 | u64 elapsed, clock = 0, cycles; |
653 | unsigned long flags; |
654 | |
655 | stats = &ring->memptrs->stats[index]; |
	/* Convert 19.2MHz alwayson ticks to nanoseconds for elapsed time */
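	/* ie. ns = ticks * 1e9 / 19.2e6 = ticks * 10000 / 192 */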
657 | elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000; |
658 | do_div(elapsed, 192); |
659 | |
660 | cycles = stats->cpcycles_end - stats->cpcycles_start; |
661 | |
662 | /* Calculate the clock frequency from the number of CP cycles */ |
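	/* cycles * 1000 / elapsed_ns yields the frequency in MHz */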
663 | if (elapsed) { |
664 | clock = cycles * 1000; |
665 | do_div(clock, elapsed); |
666 | } |
667 | |
668 | submit->queue->ctx->elapsed_ns += elapsed; |
669 | submit->queue->ctx->cycles += cycles; |
670 | |
	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);
673 | |
674 | msm_submit_retire(submit); |
675 | |
	pm_runtime_mark_last_busy(&gpu->pdev->dev);

	spin_lock_irqsave(&ring->submit_lock, flags);
	list_del(&submit->node);
	spin_unlock_irqrestore(&ring->submit_lock, flags);
681 | |
682 | /* Update devfreq on transition from active->idle: */ |
683 | mutex_lock(&gpu->active_lock); |
684 | gpu->active_submits--; |
685 | WARN_ON(gpu->active_submits < 0); |
686 | if (!gpu->active_submits) { |
687 | msm_devfreq_idle(gpu); |
		pm_runtime_put_autosuspend(&gpu->pdev->dev);
689 | } |
690 | |
	mutex_unlock(&gpu->active_lock);
692 | |
693 | msm_gem_submit_put(submit); |
694 | } |
695 | |
696 | static void retire_submits(struct msm_gpu *gpu) |
697 | { |
698 | int i; |
699 | |
700 | /* Retire the commits starting with highest priority */ |
701 | for (i = 0; i < gpu->nr_rings; i++) { |
702 | struct msm_ringbuffer *ring = gpu->rb[i]; |
703 | |
704 | while (true) { |
705 | struct msm_gem_submit *submit = NULL; |
706 | unsigned long flags; |
707 | |
708 | spin_lock_irqsave(&ring->submit_lock, flags); |
709 | submit = list_first_entry_or_null(&ring->submits, |
710 | struct msm_gem_submit, node); |
			spin_unlock_irqrestore(&ring->submit_lock, flags);
712 | |
713 | /* |
714 | * If no submit, we are done. If submit->fence hasn't |
715 | * been signalled, then later submits are not signalled |
716 | * either, so we are also done. |
717 | */ |
			if (submit && dma_fence_is_signaled(submit->hw_fence)) {
719 | retire_submit(gpu, ring, submit); |
720 | } else { |
721 | break; |
722 | } |
723 | } |
724 | } |
725 | |
726 | wake_up_all(&gpu->retire_event); |
727 | } |
728 | |
729 | static void retire_worker(struct kthread_work *work) |
730 | { |
731 | struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); |
732 | |
733 | retire_submits(gpu); |
734 | } |
735 | |
736 | /* call from irq handler to schedule work to retire bo's */ |
737 | void msm_gpu_retire(struct msm_gpu *gpu) |
738 | { |
739 | int i; |
740 | |
741 | for (i = 0; i < gpu->nr_rings; i++) |
		msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence);

	kthread_queue_work(gpu->worker, &gpu->retire_work);
745 | update_sw_cntrs(gpu); |
746 | } |
747 | |
748 | /* add bo's to gpu's ring, and kick gpu: */ |
749 | void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
750 | { |
751 | struct msm_ringbuffer *ring = submit->ring; |
752 | unsigned long flags; |
753 | |
754 | WARN_ON(!mutex_is_locked(&gpu->lock)); |
755 | |
	pm_runtime_get_sync(&gpu->pdev->dev);
757 | |
758 | msm_gpu_hw_init(gpu); |
759 | |
760 | submit->seqno = submit->hw_fence->seqno; |
761 | |
762 | update_sw_cntrs(gpu); |
763 | |
764 | /* |
765 | * ring->submits holds a ref to the submit, to deal with the case |
766 | * that a submit completes before msm_ioctl_gem_submit() returns. |
767 | */ |
768 | msm_gem_submit_get(submit); |
769 | |
770 | spin_lock_irqsave(&ring->submit_lock, flags); |
	list_add_tail(&submit->node, &ring->submits);
	spin_unlock_irqrestore(&ring->submit_lock, flags);
773 | |
774 | /* Update devfreq on transition from idle->active: */ |
775 | mutex_lock(&gpu->active_lock); |
776 | if (!gpu->active_submits) { |
		pm_runtime_get(&gpu->pdev->dev);
778 | msm_devfreq_active(gpu); |
779 | } |
780 | gpu->active_submits++; |
	mutex_unlock(&gpu->active_lock);
782 | |
783 | gpu->funcs->submit(gpu, submit); |
784 | gpu->cur_ctx_seqno = submit->queue->ctx->seqno; |
785 | |
	pm_runtime_put(&gpu->pdev->dev);
787 | hangcheck_timer_reset(gpu); |
788 | } |
789 | |
790 | /* |
791 | * Init/Cleanup: |
792 | */ |
793 | |
794 | static irqreturn_t irq_handler(int irq, void *data) |
795 | { |
796 | struct msm_gpu *gpu = data; |
797 | return gpu->funcs->irq(gpu); |
798 | } |
799 | |
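/*
 * Get all of the GPU's clocks as a bulk list, and pick out the "core"
 * and "rbbmtimer" clocks which get their rates set individually.
 */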
800 | static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) |
801 | { |
	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);
803 | |
804 | if (ret < 1) { |
805 | gpu->nr_clocks = 0; |
806 | return ret; |
807 | } |
808 | |
809 | gpu->nr_clocks = ret; |
810 | |
	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");
816 | |
817 | return 0; |
818 | } |
819 | |
820 | /* Return a new address space for a msm_drm_private instance */ |
821 | struct msm_gem_address_space * |
822 | msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task) |
823 | { |
	struct msm_gem_address_space *aspace = NULL;

	if (!gpu)
826 | return NULL; |
827 | |
828 | /* |
829 | * If the target doesn't support private address spaces then return |
830 | * the global one |
831 | */ |
832 | if (gpu->funcs->create_private_address_space) { |
833 | aspace = gpu->funcs->create_private_address_space(gpu); |
		if (!IS_ERR(aspace))
			aspace->pid = get_pid(task_pid(task));
836 | } |
837 | |
	if (IS_ERR_OR_NULL(aspace))
		aspace = msm_gem_address_space_get(gpu->aspace);
840 | |
841 | return aspace; |
842 | } |
843 | |
844 | int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, |
845 | struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, |
846 | const char *name, struct msm_gpu_config *config) |
847 | { |
848 | struct msm_drm_private *priv = drm->dev_private; |
849 | int i, ret, nr_rings = config->nr_rings; |
850 | void *memptrs; |
851 | uint64_t memptrs_iova; |
852 | |
853 | if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) |
854 | gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); |
855 | |
856 | gpu->dev = drm; |
857 | gpu->funcs = funcs; |
858 | gpu->name = name; |
859 | |
	gpu->worker = kthread_create_worker(0, "gpu-worker");
	if (IS_ERR(gpu->worker)) {
		ret = PTR_ERR(gpu->worker);
863 | gpu->worker = NULL; |
864 | goto fail; |
865 | } |
866 | |
	sched_set_fifo_low(gpu->worker->task);
868 | |
869 | mutex_init(&gpu->active_lock); |
870 | mutex_init(&gpu->lock); |
871 | init_waitqueue_head(&gpu->retire_event); |
872 | kthread_init_work(&gpu->retire_work, retire_worker); |
873 | kthread_init_work(&gpu->recover_work, recover_worker); |
874 | kthread_init_work(&gpu->fault_work, fault_worker); |
875 | |
876 | priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; |
877 | |
878 | /* |
879 | * If progress detection is supported, halve the hangcheck timer |
880 | * duration, as it takes two iterations of the hangcheck handler |
881 | * to detect a hang. |
882 | */ |
883 | if (funcs->progress) |
884 | priv->hangcheck_period /= 2; |
885 | |
886 | timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0); |
887 | |
888 | spin_lock_init(&gpu->perf_lock); |
889 | |
890 | |
891 | /* Map registers: */ |
	gpu->mmio = msm_ioremap(pdev, config->ioname);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
895 | goto fail; |
896 | } |
897 | |
898 | /* Get Interrupt: */ |
899 | gpu->irq = platform_get_irq(pdev, 0); |
900 | if (gpu->irq < 0) { |
901 | ret = gpu->irq; |
902 | goto fail; |
903 | } |
904 | |
	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, "gpu-irq", gpu);
	if (ret) {
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
909 | goto fail; |
910 | } |
911 | |
912 | ret = get_clocks(pdev, gpu); |
913 | if (ret) |
914 | goto fail; |
915 | |
	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
919 | gpu->ebi1_clk = NULL; |
920 | |
921 | /* Acquire regulators: */ |
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
925 | gpu->gpu_reg = NULL; |
926 | |
	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
930 | gpu->gpu_cx = NULL; |
931 | |
932 | gpu->pdev = pdev; |
	platform_set_drvdata(pdev, &gpu->adreno_smmu);
934 | |
935 | msm_devfreq_init(gpu); |
936 | |
937 | |
938 | gpu->aspace = gpu->funcs->create_address_space(gpu, pdev); |
939 | |
940 | if (gpu->aspace == NULL) |
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
944 | goto fail; |
945 | } |
946 | |
	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
		check_apriv(gpu, MSM_BO_WC), gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);
951 | |
	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
955 | goto fail; |
956 | } |
957 | |
	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");
959 | |
960 | if (nr_rings > ARRAY_SIZE(gpu->rb)) { |
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
963 | nr_rings = ARRAY_SIZE(gpu->rb); |
964 | } |
965 | |
966 | /* Create ringbuffer(s): */ |
967 | for (i = 0; i < nr_rings; i++) { |
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);
969 | |
		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			DRM_DEV_ERROR(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
974 | goto fail; |
975 | } |
976 | |
977 | memptrs += sizeof(struct msm_rbmemptrs); |
978 | memptrs_iova += sizeof(struct msm_rbmemptrs); |
979 | } |
980 | |
981 | gpu->nr_rings = nr_rings; |
982 | |
	refcount_set(&gpu->sysprof_active, 1);
984 | |
985 | return 0; |
986 | |
987 | fail: |
988 | for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { |
		msm_ringbuffer_destroy(gpu->rb[i]);
990 | gpu->rb[i] = NULL; |
991 | } |
992 | |
	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace);
994 | |
995 | platform_set_drvdata(pdev, NULL); |
996 | return ret; |
997 | } |
998 | |
999 | void msm_gpu_cleanup(struct msm_gpu *gpu) |
1000 | { |
1001 | int i; |
1002 | |
1003 | DBG("%s" , gpu->name); |
1004 | |
1005 | for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { |
		msm_ringbuffer_destroy(gpu->rb[i]);
1007 | gpu->rb[i] = NULL; |
1008 | } |
1009 | |
	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace);
1011 | |
	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
		msm_gem_address_space_put(gpu->aspace);
1015 | } |
1016 | |
1017 | if (gpu->worker) { |
		kthread_destroy_worker(gpu->worker);
1019 | } |
1020 | |
1021 | msm_devfreq_cleanup(gpu); |
1022 | |
	platform_set_drvdata(gpu->pdev, NULL);
1024 | } |
1025 | |