1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2013 Red Hat |
4 | * Author: Rob Clark <robdclark@gmail.com> |
5 | * |
6 | * Copyright (c) 2014 The Linux Foundation. All rights reserved. |
7 | */ |
8 | |
9 | #include <linux/ascii85.h> |
10 | #include <linux/interconnect.h> |
11 | #include <linux/firmware/qcom/qcom_scm.h> |
12 | #include <linux/kernel.h> |
13 | #include <linux/of_address.h> |
14 | #include <linux/pm_opp.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/soc/qcom/mdt_loader.h> |
17 | #include <linux/nvmem-consumer.h> |
18 | #include <soc/qcom/ocmem.h> |
19 | #include "adreno_gpu.h" |
20 | #include "a6xx_gpu.h" |
21 | #include "msm_gem.h" |
22 | #include "msm_mmu.h" |
23 | |
24 | static u64 address_space_size = 0; |
25 | MODULE_PARM_DESC(address_space_size, "Override for size of processes private GPU address space" ); |
26 | module_param(address_space_size, ullong, 0600); |
27 | |
28 | static bool zap_available = true; |
29 | |
30 | static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, |
31 | u32 pasid) |
32 | { |
33 | struct device *dev = &gpu->pdev->dev; |
34 | const struct firmware *fw; |
35 | const char *signed_fwname = NULL; |
36 | struct device_node *np, *mem_np; |
37 | struct resource r; |
38 | phys_addr_t mem_phys; |
39 | ssize_t mem_size; |
40 | void *mem_region = NULL; |
41 | int ret; |
42 | |
43 | if (!IS_ENABLED(CONFIG_ARCH_QCOM)) { |
44 | zap_available = false; |
45 | return -EINVAL; |
46 | } |
47 | |
48 | np = of_get_child_by_name(node: dev->of_node, name: "zap-shader" ); |
49 | if (!np) { |
50 | zap_available = false; |
51 | return -ENODEV; |
52 | } |
53 | |
54 | mem_np = of_parse_phandle(np, phandle_name: "memory-region" , index: 0); |
55 | of_node_put(node: np); |
56 | if (!mem_np) { |
57 | zap_available = false; |
58 | return -EINVAL; |
59 | } |
60 | |
61 | ret = of_address_to_resource(dev: mem_np, index: 0, r: &r); |
62 | of_node_put(node: mem_np); |
63 | if (ret) |
64 | return ret; |
65 | |
66 | mem_phys = r.start; |
67 | |
68 | /* |
69 | * Check for a firmware-name property. This is the new scheme |
70 | * to handle firmware that may be signed with device specific |
71 | * keys, allowing us to have a different zap fw path for different |
72 | * devices. |
73 | * |
74 | * If the firmware-name property is found, we bypass the |
75 | * adreno_request_fw() mechanism, because we don't need to handle |
76 | * the /lib/firmware/qcom/... vs /lib/firmware/... case. |
77 | * |
78 | * If the firmware-name property is not found, for backwards |
79 | * compatibility we fall back to the fwname from the gpulist |
80 | * table. |
81 | */ |
82 | of_property_read_string_index(np, propname: "firmware-name" , index: 0, output: &signed_fwname); |
83 | if (signed_fwname) { |
84 | fwname = signed_fwname; |
85 | ret = request_firmware_direct(fw: &fw, name: fwname, device: gpu->dev->dev); |
86 | if (ret) |
87 | fw = ERR_PTR(error: ret); |
88 | } else if (fwname) { |
89 | /* Request the MDT file from the default location: */ |
90 | fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); |
91 | } else { |
92 | /* |
93 | * For new targets, we require the firmware-name property, |
94 | * if a zap-shader is required, rather than falling back |
95 | * to a firmware name specified in gpulist. |
96 | * |
97 | * Because the firmware is signed with a (potentially) |
98 | * device specific key, having the name come from gpulist |
99 | * was a bad idea, and is only provided for backwards |
100 | * compatibility for older targets. |
101 | */ |
102 | return -ENODEV; |
103 | } |
104 | |
105 | if (IS_ERR(ptr: fw)) { |
106 | DRM_DEV_ERROR(dev, "Unable to load %s\n" , fwname); |
107 | return PTR_ERR(ptr: fw); |
108 | } |
109 | |
110 | /* Figure out how much memory we need */ |
111 | mem_size = qcom_mdt_get_size(fw); |
112 | if (mem_size < 0) { |
113 | ret = mem_size; |
114 | goto out; |
115 | } |
116 | |
117 | if (mem_size > resource_size(res: &r)) { |
118 | DRM_DEV_ERROR(dev, |
119 | "memory region is too small to load the MDT\n" ); |
120 | ret = -E2BIG; |
121 | goto out; |
122 | } |
123 | |
124 | /* Allocate memory for the firmware image */ |
125 | mem_region = memremap(offset: mem_phys, size: mem_size, flags: MEMREMAP_WC); |
126 | if (!mem_region) { |
127 | ret = -ENOMEM; |
128 | goto out; |
129 | } |
130 | |
131 | /* |
132 | * Load the rest of the MDT |
133 | * |
134 | * Note that we could be dealing with two different paths, since |
135 | * with upstream linux-firmware it would be in a qcom/ subdir.. |
136 | * adreno_request_fw() handles this, but qcom_mdt_load() does |
137 | * not. But since we've already gotten through adreno_request_fw() |
138 | * we know which of the two cases it is: |
139 | */ |
140 | if (signed_fwname || (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY)) { |
141 | ret = qcom_mdt_load(dev, fw, fw_name: fwname, pas_id: pasid, |
142 | mem_region, mem_phys, mem_size, NULL); |
143 | } else { |
144 | char *newname; |
145 | |
146 | newname = kasprintf(GFP_KERNEL, fmt: "qcom/%s" , fwname); |
147 | |
148 | ret = qcom_mdt_load(dev, fw, fw_name: newname, pas_id: pasid, |
149 | mem_region, mem_phys, mem_size, NULL); |
150 | kfree(objp: newname); |
151 | } |
152 | if (ret) |
153 | goto out; |
154 | |
155 | /* Send the image to the secure world */ |
156 | ret = qcom_scm_pas_auth_and_reset(peripheral: pasid); |
157 | |
158 | /* |
159 | * If the scm call returns -EOPNOTSUPP we assume that this target |
160 | * doesn't need/support the zap shader so quietly fail |
161 | */ |
162 | if (ret == -EOPNOTSUPP) |
163 | zap_available = false; |
164 | else if (ret) |
165 | DRM_DEV_ERROR(dev, "Unable to authorize the image\n" ); |
166 | |
167 | out: |
168 | if (mem_region) |
169 | memunmap(addr: mem_region); |
170 | |
171 | release_firmware(fw); |
172 | |
173 | return ret; |
174 | } |
175 | |
176 | int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid) |
177 | { |
178 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
179 | struct platform_device *pdev = gpu->pdev; |
180 | |
181 | /* Short cut if we determine the zap shader isn't available/needed */ |
182 | if (!zap_available) |
183 | return -ENODEV; |
184 | |
185 | /* We need SCM to be able to load the firmware */ |
186 | if (!qcom_scm_is_available()) { |
187 | DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n" ); |
188 | return -EPROBE_DEFER; |
189 | } |
190 | |
191 | return zap_shader_load_mdt(gpu, fwname: adreno_gpu->info->zapfw, pasid); |
192 | } |
193 | |
/* Create the GPU address space with no IOMMU quirks requested */
struct msm_gem_address_space *
adreno_create_address_space(struct msm_gpu *gpu,
			    struct platform_device *pdev)
{
	const unsigned long no_quirks = 0;

	return adreno_iommu_create_address_space(gpu, pdev, no_quirks);
}
200 | |
201 | struct msm_gem_address_space * |
202 | adreno_iommu_create_address_space(struct msm_gpu *gpu, |
203 | struct platform_device *pdev, |
204 | unsigned long quirks) |
205 | { |
206 | struct iommu_domain_geometry *geometry; |
207 | struct msm_mmu *mmu; |
208 | struct msm_gem_address_space *aspace; |
209 | u64 start, size; |
210 | |
211 | mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks); |
212 | if (IS_ERR_OR_NULL(ptr: mmu)) |
213 | return ERR_CAST(ptr: mmu); |
214 | |
215 | geometry = msm_iommu_get_geometry(mmu); |
216 | if (IS_ERR(ptr: geometry)) |
217 | return ERR_CAST(ptr: geometry); |
218 | |
219 | /* |
220 | * Use the aperture start or SZ_16M, whichever is greater. This will |
221 | * ensure that we align with the allocated pagetable range while still |
222 | * allowing room in the lower 32 bits for GMEM and whatnot |
223 | */ |
224 | start = max_t(u64, SZ_16M, geometry->aperture_start); |
225 | size = geometry->aperture_end - start + 1; |
226 | |
227 | aspace = msm_gem_address_space_create(mmu, "gpu" , |
228 | start & GENMASK_ULL(48, 0), size); |
229 | |
230 | if (IS_ERR(ptr: aspace) && !IS_ERR(ptr: mmu)) |
231 | mmu->funcs->destroy(mmu); |
232 | |
233 | return aspace; |
234 | } |
235 | |
236 | u64 adreno_private_address_space_size(struct msm_gpu *gpu) |
237 | { |
238 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
239 | |
240 | if (address_space_size) |
241 | return address_space_size; |
242 | |
243 | if (adreno_gpu->info->address_space_size) |
244 | return adreno_gpu->info->address_space_size; |
245 | |
246 | return SZ_4G; |
247 | } |
248 | |
249 | #define ARM_SMMU_FSR_TF BIT(1) |
250 | #define ARM_SMMU_FSR_PF BIT(3) |
251 | #define ARM_SMMU_FSR_EF BIT(4) |
252 | |
253 | int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, |
254 | struct adreno_smmu_fault_info *info, const char *block, |
255 | u32 scratch[4]) |
256 | { |
257 | const char *type = "UNKNOWN" ; |
258 | bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); |
259 | |
260 | /* |
261 | * If we aren't going to be resuming later from fault_worker, then do |
262 | * it now. |
263 | */ |
264 | if (!do_devcoredump) { |
265 | gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); |
266 | } |
267 | |
268 | /* |
269 | * Print a default message if we couldn't get the data from the |
270 | * adreno-smmu-priv |
271 | */ |
272 | if (!info) { |
273 | pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n" , |
274 | iova, flags, |
275 | scratch[0], scratch[1], scratch[2], scratch[3]); |
276 | |
277 | return 0; |
278 | } |
279 | |
280 | if (info->fsr & ARM_SMMU_FSR_TF) |
281 | type = "TRANSLATION" ; |
282 | else if (info->fsr & ARM_SMMU_FSR_PF) |
283 | type = "PERMISSION" ; |
284 | else if (info->fsr & ARM_SMMU_FSR_EF) |
285 | type = "EXTERNAL" ; |
286 | |
287 | pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n" , |
288 | info->ttbr0, iova, |
289 | flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ" , |
290 | type, block, |
291 | scratch[0], scratch[1], scratch[2], scratch[3]); |
292 | |
293 | if (do_devcoredump) { |
294 | /* Turn off the hangcheck timer to keep it from bothering us */ |
295 | del_timer(timer: &gpu->hangcheck_timer); |
296 | |
297 | gpu->fault_info.ttbr0 = info->ttbr0; |
298 | gpu->fault_info.iova = iova; |
299 | gpu->fault_info.flags = flags; |
300 | gpu->fault_info.type = type; |
301 | gpu->fault_info.block = block; |
302 | |
303 | kthread_queue_work(gpu->worker, &gpu->fault_work); |
304 | } |
305 | |
306 | return 0; |
307 | } |
308 | |
309 | int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, |
310 | uint32_t param, uint64_t *value, uint32_t *len) |
311 | { |
312 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
313 | |
314 | /* No pointer params yet */ |
315 | if (*len != 0) |
316 | return -EINVAL; |
317 | |
318 | switch (param) { |
319 | case MSM_PARAM_GPU_ID: |
320 | *value = adreno_gpu->info->revn; |
321 | return 0; |
322 | case MSM_PARAM_GMEM_SIZE: |
323 | *value = adreno_gpu->info->gmem; |
324 | return 0; |
325 | case MSM_PARAM_GMEM_BASE: |
326 | if (adreno_is_a650_family(gpu: adreno_gpu) || |
327 | adreno_is_a740_family(gpu: adreno_gpu)) |
328 | *value = 0; |
329 | else |
330 | *value = 0x100000; |
331 | return 0; |
332 | case MSM_PARAM_CHIP_ID: |
333 | *value = adreno_gpu->chip_id; |
334 | if (!adreno_gpu->info->revn) |
335 | *value |= ((uint64_t) adreno_gpu->speedbin) << 32; |
336 | return 0; |
337 | case MSM_PARAM_MAX_FREQ: |
338 | *value = adreno_gpu->base.fast_rate; |
339 | return 0; |
340 | case MSM_PARAM_TIMESTAMP: |
341 | if (adreno_gpu->funcs->get_timestamp) { |
342 | int ret; |
343 | |
344 | pm_runtime_get_sync(&gpu->pdev->dev); |
345 | ret = adreno_gpu->funcs->get_timestamp(gpu, value); |
346 | pm_runtime_put_autosuspend(&gpu->pdev->dev); |
347 | |
348 | return ret; |
349 | } |
350 | return -EINVAL; |
351 | case MSM_PARAM_PRIORITIES: |
352 | *value = gpu->nr_rings * NR_SCHED_PRIORITIES; |
353 | return 0; |
354 | case MSM_PARAM_PP_PGTABLE: |
355 | *value = 0; |
356 | return 0; |
357 | case MSM_PARAM_FAULTS: |
358 | if (ctx->aspace) |
359 | *value = gpu->global_faults + ctx->aspace->faults; |
360 | else |
361 | *value = gpu->global_faults; |
362 | return 0; |
363 | case MSM_PARAM_SUSPENDS: |
364 | *value = gpu->suspend_count; |
365 | return 0; |
366 | case MSM_PARAM_VA_START: |
367 | if (ctx->aspace == gpu->aspace) |
368 | return -EINVAL; |
369 | *value = ctx->aspace->va_start; |
370 | return 0; |
371 | case MSM_PARAM_VA_SIZE: |
372 | if (ctx->aspace == gpu->aspace) |
373 | return -EINVAL; |
374 | *value = ctx->aspace->va_size; |
375 | return 0; |
376 | case MSM_PARAM_HIGHEST_BANK_BIT: |
377 | *value = adreno_gpu->ubwc_config.highest_bank_bit; |
378 | return 0; |
379 | default: |
380 | DBG("%s: invalid param: %u" , gpu->name, param); |
381 | return -EINVAL; |
382 | } |
383 | } |
384 | |
385 | int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx, |
386 | uint32_t param, uint64_t value, uint32_t len) |
387 | { |
388 | switch (param) { |
389 | case MSM_PARAM_COMM: |
390 | case MSM_PARAM_CMDLINE: |
391 | /* kstrdup_quotable_cmdline() limits to PAGE_SIZE, so |
392 | * that should be a reasonable upper bound |
393 | */ |
394 | if (len > PAGE_SIZE) |
395 | return -EINVAL; |
396 | break; |
397 | default: |
398 | if (len != 0) |
399 | return -EINVAL; |
400 | } |
401 | |
402 | switch (param) { |
403 | case MSM_PARAM_COMM: |
404 | case MSM_PARAM_CMDLINE: { |
405 | char *str, **paramp; |
406 | |
407 | str = memdup_user_nul(u64_to_user_ptr(value), len); |
408 | if (IS_ERR(ptr: str)) |
409 | return PTR_ERR(ptr: str); |
410 | |
411 | mutex_lock(&gpu->lock); |
412 | |
413 | if (param == MSM_PARAM_COMM) { |
414 | paramp = &ctx->comm; |
415 | } else { |
416 | paramp = &ctx->cmdline; |
417 | } |
418 | |
419 | kfree(objp: *paramp); |
420 | *paramp = str; |
421 | |
422 | mutex_unlock(lock: &gpu->lock); |
423 | |
424 | return 0; |
425 | } |
426 | case MSM_PARAM_SYSPROF: |
427 | if (!capable(CAP_SYS_ADMIN)) |
428 | return -EPERM; |
429 | return msm_file_private_set_sysprof(ctx, gpu, value); |
430 | default: |
431 | DBG("%s: invalid param: %u" , gpu->name, param); |
432 | return -EINVAL; |
433 | } |
434 | } |
435 | |
436 | const struct firmware * |
437 | adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) |
438 | { |
439 | struct drm_device *drm = adreno_gpu->base.dev; |
440 | const struct firmware *fw = NULL; |
441 | char *newname; |
442 | int ret; |
443 | |
444 | newname = kasprintf(GFP_KERNEL, fmt: "qcom/%s" , fwname); |
445 | if (!newname) |
446 | return ERR_PTR(error: -ENOMEM); |
447 | |
448 | /* |
449 | * Try first to load from qcom/$fwfile using a direct load (to avoid |
450 | * a potential timeout waiting for usermode helper) |
451 | */ |
452 | if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || |
453 | (adreno_gpu->fwloc == FW_LOCATION_NEW)) { |
454 | |
455 | ret = request_firmware_direct(fw: &fw, name: newname, device: drm->dev); |
456 | if (!ret) { |
457 | DRM_DEV_INFO(drm->dev, "loaded %s from new location\n" , |
458 | newname); |
459 | adreno_gpu->fwloc = FW_LOCATION_NEW; |
460 | goto out; |
461 | } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { |
462 | DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n" , |
463 | newname, ret); |
464 | fw = ERR_PTR(error: ret); |
465 | goto out; |
466 | } |
467 | } |
468 | |
469 | /* |
470 | * Then try the legacy location without qcom/ prefix |
471 | */ |
472 | if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || |
473 | (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) { |
474 | |
475 | ret = request_firmware_direct(fw: &fw, name: fwname, device: drm->dev); |
476 | if (!ret) { |
477 | DRM_DEV_INFO(drm->dev, "loaded %s from legacy location\n" , |
478 | newname); |
479 | adreno_gpu->fwloc = FW_LOCATION_LEGACY; |
480 | goto out; |
481 | } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { |
482 | DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n" , |
483 | fwname, ret); |
484 | fw = ERR_PTR(error: ret); |
485 | goto out; |
486 | } |
487 | } |
488 | |
489 | /* |
490 | * Finally fall back to request_firmware() for cases where the |
491 | * usermode helper is needed (I think mainly android) |
492 | */ |
493 | if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || |
494 | (adreno_gpu->fwloc == FW_LOCATION_HELPER)) { |
495 | |
496 | ret = request_firmware(fw: &fw, name: newname, device: drm->dev); |
497 | if (!ret) { |
498 | DRM_DEV_INFO(drm->dev, "loaded %s with helper\n" , |
499 | newname); |
500 | adreno_gpu->fwloc = FW_LOCATION_HELPER; |
501 | goto out; |
502 | } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { |
503 | DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n" , |
504 | newname, ret); |
505 | fw = ERR_PTR(error: ret); |
506 | goto out; |
507 | } |
508 | } |
509 | |
510 | DRM_DEV_ERROR(drm->dev, "failed to load %s\n" , fwname); |
511 | fw = ERR_PTR(error: -ENOENT); |
512 | out: |
513 | kfree(objp: newname); |
514 | return fw; |
515 | } |
516 | |
517 | int adreno_load_fw(struct adreno_gpu *adreno_gpu) |
518 | { |
519 | int i; |
520 | |
521 | for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) { |
522 | const struct firmware *fw; |
523 | |
524 | if (!adreno_gpu->info->fw[i]) |
525 | continue; |
526 | |
527 | /* Skip loading GMU firwmare with GMU Wrapper */ |
528 | if (adreno_has_gmu_wrapper(gpu: adreno_gpu) && i == ADRENO_FW_GMU) |
529 | continue; |
530 | |
531 | /* Skip if the firmware has already been loaded */ |
532 | if (adreno_gpu->fw[i]) |
533 | continue; |
534 | |
535 | fw = adreno_request_fw(adreno_gpu, fwname: adreno_gpu->info->fw[i]); |
536 | if (IS_ERR(ptr: fw)) |
537 | return PTR_ERR(ptr: fw); |
538 | |
539 | adreno_gpu->fw[i] = fw; |
540 | } |
541 | |
542 | return 0; |
543 | } |
544 | |
545 | struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, |
546 | const struct firmware *fw, u64 *iova) |
547 | { |
548 | struct drm_gem_object *bo; |
549 | void *ptr; |
550 | |
551 | ptr = msm_gem_kernel_new(gpu->dev, fw->size - 4, |
552 | MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); |
553 | |
554 | if (IS_ERR(ptr)) |
555 | return ERR_CAST(ptr); |
556 | |
557 | memcpy(ptr, &fw->data[4], fw->size - 4); |
558 | |
559 | msm_gem_put_vaddr(bo); |
560 | |
561 | return bo; |
562 | } |
563 | |
564 | int adreno_hw_init(struct msm_gpu *gpu) |
565 | { |
566 | VERB("%s" , gpu->name); |
567 | |
568 | for (int i = 0; i < gpu->nr_rings; i++) { |
569 | struct msm_ringbuffer *ring = gpu->rb[i]; |
570 | |
571 | if (!ring) |
572 | continue; |
573 | |
574 | ring->cur = ring->start; |
575 | ring->next = ring->start; |
576 | ring->memptrs->rptr = 0; |
577 | ring->memptrs->bv_fence = ring->fctx->completed_fence; |
578 | |
579 | /* Detect and clean up an impossible fence, ie. if GPU managed |
580 | * to scribble something invalid, we don't want that to confuse |
581 | * us into mistakingly believing that submits have completed. |
582 | */ |
583 | if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) { |
584 | ring->memptrs->fence = ring->fctx->last_fence; |
585 | } |
586 | } |
587 | |
588 | return 0; |
589 | } |
590 | |
591 | /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ |
592 | static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, |
593 | struct msm_ringbuffer *ring) |
594 | { |
595 | struct msm_gpu *gpu = &adreno_gpu->base; |
596 | |
597 | return gpu->funcs->get_rptr(gpu, ring); |
598 | } |
599 | |
600 | struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) |
601 | { |
602 | return gpu->rb[0]; |
603 | } |
604 | |
605 | void adreno_recover(struct msm_gpu *gpu) |
606 | { |
607 | struct drm_device *dev = gpu->dev; |
608 | int ret; |
609 | |
610 | // XXX pm-runtime?? we *need* the device to be off after this |
611 | // so maybe continuing to call ->pm_suspend/resume() is better? |
612 | |
613 | gpu->funcs->pm_suspend(gpu); |
614 | gpu->funcs->pm_resume(gpu); |
615 | |
616 | ret = msm_gpu_hw_init(gpu); |
617 | if (ret) { |
618 | DRM_DEV_ERROR(dev->dev, "gpu hw init failed: %d\n" , ret); |
619 | /* hmm, oh well? */ |
620 | } |
621 | } |
622 | |
623 | void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg) |
624 | { |
625 | uint32_t wptr; |
626 | |
627 | /* Copy the shadow to the actual register */ |
628 | ring->cur = ring->next; |
629 | |
630 | /* |
631 | * Mask wptr value that we calculate to fit in the HW range. This is |
632 | * to account for the possibility that the last command fit exactly into |
633 | * the ringbuffer and rb->next hasn't wrapped to zero yet |
634 | */ |
635 | wptr = get_wptr(ring); |
636 | |
637 | /* ensure writes to ringbuffer have hit system memory: */ |
638 | mb(); |
639 | |
640 | gpu_write(gpu, reg, wptr); |
641 | } |
642 | |
643 | bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
644 | { |
645 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
646 | uint32_t wptr = get_wptr(ring); |
647 | |
648 | /* wait for CP to drain ringbuffer: */ |
649 | if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) |
650 | return true; |
651 | |
652 | /* TODO maybe we need to reset GPU here to recover from hang? */ |
653 | DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n" , |
654 | gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); |
655 | |
656 | return false; |
657 | } |
658 | |
659 | int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state) |
660 | { |
661 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
662 | int i, count = 0; |
663 | |
664 | WARN_ON(!mutex_is_locked(&gpu->lock)); |
665 | |
666 | kref_init(kref: &state->ref); |
667 | |
668 | ktime_get_real_ts64(tv: &state->time); |
669 | |
670 | for (i = 0; i < gpu->nr_rings; i++) { |
671 | int size = 0, j; |
672 | |
673 | state->ring[i].fence = gpu->rb[i]->memptrs->fence; |
674 | state->ring[i].iova = gpu->rb[i]->iova; |
675 | state->ring[i].seqno = gpu->rb[i]->fctx->last_fence; |
676 | state->ring[i].rptr = get_rptr(adreno_gpu, ring: gpu->rb[i]); |
677 | state->ring[i].wptr = get_wptr(ring: gpu->rb[i]); |
678 | |
679 | /* Copy at least 'wptr' dwords of the data */ |
680 | size = state->ring[i].wptr; |
681 | |
682 | /* After wptr find the last non zero dword to save space */ |
683 | for (j = state->ring[i].wptr; j < MSM_GPU_RINGBUFFER_SZ >> 2; j++) |
684 | if (gpu->rb[i]->start[j]) |
685 | size = j + 1; |
686 | |
687 | if (size) { |
688 | state->ring[i].data = kvmalloc(size: size << 2, GFP_KERNEL); |
689 | if (state->ring[i].data) { |
690 | memcpy(state->ring[i].data, gpu->rb[i]->start, size << 2); |
691 | state->ring[i].data_size = size << 2; |
692 | } |
693 | } |
694 | } |
695 | |
696 | /* Some targets prefer to collect their own registers */ |
697 | if (!adreno_gpu->registers) |
698 | return 0; |
699 | |
700 | /* Count the number of registers */ |
701 | for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) |
702 | count += adreno_gpu->registers[i + 1] - |
703 | adreno_gpu->registers[i] + 1; |
704 | |
705 | state->registers = kcalloc(n: count * 2, size: sizeof(u32), GFP_KERNEL); |
706 | if (state->registers) { |
707 | int pos = 0; |
708 | |
709 | for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { |
710 | u32 start = adreno_gpu->registers[i]; |
711 | u32 end = adreno_gpu->registers[i + 1]; |
712 | u32 addr; |
713 | |
714 | for (addr = start; addr <= end; addr++) { |
715 | state->registers[pos++] = addr; |
716 | state->registers[pos++] = gpu_read(gpu, addr); |
717 | } |
718 | } |
719 | |
720 | state->nr_registers = count; |
721 | } |
722 | |
723 | return 0; |
724 | } |
725 | |
726 | void adreno_gpu_state_destroy(struct msm_gpu_state *state) |
727 | { |
728 | int i; |
729 | |
730 | for (i = 0; i < ARRAY_SIZE(state->ring); i++) |
731 | kvfree(addr: state->ring[i].data); |
732 | |
733 | for (i = 0; state->bos && i < state->nr_bos; i++) |
734 | kvfree(addr: state->bos[i].data); |
735 | |
736 | kfree(objp: state->bos); |
737 | kfree(objp: state->comm); |
738 | kfree(objp: state->cmd); |
739 | kfree(objp: state->registers); |
740 | } |
741 | |
742 | static void adreno_gpu_state_kref_destroy(struct kref *kref) |
743 | { |
744 | struct msm_gpu_state *state = container_of(kref, |
745 | struct msm_gpu_state, ref); |
746 | |
747 | adreno_gpu_state_destroy(state); |
748 | kfree(objp: state); |
749 | } |
750 | |
751 | int adreno_gpu_state_put(struct msm_gpu_state *state) |
752 | { |
753 | if (IS_ERR_OR_NULL(ptr: state)) |
754 | return 1; |
755 | |
756 | return kref_put(kref: &state->ref, release: adreno_gpu_state_kref_destroy); |
757 | } |
758 | |
759 | #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) |
760 | |
761 | static char *adreno_gpu_ascii85_encode(u32 *src, size_t len) |
762 | { |
763 | void *buf; |
764 | size_t buf_itr = 0, buffer_size; |
765 | char out[ASCII85_BUFSZ]; |
766 | long l; |
767 | int i; |
768 | |
769 | if (!src || !len) |
770 | return NULL; |
771 | |
772 | l = ascii85_encode_len(len); |
773 | |
774 | /* |
775 | * Ascii85 outputs either a 5 byte string or a 1 byte string. So we |
776 | * account for the worst case of 5 bytes per dword plus the 1 for '\0' |
777 | */ |
778 | buffer_size = (l * 5) + 1; |
779 | |
780 | buf = kvmalloc(size: buffer_size, GFP_KERNEL); |
781 | if (!buf) |
782 | return NULL; |
783 | |
784 | for (i = 0; i < l; i++) |
785 | buf_itr += scnprintf(buf: buf + buf_itr, size: buffer_size - buf_itr, fmt: "%s" , |
786 | ascii85_encode(in: src[i], out)); |
787 | |
788 | return buf; |
789 | } |
790 | |
791 | /* len is expected to be in bytes |
792 | * |
793 | * WARNING: *ptr should be allocated with kvmalloc or friends. It can be free'd |
794 | * with kvfree() and replaced with a newly kvmalloc'd buffer on the first call |
795 | * when the unencoded raw data is encoded |
796 | */ |
797 | void adreno_show_object(struct drm_printer *p, void **ptr, int len, |
798 | bool *encoded) |
799 | { |
800 | if (!*ptr || !len) |
801 | return; |
802 | |
803 | if (!*encoded) { |
804 | long datalen, i; |
805 | u32 *buf = *ptr; |
806 | |
807 | /* |
808 | * Only dump the non-zero part of the buffer - rarely will |
809 | * any data completely fill the entire allocated size of |
810 | * the buffer. |
811 | */ |
812 | for (datalen = 0, i = 0; i < len >> 2; i++) |
813 | if (buf[i]) |
814 | datalen = ((i + 1) << 2); |
815 | |
816 | /* |
817 | * If we reach here, then the originally captured binary buffer |
818 | * will be replaced with the ascii85 encoded string |
819 | */ |
820 | *ptr = adreno_gpu_ascii85_encode(src: buf, len: datalen); |
821 | |
822 | kvfree(addr: buf); |
823 | |
824 | *encoded = true; |
825 | } |
826 | |
827 | if (!*ptr) |
828 | return; |
829 | |
830 | drm_puts(p, " data: !!ascii85 |\n" ); |
831 | drm_puts(p, " " ); |
832 | |
833 | drm_puts(p, *ptr); |
834 | |
835 | drm_puts(p, "\n" ); |
836 | } |
837 | |
838 | void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, |
839 | struct drm_printer *p) |
840 | { |
841 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
842 | int i; |
843 | |
844 | if (IS_ERR_OR_NULL(ptr: state)) |
845 | return; |
846 | |
847 | drm_printf(p, "revision: %u (%" ADRENO_CHIPID_FMT")\n" , |
848 | adreno_gpu->info->revn, |
849 | ADRENO_CHIPID_ARGS(adreno_gpu->chip_id)); |
850 | /* |
851 | * If this is state collected due to iova fault, so fault related info |
852 | * |
853 | * TTBR0 would not be zero, so this is a good way to distinguish |
854 | */ |
855 | if (state->fault_info.ttbr0) { |
856 | const struct msm_gpu_fault_info *info = &state->fault_info; |
857 | |
858 | drm_puts(p, "fault-info:\n" ); |
859 | drm_printf(p, " - ttbr0=%.16llx\n" , info->ttbr0); |
860 | drm_printf(p, " - iova=%.16lx\n" , info->iova); |
861 | drm_printf(p, " - dir=%s\n" , info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ" ); |
862 | drm_printf(p, " - type=%s\n" , info->type); |
863 | drm_printf(p, " - source=%s\n" , info->block); |
864 | } |
865 | |
866 | drm_printf(p, "rbbm-status: 0x%08x\n" , state->rbbm_status); |
867 | |
868 | drm_puts(p, "ringbuffer:\n" ); |
869 | |
870 | for (i = 0; i < gpu->nr_rings; i++) { |
871 | drm_printf(p, " - id: %d\n" , i); |
872 | drm_printf(p, " iova: 0x%016llx\n" , state->ring[i].iova); |
873 | drm_printf(p, " last-fence: %u\n" , state->ring[i].seqno); |
874 | drm_printf(p, " retired-fence: %u\n" , state->ring[i].fence); |
875 | drm_printf(p, " rptr: %u\n" , state->ring[i].rptr); |
876 | drm_printf(p, " wptr: %u\n" , state->ring[i].wptr); |
877 | drm_printf(p, " size: %u\n" , MSM_GPU_RINGBUFFER_SZ); |
878 | |
879 | adreno_show_object(p, ptr: &state->ring[i].data, |
880 | len: state->ring[i].data_size, encoded: &state->ring[i].encoded); |
881 | } |
882 | |
883 | if (state->bos) { |
884 | drm_puts(p, "bos:\n" ); |
885 | |
886 | for (i = 0; i < state->nr_bos; i++) { |
887 | drm_printf(p, " - iova: 0x%016llx\n" , |
888 | state->bos[i].iova); |
889 | drm_printf(p, " size: %zd\n" , state->bos[i].size); |
890 | drm_printf(p, " name: %-32s\n" , state->bos[i].name); |
891 | |
892 | adreno_show_object(p, ptr: &state->bos[i].data, |
893 | len: state->bos[i].size, encoded: &state->bos[i].encoded); |
894 | } |
895 | } |
896 | |
897 | if (state->nr_registers) { |
898 | drm_puts(p, "registers:\n" ); |
899 | |
900 | for (i = 0; i < state->nr_registers; i++) { |
901 | drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n" , |
902 | state->registers[i * 2] << 2, |
903 | state->registers[(i * 2) + 1]); |
904 | } |
905 | } |
906 | } |
907 | #endif |
908 | |
909 | /* Dump common gpu status and scratch registers on any hang, to make |
910 | * the hangcheck logs more useful. The scratch registers seem always |
911 | * safe to read when GPU has hung (unlike some other regs, depending |
912 | * on how the GPU hung), and they are useful to match up to cmdstream |
913 | * dumps when debugging hangs: |
914 | */ |
915 | void adreno_dump_info(struct msm_gpu *gpu) |
916 | { |
917 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
918 | int i; |
919 | |
920 | printk("revision: %u (%" ADRENO_CHIPID_FMT")\n" , |
921 | adreno_gpu->info->revn, |
922 | ADRENO_CHIPID_ARGS(adreno_gpu->chip_id)); |
923 | |
924 | for (i = 0; i < gpu->nr_rings; i++) { |
925 | struct msm_ringbuffer *ring = gpu->rb[i]; |
926 | |
927 | printk("rb %d: fence: %d/%d\n" , i, |
928 | ring->memptrs->fence, |
929 | ring->fctx->last_fence); |
930 | |
931 | printk("rptr: %d\n" , get_rptr(adreno_gpu, ring)); |
932 | printk("rb wptr: %d\n" , get_wptr(ring)); |
933 | } |
934 | } |
935 | |
936 | /* would be nice to not have to duplicate the _show() stuff with printk(): */ |
937 | void adreno_dump(struct msm_gpu *gpu) |
938 | { |
939 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
940 | int i; |
941 | |
942 | if (!adreno_gpu->registers) |
943 | return; |
944 | |
945 | /* dump these out in a form that can be parsed by demsm: */ |
946 | printk("IO:region %s 00000000 00020000\n" , gpu->name); |
947 | for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { |
948 | uint32_t start = adreno_gpu->registers[i]; |
949 | uint32_t end = adreno_gpu->registers[i+1]; |
950 | uint32_t addr; |
951 | |
952 | for (addr = start; addr <= end; addr++) { |
953 | uint32_t val = gpu_read(gpu, addr); |
954 | printk("IO:R %08x %08x\n" , addr<<2, val); |
955 | } |
956 | } |
957 | } |
958 | |
959 | static uint32_t ring_freewords(struct msm_ringbuffer *ring) |
960 | { |
961 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); |
962 | uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; |
963 | /* Use ring->next to calculate free size */ |
964 | uint32_t wptr = ring->next - ring->start; |
965 | uint32_t rptr = get_rptr(adreno_gpu, ring); |
966 | return (rptr + (size - 1) - wptr) % size; |
967 | } |
968 | |
969 | void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) |
970 | { |
971 | if (spin_until(ring_freewords(ring) >= ndwords)) |
972 | DRM_DEV_ERROR(ring->gpu->dev->dev, |
973 | "timeout waiting for space in ringbuffer %d\n" , |
974 | ring->id); |
975 | } |
976 | |
977 | static int adreno_get_pwrlevels(struct device *dev, |
978 | struct msm_gpu *gpu) |
979 | { |
980 | struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); |
981 | unsigned long freq = ULONG_MAX; |
982 | struct dev_pm_opp *opp; |
983 | int ret; |
984 | |
985 | gpu->fast_rate = 0; |
986 | |
987 | /* devm_pm_opp_of_add_table may error out but will still create an OPP table */ |
988 | ret = devm_pm_opp_of_add_table(dev); |
989 | if (ret == -ENODEV) { |
990 | /* Special cases for ancient hw with ancient DT bindings */ |
991 | if (adreno_is_a2xx(gpu: adreno_gpu)) { |
992 | dev_warn(dev, "Unable to find the OPP table. Falling back to 200 MHz.\n" ); |
993 | dev_pm_opp_add(dev, freq: 200000000, u_volt: 0); |
994 | } else if (adreno_is_a320(gpu: adreno_gpu)) { |
995 | dev_warn(dev, "Unable to find the OPP table. Falling back to 450 MHz.\n" ); |
996 | dev_pm_opp_add(dev, freq: 450000000, u_volt: 0); |
997 | } else { |
998 | DRM_DEV_ERROR(dev, "Unable to find the OPP table\n" ); |
999 | return -ENODEV; |
1000 | } |
1001 | } else if (ret) { |
1002 | DRM_DEV_ERROR(dev, "Unable to set the OPP table\n" ); |
1003 | return ret; |
1004 | } |
1005 | |
1006 | /* Find the fastest defined rate */ |
1007 | opp = dev_pm_opp_find_freq_floor(dev, freq: &freq); |
1008 | if (IS_ERR(ptr: opp)) |
1009 | return PTR_ERR(ptr: opp); |
1010 | |
1011 | gpu->fast_rate = freq; |
1012 | dev_pm_opp_put(opp); |
1013 | |
1014 | DBG("fast_rate=%u, slow_rate=27000000" , gpu->fast_rate); |
1015 | |
1016 | return 0; |
1017 | } |
1018 | |
1019 | int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu, |
1020 | struct adreno_ocmem *adreno_ocmem) |
1021 | { |
1022 | struct ocmem_buf *ocmem_hdl; |
1023 | struct ocmem *ocmem; |
1024 | |
1025 | ocmem = of_get_ocmem(dev); |
1026 | if (IS_ERR(ptr: ocmem)) { |
1027 | if (PTR_ERR(ptr: ocmem) == -ENODEV) { |
1028 | /* |
1029 | * Return success since either the ocmem property was |
1030 | * not specified in device tree, or ocmem support is |
1031 | * not compiled into the kernel. |
1032 | */ |
1033 | return 0; |
1034 | } |
1035 | |
1036 | return PTR_ERR(ptr: ocmem); |
1037 | } |
1038 | |
1039 | ocmem_hdl = ocmem_allocate(ocmem, client: OCMEM_GRAPHICS, size: adreno_gpu->info->gmem); |
1040 | if (IS_ERR(ptr: ocmem_hdl)) |
1041 | return PTR_ERR(ptr: ocmem_hdl); |
1042 | |
1043 | adreno_ocmem->ocmem = ocmem; |
1044 | adreno_ocmem->base = ocmem_hdl->addr; |
1045 | adreno_ocmem->hdl = ocmem_hdl; |
1046 | |
1047 | if (WARN_ON(ocmem_hdl->len != adreno_gpu->info->gmem)) |
1048 | return -ENOMEM; |
1049 | |
1050 | return 0; |
1051 | } |
1052 | |
1053 | void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem) |
1054 | { |
1055 | if (adreno_ocmem && adreno_ocmem->base) |
1056 | ocmem_free(ocmem: adreno_ocmem->ocmem, client: OCMEM_GRAPHICS, |
1057 | buf: adreno_ocmem->hdl); |
1058 | } |
1059 | |
1060 | int adreno_read_speedbin(struct device *dev, u32 *speedbin) |
1061 | { |
1062 | return nvmem_cell_read_variable_le_u32(dev, cell_id: "speed_bin" , val: speedbin); |
1063 | } |
1064 | |
1065 | int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, |
1066 | struct adreno_gpu *adreno_gpu, |
1067 | const struct adreno_gpu_funcs *funcs, int nr_rings) |
1068 | { |
1069 | struct device *dev = &pdev->dev; |
1070 | struct adreno_platform_config *config = dev->platform_data; |
1071 | struct msm_gpu_config adreno_gpu_config = { 0 }; |
1072 | struct msm_gpu *gpu = &adreno_gpu->base; |
1073 | const char *gpu_name; |
1074 | u32 speedbin; |
1075 | int ret; |
1076 | |
1077 | adreno_gpu->funcs = funcs; |
1078 | adreno_gpu->info = config->info; |
1079 | adreno_gpu->chip_id = config->chip_id; |
1080 | |
1081 | gpu->allow_relocs = config->info->family < ADRENO_6XX_GEN1; |
1082 | |
1083 | /* Only handle the core clock when GMU is not in use (or is absent). */ |
1084 | if (adreno_has_gmu_wrapper(gpu: adreno_gpu) || |
1085 | adreno_gpu->info->family < ADRENO_6XX_GEN1) { |
1086 | /* |
1087 | * This can only be done before devm_pm_opp_of_add_table(), or |
1088 | * dev_pm_opp_set_config() will WARN_ON() |
1089 | */ |
1090 | if (IS_ERR(ptr: devm_clk_get(dev, "core" ))) { |
1091 | /* |
1092 | * If "core" is absent, go for the legacy clock name. |
1093 | * If we got this far in probing, it's a given one of |
1094 | * them exists. |
1095 | */ |
1096 | devm_pm_opp_set_clkname(dev, name: "core_clk" ); |
1097 | } else |
1098 | devm_pm_opp_set_clkname(dev, name: "core" ); |
1099 | } |
1100 | |
1101 | if (adreno_read_speedbin(dev, speedbin: &speedbin) || !speedbin) |
1102 | speedbin = 0xffff; |
1103 | adreno_gpu->speedbin = (uint16_t) (0xffff & speedbin); |
1104 | |
1105 | gpu_name = devm_kasprintf(dev, GFP_KERNEL, fmt: "%" ADRENO_CHIPID_FMT, |
1106 | ADRENO_CHIPID_ARGS(config->chip_id)); |
1107 | if (!gpu_name) |
1108 | return -ENOMEM; |
1109 | |
1110 | adreno_gpu_config.ioname = "kgsl_3d0_reg_memory" ; |
1111 | |
1112 | adreno_gpu_config.nr_rings = nr_rings; |
1113 | |
1114 | ret = adreno_get_pwrlevels(dev, gpu); |
1115 | if (ret) |
1116 | return ret; |
1117 | |
1118 | pm_runtime_set_autosuspend_delay(dev, |
1119 | adreno_gpu->info->inactive_period); |
1120 | pm_runtime_use_autosuspend(dev); |
1121 | |
1122 | return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, |
1123 | gpu_name, &adreno_gpu_config); |
1124 | } |
1125 | |
1126 | void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) |
1127 | { |
1128 | struct msm_gpu *gpu = &adreno_gpu->base; |
1129 | struct msm_drm_private *priv = gpu->dev ? gpu->dev->dev_private : NULL; |
1130 | unsigned int i; |
1131 | |
1132 | for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) |
1133 | release_firmware(fw: adreno_gpu->fw[i]); |
1134 | |
1135 | if (priv && pm_runtime_enabled(&priv->gpu_pdev->dev)) |
1136 | pm_runtime_disable(&priv->gpu_pdev->dev); |
1137 | |
1138 | msm_gpu_cleanup(&adreno_gpu->base); |
1139 | } |
1140 | |