1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright 2019 Collabora Ltd */ |
3 | |
4 | #include <linux/completion.h> |
5 | #include <linux/iopoll.h> |
6 | #include <linux/iosys-map.h> |
7 | #include <linux/pm_runtime.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/uaccess.h> |
10 | |
11 | #include <drm/drm_file.h> |
12 | #include <drm/drm_gem_shmem_helper.h> |
13 | #include <drm/panfrost_drm.h> |
14 | |
15 | #include "panfrost_device.h" |
16 | #include "panfrost_features.h" |
17 | #include "panfrost_gem.h" |
18 | #include "panfrost_issues.h" |
19 | #include "panfrost_job.h" |
20 | #include "panfrost_mmu.h" |
21 | #include "panfrost_perfcnt.h" |
22 | #include "panfrost_regs.h" |
23 | |
/*
 * Counter dump layout constants. panfrost_perfcnt_init() multiplies these
 * together to size the buffer object that receives a counter dump.
 */
/* Number of counters in one counter block. */
#define COUNTERS_PER_BLOCK 64
/* Size in bytes of a single counter value. */
#define BYTES_PER_COUNTER 4
/* Number of counter blocks dumped per core group on HW_FEATURE_V4 GPUs. */
#define BLOCKS_PER_COREGROUP 8
/*
 * NOTE(review): not referenced anywhere in the visible code; presumably the
 * number of shader-core blocks per core group on V4 GPUs -- confirm.
 */
#define V4_SHADERS_PER_COREGROUP 4
28 | |
/* Per-device performance counter state, allocated by panfrost_perfcnt_init(). */
struct panfrost_perfcnt {
	/*
	 * GPU mapping of the dump buffer, obtained on enable and released on
	 * disable. Its mmnode.start provides the GPU address programmed into
	 * GPU_PERFCNT_BASE_{LO,HI}.
	 */
	struct panfrost_gem_mapping *mapping;
	/* Size in bytes of the dump buffer; also the amount copied to userspace. */
	size_t bosize;
	/* Kernel virtual address of the dump buffer while it is vmap'ed. */
	void *buf;
	/* File currently owning the counters, NULL when they are disabled. */
	struct panfrost_file_priv *user;
	/* Held around enable, disable and dump. */
	struct mutex lock;
	/* Signalled by panfrost_perfcnt_clean_cache_done(). */
	struct completion dump_comp;
};
37 | |
38 | void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev) |
39 | { |
40 | complete(&pfdev->perfcnt->dump_comp); |
41 | } |
42 | |
/*
 * Issue a cache clean by writing GPU_CMD_CLEAN_CACHES to GPU_CMD.
 *
 * NOTE(review): presumably called from the GPU IRQ handler once a counter
 * sample has completed, so that the sampled data is cleaned out of the GPU
 * caches before panfrost_perfcnt_clean_cache_done() wakes the waiter -- the
 * caller is outside this file, confirm.
 */
void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
{
	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
}
47 | |
48 | static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) |
49 | { |
50 | u64 gpuva; |
51 | int ret; |
52 | |
53 | reinit_completion(x: &pfdev->perfcnt->dump_comp); |
54 | gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT; |
55 | gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva)); |
56 | gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva)); |
57 | gpu_write(pfdev, GPU_INT_CLEAR, |
58 | GPU_IRQ_CLEAN_CACHES_COMPLETED | |
59 | GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); |
60 | gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE); |
61 | ret = wait_for_completion_interruptible_timeout(x: &pfdev->perfcnt->dump_comp, |
62 | timeout: msecs_to_jiffies(m: 1000)); |
63 | if (!ret) |
64 | ret = -ETIMEDOUT; |
65 | else if (ret > 0) |
66 | ret = 0; |
67 | |
68 | return ret; |
69 | } |
70 | |
71 | static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, |
72 | struct drm_file *file_priv, |
73 | unsigned int counterset) |
74 | { |
75 | struct panfrost_file_priv *user = file_priv->driver_priv; |
76 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
77 | struct iosys_map map; |
78 | struct drm_gem_shmem_object *bo; |
79 | u32 cfg, as; |
80 | int ret; |
81 | |
82 | if (user == perfcnt->user) |
83 | return 0; |
84 | else if (perfcnt->user) |
85 | return -EBUSY; |
86 | |
87 | ret = pm_runtime_get_sync(dev: pfdev->dev); |
88 | if (ret < 0) |
89 | goto err_put_pm; |
90 | |
91 | bo = drm_gem_shmem_create(dev: pfdev->ddev, size: perfcnt->bosize); |
92 | if (IS_ERR(ptr: bo)) { |
93 | ret = PTR_ERR(ptr: bo); |
94 | goto err_put_pm; |
95 | } |
96 | |
97 | /* Map the perfcnt buf in the address space attached to file_priv. */ |
98 | ret = panfrost_gem_open(obj: &bo->base, file_priv); |
99 | if (ret) |
100 | goto err_put_bo; |
101 | |
102 | perfcnt->mapping = panfrost_gem_mapping_get(bo: to_panfrost_bo(obj: &bo->base), |
103 | priv: user); |
104 | if (!perfcnt->mapping) { |
105 | ret = -EINVAL; |
106 | goto err_close_bo; |
107 | } |
108 | |
109 | ret = drm_gem_vmap_unlocked(obj: &bo->base, map: &map); |
110 | if (ret) |
111 | goto err_put_mapping; |
112 | perfcnt->buf = map.vaddr; |
113 | |
114 | /* |
115 | * Invalidate the cache and clear the counters to start from a fresh |
116 | * state. |
117 | */ |
118 | reinit_completion(x: &pfdev->perfcnt->dump_comp); |
119 | gpu_write(pfdev, GPU_INT_CLEAR, |
120 | GPU_IRQ_CLEAN_CACHES_COMPLETED | |
121 | GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); |
122 | gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR); |
123 | gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES); |
124 | ret = wait_for_completion_timeout(x: &pfdev->perfcnt->dump_comp, |
125 | timeout: msecs_to_jiffies(m: 1000)); |
126 | if (!ret) { |
127 | ret = -ETIMEDOUT; |
128 | goto err_vunmap; |
129 | } |
130 | |
131 | perfcnt->user = user; |
132 | |
133 | as = panfrost_mmu_as_get(pfdev, mmu: perfcnt->mapping->mmu); |
134 | cfg = GPU_PERFCNT_CFG_AS(as) | |
135 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); |
136 | |
137 | /* |
138 | * Bifrost GPUs have 2 set of counters, but we're only interested by |
139 | * the first one for now. |
140 | */ |
141 | if (panfrost_model_is_bifrost(pfdev)) |
142 | cfg |= GPU_PERFCNT_CFG_SETSEL(counterset); |
143 | |
144 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff); |
145 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff); |
146 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff); |
147 | |
148 | /* |
149 | * Due to PRLAM-8186 we need to disable the Tiler before we enable HW |
150 | * counters. |
151 | */ |
152 | if (panfrost_has_hw_issue(pfdev, issue: HW_ISSUE_8186)) |
153 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); |
154 | else |
155 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); |
156 | |
157 | gpu_write(pfdev, GPU_PERFCNT_CFG, cfg); |
158 | |
159 | if (panfrost_has_hw_issue(pfdev, issue: HW_ISSUE_8186)) |
160 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); |
161 | |
162 | /* The BO ref is retained by the mapping. */ |
163 | drm_gem_object_put(obj: &bo->base); |
164 | |
165 | return 0; |
166 | |
167 | err_vunmap: |
168 | drm_gem_vunmap_unlocked(obj: &bo->base, map: &map); |
169 | err_put_mapping: |
170 | panfrost_gem_mapping_put(mapping: perfcnt->mapping); |
171 | err_close_bo: |
172 | panfrost_gem_close(obj: &bo->base, file_priv); |
173 | err_put_bo: |
174 | drm_gem_object_put(obj: &bo->base); |
175 | err_put_pm: |
176 | pm_runtime_put(dev: pfdev->dev); |
177 | return ret; |
178 | } |
179 | |
180 | static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, |
181 | struct drm_file *file_priv) |
182 | { |
183 | struct panfrost_file_priv *user = file_priv->driver_priv; |
184 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
185 | struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf); |
186 | |
187 | if (user != perfcnt->user) |
188 | return -EINVAL; |
189 | |
190 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0); |
191 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0); |
192 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0); |
193 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); |
194 | gpu_write(pfdev, GPU_PERFCNT_CFG, |
195 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); |
196 | |
197 | perfcnt->user = NULL; |
198 | drm_gem_vunmap_unlocked(obj: &perfcnt->mapping->obj->base.base, map: &map); |
199 | perfcnt->buf = NULL; |
200 | panfrost_gem_close(obj: &perfcnt->mapping->obj->base.base, file_priv); |
201 | panfrost_mmu_as_put(pfdev, mmu: perfcnt->mapping->mmu); |
202 | panfrost_gem_mapping_put(mapping: perfcnt->mapping); |
203 | perfcnt->mapping = NULL; |
204 | pm_runtime_mark_last_busy(dev: pfdev->dev); |
205 | pm_runtime_put_autosuspend(dev: pfdev->dev); |
206 | |
207 | return 0; |
208 | } |
209 | |
210 | int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, |
211 | struct drm_file *file_priv) |
212 | { |
213 | struct panfrost_device *pfdev = dev->dev_private; |
214 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
215 | struct drm_panfrost_perfcnt_enable *req = data; |
216 | int ret; |
217 | |
218 | ret = panfrost_unstable_ioctl_check(); |
219 | if (ret) |
220 | return ret; |
221 | |
222 | /* Only Bifrost GPUs have 2 set of counters. */ |
223 | if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0)) |
224 | return -EINVAL; |
225 | |
226 | mutex_lock(&perfcnt->lock); |
227 | if (req->enable) |
228 | ret = panfrost_perfcnt_enable_locked(pfdev, file_priv, |
229 | counterset: req->counterset); |
230 | else |
231 | ret = panfrost_perfcnt_disable_locked(pfdev, file_priv); |
232 | mutex_unlock(lock: &perfcnt->lock); |
233 | |
234 | return ret; |
235 | } |
236 | |
237 | int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, |
238 | struct drm_file *file_priv) |
239 | { |
240 | struct panfrost_device *pfdev = dev->dev_private; |
241 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
242 | struct drm_panfrost_perfcnt_dump *req = data; |
243 | void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr; |
244 | int ret; |
245 | |
246 | ret = panfrost_unstable_ioctl_check(); |
247 | if (ret) |
248 | return ret; |
249 | |
250 | mutex_lock(&perfcnt->lock); |
251 | if (perfcnt->user != file_priv->driver_priv) { |
252 | ret = -EINVAL; |
253 | goto out; |
254 | } |
255 | |
256 | ret = panfrost_perfcnt_dump_locked(pfdev); |
257 | if (ret) |
258 | goto out; |
259 | |
260 | if (copy_to_user(to: user_ptr, from: perfcnt->buf, n: perfcnt->bosize)) |
261 | ret = -EFAULT; |
262 | |
263 | out: |
264 | mutex_unlock(lock: &perfcnt->lock); |
265 | |
266 | return ret; |
267 | } |
268 | |
269 | void panfrost_perfcnt_close(struct drm_file *file_priv) |
270 | { |
271 | struct panfrost_file_priv *pfile = file_priv->driver_priv; |
272 | struct panfrost_device *pfdev = pfile->pfdev; |
273 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
274 | |
275 | pm_runtime_get_sync(dev: pfdev->dev); |
276 | mutex_lock(&perfcnt->lock); |
277 | if (perfcnt->user == pfile) |
278 | panfrost_perfcnt_disable_locked(pfdev, file_priv); |
279 | mutex_unlock(lock: &perfcnt->lock); |
280 | pm_runtime_mark_last_busy(dev: pfdev->dev); |
281 | pm_runtime_put_autosuspend(dev: pfdev->dev); |
282 | } |
283 | |
284 | int panfrost_perfcnt_init(struct panfrost_device *pfdev) |
285 | { |
286 | struct panfrost_perfcnt *perfcnt; |
287 | size_t size; |
288 | |
289 | if (panfrost_has_hw_feature(pfdev, feat: HW_FEATURE_V4)) { |
290 | unsigned int ncoregroups; |
291 | |
292 | ncoregroups = hweight64(pfdev->features.l2_present); |
293 | size = ncoregroups * BLOCKS_PER_COREGROUP * |
294 | COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; |
295 | } else { |
296 | unsigned int nl2c, ncores; |
297 | |
298 | /* |
299 | * TODO: define a macro to extract the number of l2 caches from |
300 | * mem_features. |
301 | */ |
302 | nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1; |
303 | |
304 | /* |
305 | * shader_present might be sparse, but the counters layout |
306 | * forces to dump unused regions too, hence the fls64() call |
307 | * instead of hweight64(). |
308 | */ |
309 | ncores = fls64(x: pfdev->features.shader_present); |
310 | |
311 | /* |
312 | * There's always one JM and one Tiler block, hence the '+ 2' |
313 | * here. |
314 | */ |
315 | size = (nl2c + ncores + 2) * |
316 | COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; |
317 | } |
318 | |
319 | perfcnt = devm_kzalloc(dev: pfdev->dev, size: sizeof(*perfcnt), GFP_KERNEL); |
320 | if (!perfcnt) |
321 | return -ENOMEM; |
322 | |
323 | perfcnt->bosize = size; |
324 | |
325 | /* Start with everything disabled. */ |
326 | gpu_write(pfdev, GPU_PERFCNT_CFG, |
327 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); |
328 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); |
329 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); |
330 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); |
331 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); |
332 | |
333 | init_completion(x: &perfcnt->dump_comp); |
334 | mutex_init(&perfcnt->lock); |
335 | pfdev->perfcnt = perfcnt; |
336 | |
337 | return 0; |
338 | } |
339 | |
/*
 * Switch the counter hardware off: set the perfcnt mode to OFF, then clear
 * the JM, shader, MMU/L2 and tiler enable masks. Nothing is freed here; the
 * perfcnt state is a devm allocation made in panfrost_perfcnt_init().
 */
void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
{
	/* Disable everything before leaving. */
	gpu_write(pfdev, GPU_PERFCNT_CFG,
		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
}
350 | |