1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2014 The Linux Foundation. All rights reserved. |
4 | * Copyright (C) 2013 Red Hat |
5 | * Author: Rob Clark <robdclark@gmail.com> |
6 | */ |
7 | |
8 | #include <linux/clk.h> |
9 | #include <linux/component.h> |
10 | #include <linux/platform_device.h> |
11 | #include <linux/pm_runtime.h> |
12 | |
13 | #include "vc4_drv.h" |
14 | #include "vc4_regs.h" |
15 | |
16 | static const struct debugfs_reg32 v3d_regs[] = { |
17 | VC4_REG32(V3D_IDENT0), |
18 | VC4_REG32(V3D_IDENT1), |
19 | VC4_REG32(V3D_IDENT2), |
20 | VC4_REG32(V3D_SCRATCH), |
21 | VC4_REG32(V3D_L2CACTL), |
22 | VC4_REG32(V3D_SLCACTL), |
23 | VC4_REG32(V3D_INTCTL), |
24 | VC4_REG32(V3D_INTENA), |
25 | VC4_REG32(V3D_INTDIS), |
26 | VC4_REG32(V3D_CT0CS), |
27 | VC4_REG32(V3D_CT1CS), |
28 | VC4_REG32(V3D_CT0EA), |
29 | VC4_REG32(V3D_CT1EA), |
30 | VC4_REG32(V3D_CT0CA), |
31 | VC4_REG32(V3D_CT1CA), |
32 | VC4_REG32(V3D_CT00RA0), |
33 | VC4_REG32(V3D_CT01RA0), |
34 | VC4_REG32(V3D_CT0LC), |
35 | VC4_REG32(V3D_CT1LC), |
36 | VC4_REG32(V3D_CT0PC), |
37 | VC4_REG32(V3D_CT1PC), |
38 | VC4_REG32(V3D_PCS), |
39 | VC4_REG32(V3D_BFC), |
40 | VC4_REG32(V3D_RFC), |
41 | VC4_REG32(V3D_BPCA), |
42 | VC4_REG32(V3D_BPCS), |
43 | VC4_REG32(V3D_BPOA), |
44 | VC4_REG32(V3D_BPOS), |
45 | VC4_REG32(V3D_BXCF), |
46 | VC4_REG32(V3D_SQRSV0), |
47 | VC4_REG32(V3D_SQRSV1), |
48 | VC4_REG32(V3D_SQCNTL), |
49 | VC4_REG32(V3D_SRQPC), |
50 | VC4_REG32(V3D_SRQUA), |
51 | VC4_REG32(V3D_SRQUL), |
52 | VC4_REG32(V3D_SRQCS), |
53 | VC4_REG32(V3D_VPACNTL), |
54 | VC4_REG32(V3D_VPMBASE), |
55 | VC4_REG32(V3D_PCTRC), |
56 | VC4_REG32(V3D_PCTRE), |
57 | VC4_REG32(V3D_PCTR(0)), |
58 | VC4_REG32(V3D_PCTRS(0)), |
59 | VC4_REG32(V3D_PCTR(1)), |
60 | VC4_REG32(V3D_PCTRS(1)), |
61 | VC4_REG32(V3D_PCTR(2)), |
62 | VC4_REG32(V3D_PCTRS(2)), |
63 | VC4_REG32(V3D_PCTR(3)), |
64 | VC4_REG32(V3D_PCTRS(3)), |
65 | VC4_REG32(V3D_PCTR(4)), |
66 | VC4_REG32(V3D_PCTRS(4)), |
67 | VC4_REG32(V3D_PCTR(5)), |
68 | VC4_REG32(V3D_PCTRS(5)), |
69 | VC4_REG32(V3D_PCTR(6)), |
70 | VC4_REG32(V3D_PCTRS(6)), |
71 | VC4_REG32(V3D_PCTR(7)), |
72 | VC4_REG32(V3D_PCTRS(7)), |
73 | VC4_REG32(V3D_PCTR(8)), |
74 | VC4_REG32(V3D_PCTRS(8)), |
75 | VC4_REG32(V3D_PCTR(9)), |
76 | VC4_REG32(V3D_PCTRS(9)), |
77 | VC4_REG32(V3D_PCTR(10)), |
78 | VC4_REG32(V3D_PCTRS(10)), |
79 | VC4_REG32(V3D_PCTR(11)), |
80 | VC4_REG32(V3D_PCTRS(11)), |
81 | VC4_REG32(V3D_PCTR(12)), |
82 | VC4_REG32(V3D_PCTRS(12)), |
83 | VC4_REG32(V3D_PCTR(13)), |
84 | VC4_REG32(V3D_PCTRS(13)), |
85 | VC4_REG32(V3D_PCTR(14)), |
86 | VC4_REG32(V3D_PCTRS(14)), |
87 | VC4_REG32(V3D_PCTR(15)), |
88 | VC4_REG32(V3D_PCTRS(15)), |
89 | VC4_REG32(V3D_DBGE), |
90 | VC4_REG32(V3D_FDBGO), |
91 | VC4_REG32(V3D_FDBGB), |
92 | VC4_REG32(V3D_FDBGR), |
93 | VC4_REG32(V3D_FDBGS), |
94 | VC4_REG32(V3D_ERRSTAT), |
95 | }; |
96 | |
97 | static int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) |
98 | { |
99 | struct drm_debugfs_entry *entry = m->private; |
100 | struct drm_device *dev = entry->dev; |
101 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
102 | int ret = vc4_v3d_pm_get(vc4); |
103 | |
104 | if (ret == 0) { |
105 | uint32_t ident1 = V3D_READ(V3D_IDENT1); |
106 | uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC); |
107 | uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS); |
108 | uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS); |
109 | |
110 | seq_printf(m, fmt: "Revision: %d\n" , |
111 | VC4_GET_FIELD(ident1, V3D_IDENT1_REV)); |
112 | seq_printf(m, fmt: "Slices: %d\n" , nslc); |
113 | seq_printf(m, fmt: "TMUs: %d\n" , nslc * tups); |
114 | seq_printf(m, fmt: "QPUs: %d\n" , nslc * qups); |
115 | seq_printf(m, fmt: "Semaphores: %d\n" , |
116 | VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM)); |
117 | vc4_v3d_pm_put(vc4); |
118 | } |
119 | |
120 | return 0; |
121 | } |
122 | |
123 | /* |
124 | * Wraps pm_runtime_get_sync() in a refcount, so that we can reliably |
125 | * get the pm_runtime refcount to 0 in vc4_reset(). |
126 | */ |
127 | int |
128 | vc4_v3d_pm_get(struct vc4_dev *vc4) |
129 | { |
130 | if (WARN_ON_ONCE(vc4->is_vc5)) |
131 | return -ENODEV; |
132 | |
133 | mutex_lock(&vc4->power_lock); |
134 | if (vc4->power_refcount++ == 0) { |
135 | int ret = pm_runtime_get_sync(dev: &vc4->v3d->pdev->dev); |
136 | |
137 | if (ret < 0) { |
138 | vc4->power_refcount--; |
139 | mutex_unlock(lock: &vc4->power_lock); |
140 | return ret; |
141 | } |
142 | } |
143 | mutex_unlock(lock: &vc4->power_lock); |
144 | |
145 | return 0; |
146 | } |
147 | |
148 | void |
149 | vc4_v3d_pm_put(struct vc4_dev *vc4) |
150 | { |
151 | if (WARN_ON_ONCE(vc4->is_vc5)) |
152 | return; |
153 | |
154 | mutex_lock(&vc4->power_lock); |
155 | if (--vc4->power_refcount == 0) { |
156 | pm_runtime_mark_last_busy(dev: &vc4->v3d->pdev->dev); |
157 | pm_runtime_put_autosuspend(dev: &vc4->v3d->pdev->dev); |
158 | } |
159 | mutex_unlock(lock: &vc4->power_lock); |
160 | } |
161 | |
162 | static void vc4_v3d_init_hw(struct drm_device *dev) |
163 | { |
164 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
165 | |
166 | /* Take all the memory that would have been reserved for user |
167 | * QPU programs, since we don't have an interface for running |
168 | * them, anyway. |
169 | */ |
170 | V3D_WRITE(V3D_VPMBASE, 0); |
171 | } |
172 | |
173 | int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) |
174 | { |
175 | struct drm_device *dev = &vc4->base; |
176 | unsigned long irqflags; |
177 | int slot; |
178 | uint64_t seqno = 0; |
179 | struct vc4_exec_info *exec; |
180 | |
181 | if (WARN_ON_ONCE(vc4->is_vc5)) |
182 | return -ENODEV; |
183 | |
184 | try_again: |
185 | spin_lock_irqsave(&vc4->job_lock, irqflags); |
186 | slot = ffs(~vc4->bin_alloc_used); |
187 | if (slot != 0) { |
188 | /* Switch from ffs() bit index to a 0-based index. */ |
189 | slot--; |
190 | vc4->bin_alloc_used |= BIT(slot); |
191 | spin_unlock_irqrestore(lock: &vc4->job_lock, flags: irqflags); |
192 | return slot; |
193 | } |
194 | |
195 | /* Couldn't find an open slot. Wait for render to complete |
196 | * and try again. |
197 | */ |
198 | exec = vc4_last_render_job(vc4); |
199 | if (exec) |
200 | seqno = exec->seqno; |
201 | spin_unlock_irqrestore(lock: &vc4->job_lock, flags: irqflags); |
202 | |
203 | if (seqno) { |
204 | int ret = vc4_wait_for_seqno(dev, seqno, timeout_ns: ~0ull, interruptible: true); |
205 | |
206 | if (ret == 0) |
207 | goto try_again; |
208 | |
209 | return ret; |
210 | } |
211 | |
212 | return -ENOMEM; |
213 | } |
214 | |
215 | /* |
216 | * bin_bo_alloc() - allocates the memory that will be used for |
217 | * tile binning. |
218 | * |
219 | * The binner has a limitation that the addresses in the tile state |
220 | * buffer that point into the tile alloc buffer or binner overflow |
221 | * memory only have 28 bits (256MB), and the top 4 on the bus for |
222 | * tile alloc references end up coming from the tile state buffer's |
223 | * address. |
224 | * |
225 | * To work around this, we allocate a single large buffer while V3D is |
226 | * in use, make sure that it has the top 4 bits constant across its |
227 | * entire extent, and then put the tile state, tile alloc, and binner |
228 | * overflow memory inside that buffer. |
229 | * |
230 | * This creates a limitation where we may not be able to execute a job |
231 | * if it doesn't fit within the buffer that we allocated up front. |
232 | * However, it turns out that 16MB is "enough for anybody", and |
233 | * real-world applications run into allocation failures from the |
234 | * overall DMA pool before they make scenes complicated enough to run |
235 | * out of bin space. |
236 | */ |
237 | static int bin_bo_alloc(struct vc4_dev *vc4) |
238 | { |
239 | struct vc4_v3d *v3d = vc4->v3d; |
240 | uint32_t size = 16 * 1024 * 1024; |
241 | int ret = 0; |
242 | struct list_head list; |
243 | |
244 | if (!v3d) |
245 | return -ENODEV; |
246 | |
247 | /* We may need to try allocating more than once to get a BO |
248 | * that doesn't cross 256MB. Track the ones we've allocated |
249 | * that failed so far, so that we can free them when we've got |
250 | * one that succeeded (if we freed them right away, our next |
251 | * allocation would probably be the same chunk of memory). |
252 | */ |
253 | INIT_LIST_HEAD(list: &list); |
254 | |
255 | while (true) { |
256 | struct vc4_bo *bo = vc4_bo_create(dev: &vc4->base, size, from_cache: true, |
257 | type: VC4_BO_TYPE_BIN); |
258 | |
259 | if (IS_ERR(ptr: bo)) { |
260 | ret = PTR_ERR(ptr: bo); |
261 | |
262 | dev_err(&v3d->pdev->dev, |
263 | "Failed to allocate memory for tile binning: " |
264 | "%d. You may need to enable DMA or give it " |
265 | "more memory." , |
266 | ret); |
267 | break; |
268 | } |
269 | |
270 | /* Check if this BO won't trigger the addressing bug. */ |
271 | if ((bo->base.dma_addr & 0xf0000000) == |
272 | ((bo->base.dma_addr + bo->base.base.size - 1) & 0xf0000000)) { |
273 | vc4->bin_bo = bo; |
274 | |
275 | /* Set up for allocating 512KB chunks of |
276 | * binner memory. The biggest allocation we |
277 | * need to do is for the initial tile alloc + |
278 | * tile state buffer. We can render to a |
279 | * maximum of ((2048*2048) / (32*32) = 4096 |
280 | * tiles in a frame (until we do floating |
281 | * point rendering, at which point it would be |
282 | * 8192). Tile state is 48b/tile (rounded to |
283 | * a page), and tile alloc is 32b/tile |
284 | * (rounded to a page), plus a page of extra, |
285 | * for a total of 320kb for our worst-case. |
286 | * We choose 512kb so that it divides evenly |
287 | * into our 16MB, and the rest of the 512kb |
288 | * will be used as storage for the overflow |
289 | * from the initial 32b CL per bin. |
290 | */ |
291 | vc4->bin_alloc_size = 512 * 1024; |
292 | vc4->bin_alloc_used = 0; |
293 | vc4->bin_alloc_overflow = 0; |
294 | WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 != |
295 | bo->base.base.size / vc4->bin_alloc_size); |
296 | |
297 | kref_init(kref: &vc4->bin_bo_kref); |
298 | |
299 | /* Enable the out-of-memory interrupt to set our |
300 | * newly-allocated binner BO, potentially from an |
301 | * already-pending-but-masked interrupt. |
302 | */ |
303 | V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM); |
304 | |
305 | break; |
306 | } |
307 | |
308 | /* Put it on the list to free later, and try again. */ |
309 | list_add(new: &bo->unref_head, head: &list); |
310 | } |
311 | |
312 | /* Free all the BOs we allocated but didn't choose. */ |
313 | while (!list_empty(head: &list)) { |
314 | struct vc4_bo *bo = list_last_entry(&list, |
315 | struct vc4_bo, unref_head); |
316 | |
317 | list_del(entry: &bo->unref_head); |
318 | drm_gem_object_put(obj: &bo->base.base); |
319 | } |
320 | |
321 | return ret; |
322 | } |
323 | |
324 | int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used) |
325 | { |
326 | int ret = 0; |
327 | |
328 | if (WARN_ON_ONCE(vc4->is_vc5)) |
329 | return -ENODEV; |
330 | |
331 | mutex_lock(&vc4->bin_bo_lock); |
332 | |
333 | if (used && *used) |
334 | goto complete; |
335 | |
336 | if (vc4->bin_bo) |
337 | kref_get(kref: &vc4->bin_bo_kref); |
338 | else |
339 | ret = bin_bo_alloc(vc4); |
340 | |
341 | if (ret == 0 && used) |
342 | *used = true; |
343 | |
344 | complete: |
345 | mutex_unlock(lock: &vc4->bin_bo_lock); |
346 | |
347 | return ret; |
348 | } |
349 | |
350 | static void bin_bo_release(struct kref *ref) |
351 | { |
352 | struct vc4_dev *vc4 = container_of(ref, struct vc4_dev, bin_bo_kref); |
353 | |
354 | if (WARN_ON_ONCE(!vc4->bin_bo)) |
355 | return; |
356 | |
357 | drm_gem_object_put(obj: &vc4->bin_bo->base.base); |
358 | vc4->bin_bo = NULL; |
359 | } |
360 | |
361 | void vc4_v3d_bin_bo_put(struct vc4_dev *vc4) |
362 | { |
363 | if (WARN_ON_ONCE(vc4->is_vc5)) |
364 | return; |
365 | |
366 | mutex_lock(&vc4->bin_bo_lock); |
367 | kref_put(kref: &vc4->bin_bo_kref, release: bin_bo_release); |
368 | mutex_unlock(lock: &vc4->bin_bo_lock); |
369 | } |
370 | |
371 | #ifdef CONFIG_PM |
372 | static int vc4_v3d_runtime_suspend(struct device *dev) |
373 | { |
374 | struct vc4_v3d *v3d = dev_get_drvdata(dev); |
375 | struct vc4_dev *vc4 = v3d->vc4; |
376 | |
377 | vc4_irq_disable(dev: &vc4->base); |
378 | |
379 | clk_disable_unprepare(clk: v3d->clk); |
380 | |
381 | return 0; |
382 | } |
383 | |
384 | static int vc4_v3d_runtime_resume(struct device *dev) |
385 | { |
386 | struct vc4_v3d *v3d = dev_get_drvdata(dev); |
387 | struct vc4_dev *vc4 = v3d->vc4; |
388 | int ret; |
389 | |
390 | ret = clk_prepare_enable(clk: v3d->clk); |
391 | if (ret != 0) |
392 | return ret; |
393 | |
394 | vc4_v3d_init_hw(dev: &vc4->base); |
395 | |
396 | vc4_irq_enable(dev: &vc4->base); |
397 | |
398 | return 0; |
399 | } |
400 | #endif |
401 | |
402 | int vc4_v3d_debugfs_init(struct drm_minor *minor) |
403 | { |
404 | struct drm_device *drm = minor->dev; |
405 | struct vc4_dev *vc4 = to_vc4_dev(drm); |
406 | struct vc4_v3d *v3d = vc4->v3d; |
407 | |
408 | if (!vc4->v3d) |
409 | return -ENODEV; |
410 | |
411 | drm_debugfs_add_file(dev: drm, name: "v3d_ident" , show: vc4_v3d_debugfs_ident, NULL); |
412 | |
413 | vc4_debugfs_add_regset32(drm, filename: "v3d_regs" , regset: &v3d->regset); |
414 | |
415 | return 0; |
416 | } |
417 | |
418 | static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) |
419 | { |
420 | struct platform_device *pdev = to_platform_device(dev); |
421 | struct drm_device *drm = dev_get_drvdata(dev: master); |
422 | struct vc4_dev *vc4 = to_vc4_dev(drm); |
423 | struct vc4_v3d *v3d = NULL; |
424 | int ret; |
425 | |
426 | v3d = devm_kzalloc(dev: &pdev->dev, size: sizeof(*v3d), GFP_KERNEL); |
427 | if (!v3d) |
428 | return -ENOMEM; |
429 | |
430 | dev_set_drvdata(dev, data: v3d); |
431 | |
432 | v3d->pdev = pdev; |
433 | |
434 | v3d->regs = vc4_ioremap_regs(dev: pdev, index: 0); |
435 | if (IS_ERR(ptr: v3d->regs)) |
436 | return PTR_ERR(ptr: v3d->regs); |
437 | v3d->regset.base = v3d->regs; |
438 | v3d->regset.regs = v3d_regs; |
439 | v3d->regset.nregs = ARRAY_SIZE(v3d_regs); |
440 | |
441 | vc4->v3d = v3d; |
442 | v3d->vc4 = vc4; |
443 | |
444 | v3d->clk = devm_clk_get(dev, NULL); |
445 | if (IS_ERR(ptr: v3d->clk)) { |
446 | int ret = PTR_ERR(ptr: v3d->clk); |
447 | |
448 | if (ret == -ENOENT) { |
449 | /* bcm2835 didn't have a clock reference in the DT. */ |
450 | ret = 0; |
451 | v3d->clk = NULL; |
452 | } else { |
453 | if (ret != -EPROBE_DEFER) |
454 | dev_err(dev, "Failed to get V3D clock: %d\n" , |
455 | ret); |
456 | return ret; |
457 | } |
458 | } |
459 | |
460 | ret = platform_get_irq(pdev, 0); |
461 | if (ret < 0) |
462 | return ret; |
463 | vc4->irq = ret; |
464 | |
465 | ret = devm_pm_runtime_enable(dev); |
466 | if (ret) |
467 | return ret; |
468 | |
469 | ret = pm_runtime_resume_and_get(dev); |
470 | if (ret) |
471 | return ret; |
472 | |
473 | if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { |
474 | DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n" , |
475 | V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); |
476 | ret = -EINVAL; |
477 | goto err_put_runtime_pm; |
478 | } |
479 | |
480 | /* Reset the binner overflow address/size at setup, to be sure |
481 | * we don't reuse an old one. |
482 | */ |
483 | V3D_WRITE(V3D_BPOA, 0); |
484 | V3D_WRITE(V3D_BPOS, 0); |
485 | |
486 | ret = vc4_irq_install(dev: drm, irq: vc4->irq); |
487 | if (ret) { |
488 | DRM_ERROR("Failed to install IRQ handler\n" ); |
489 | goto err_put_runtime_pm; |
490 | } |
491 | |
492 | pm_runtime_use_autosuspend(dev); |
493 | pm_runtime_set_autosuspend_delay(dev, delay: 40); /* a little over 2 frames. */ |
494 | |
495 | return 0; |
496 | |
497 | err_put_runtime_pm: |
498 | pm_runtime_put(dev); |
499 | |
500 | return ret; |
501 | } |
502 | |
503 | static void vc4_v3d_unbind(struct device *dev, struct device *master, |
504 | void *data) |
505 | { |
506 | struct drm_device *drm = dev_get_drvdata(dev: master); |
507 | struct vc4_dev *vc4 = to_vc4_dev(drm); |
508 | |
509 | vc4_irq_uninstall(dev: drm); |
510 | |
511 | /* Disable the binner's overflow memory address, so the next |
512 | * driver probe (if any) doesn't try to reuse our old |
513 | * allocation. |
514 | */ |
515 | V3D_WRITE(V3D_BPOA, 0); |
516 | V3D_WRITE(V3D_BPOS, 0); |
517 | |
518 | vc4->v3d = NULL; |
519 | } |
520 | |
521 | static const struct dev_pm_ops vc4_v3d_pm_ops = { |
522 | SET_RUNTIME_PM_OPS(vc4_v3d_runtime_suspend, vc4_v3d_runtime_resume, NULL) |
523 | }; |
524 | |
525 | static const struct component_ops vc4_v3d_ops = { |
526 | .bind = vc4_v3d_bind, |
527 | .unbind = vc4_v3d_unbind, |
528 | }; |
529 | |
530 | static int vc4_v3d_dev_probe(struct platform_device *pdev) |
531 | { |
532 | return component_add(&pdev->dev, &vc4_v3d_ops); |
533 | } |
534 | |
535 | static void vc4_v3d_dev_remove(struct platform_device *pdev) |
536 | { |
537 | component_del(&pdev->dev, &vc4_v3d_ops); |
538 | } |
539 | |
540 | const struct of_device_id vc4_v3d_dt_match[] = { |
541 | { .compatible = "brcm,bcm2835-v3d" }, |
542 | { .compatible = "brcm,cygnus-v3d" }, |
543 | { .compatible = "brcm,vc4-v3d" }, |
544 | {} |
545 | }; |
546 | |
547 | struct platform_driver vc4_v3d_driver = { |
548 | .probe = vc4_v3d_dev_probe, |
549 | .remove_new = vc4_v3d_dev_remove, |
550 | .driver = { |
551 | .name = "vc4_v3d" , |
552 | .of_match_table = vc4_v3d_dt_match, |
553 | .pm = &vc4_v3d_pm_ops, |
554 | }, |
555 | }; |
556 | |