1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * Copyright (C) 2015 Broadcom |
4 | */ |
5 | #ifndef _VC4_DRV_H_ |
6 | #define _VC4_DRV_H_ |
7 | |
8 | #include <linux/delay.h> |
9 | #include <linux/of.h> |
10 | #include <linux/refcount.h> |
11 | #include <linux/uaccess.h> |
12 | |
13 | #include <drm/drm_atomic.h> |
14 | #include <drm/drm_debugfs.h> |
15 | #include <drm/drm_device.h> |
16 | #include <drm/drm_encoder.h> |
17 | #include <drm/drm_gem_dma_helper.h> |
18 | #include <drm/drm_managed.h> |
19 | #include <drm/drm_mm.h> |
20 | #include <drm/drm_modeset_lock.h> |
21 | |
22 | #include <kunit/test-bug.h> |
23 | |
24 | #include "uapi/drm/vc4_drm.h" |
25 | |
26 | struct drm_device; |
27 | struct drm_gem_object; |
28 | |
29 | extern const struct drm_driver vc4_drm_driver; |
30 | extern const struct drm_driver vc5_drm_driver; |
31 | |
32 | /* Don't forget to update vc4_bo.c: bo_type_names[] when adding to |
33 | * this. |
34 | */ |
enum vc4_kernel_bo_type {
	/* Any kernel allocation (gem_create_object hook) before it
	 * gets another type set.
	 */
	VC4_BO_TYPE_KERNEL,
	/* BO allocated on behalf of userspace (vc4_create_bo_ioctl()). */
	VC4_BO_TYPE_V3D,
	/* Shader BO created through vc4_create_shader_bo_ioctl(). */
	VC4_BO_TYPE_V3D_SHADER,
	/* Dumb scanout buffer (vc4_bo_dumb_create()). */
	VC4_BO_TYPE_DUMB,
	/* Binner memory (vc4_dev->bin_bo). */
	VC4_BO_TYPE_BIN,
	/* Render control list storage. */
	VC4_BO_TYPE_RCL,
	/* Binner control list storage. */
	VC4_BO_TYPE_BCL,
	/* BO currently held in the kernel BO cache. */
	VC4_BO_TYPE_KERNEL_CACHE,
	/* Number of entries above; not a real type. */
	VC4_BO_TYPE_COUNT
};
49 | |
50 | /* Performance monitor object. The perform lifetime is controlled by userspace |
51 | * using perfmon related ioctls. A perfmon can be attached to a submit_cl |
52 | * request, and when this is the case, HW perf counters will be activated just |
53 | * before the submit_cl is submitted to the GPU and disabled when the job is |
54 | * done. This way, only events related to a specific job will be counted. |
55 | */ |
struct vc4_perfmon {
	/* Device the perfmon was created on. */
	struct vc4_dev *dev;

	/* Tracks the number of users of the perfmon, when this counter reaches
	 * zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Number of counters activated in this perfmon instance
	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];

	/* Storage for counter values. Counters are incremented by the HW
	 * perf counter values every time the perfmon is attached to a GPU job.
	 * This way, perfmon users don't have to retrieve the results after
	 * each job if they want to track events covering several submissions.
	 * Note that counter values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 counters[] __counted_by(ncounters);
};
81 | |
struct vc4_dev {
	/* Base DRM device; to_vc4_dev() maps back to this wrapper. */
	struct drm_device base;
	/* Physical device the DRM device is bound to. */
	struct device *dev;

	/* True when driving the VC5 generation (vc5_drm_driver),
	 * false for VC4 (vc4_drm_driver).
	 */
	bool is_vc5;

	/* IRQ number used by the driver. */
	unsigned int irq;

	/* Display (HVS) and 3D (V3D) sub-device state. */
	struct vc4_hvs *hvs;
	struct vc4_v3d *v3d;

	/* GPU state captured at hang time; read back through
	 * vc4_get_hang_state_ioctl().
	 */
	struct vc4_hang_state *hang_state;

	/* The kernel-space BO cache. Tracks buffers that have been
	 * unreferenced by all other users (refcounts of 0!) but not
	 * yet freed, so we can do cheap allocations.
	 */
	struct vc4_bo_cache {
		/* Array of list heads for entries in the BO cache,
		 * based on number of pages, so we can do O(1) lookups
		 * in the cache when allocating.
		 */
		struct list_head *size_list;
		uint32_t size_list_size;

		/* List of all BOs in the cache, ordered by age, so we
		 * can do O(1) lookups when trying to free old
		 * buffers.
		 */
		struct list_head time_list;
		struct work_struct time_work;
		struct timer_list time_timer;
	} bo_cache;

	/* BO label bookkeeping (see vc4_label_bo_ioctl()). */
	u32 num_labels;
	struct vc4_label {
		const char *name;
		u32 num_allocated;
		u32 size_allocated;
	} *bo_labels;

	/* Protects bo_cache and bo_labels. */
	struct mutex bo_lock;

	/* Purgeable BO pool. All BOs in this pool can have their memory
	 * reclaimed if the driver is unable to allocate new BOs. We also
	 * keep stats related to the purge mechanism here.
	 */
	struct {
		struct list_head list;
		unsigned int num;
		size_t size;
		unsigned int purged_num;
		size_t purged_size;
		struct mutex lock;
	} purgeable;

	/* Context number used when creating vc4 dma_fences. */
	uint64_t dma_fence_context;

	/* Sequence number for the last job queued in bin_job_list.
	 * Starts at 0 (no jobs emitted).
	 */
	uint64_t emit_seqno;

	/* Sequence number for the last completed job on the GPU.
	 * Starts at 0 (no jobs completed).
	 */
	uint64_t finished_seqno;

	/* List of all struct vc4_exec_info for jobs to be executed in
	 * the binner. The first job in the list is the one currently
	 * programmed into ct0ca for execution.
	 */
	struct list_head bin_job_list;

	/* List of all struct vc4_exec_info for jobs that have
	 * completed binning and are ready for rendering. The first
	 * job in the list is the one currently programmed into ct1ca
	 * for execution.
	 */
	struct list_head render_job_list;

	/* List of the finished vc4_exec_infos waiting to be freed by
	 * job_done_work.
	 */
	struct list_head job_done_list;
	/* Spinlock used to synchronize the job_list and seqno
	 * accesses between the IRQ handler and GEM ioctls.
	 */
	spinlock_t job_lock;
	wait_queue_head_t job_wait_queue;
	struct work_struct job_done_work;

	/* Used to track the active perfmon if any. Access to this field is
	 * protected by job_lock.
	 */
	struct vc4_perfmon *active_perfmon;

	/* List of struct vc4_seqno_cb for callbacks to be made from a
	 * workqueue when the given seqno is passed.
	 */
	struct list_head seqno_cb_list;

	/* The memory used for storing binner tile alloc, tile state,
	 * and overflow memory allocations. This is freed when V3D
	 * powers down.
	 */
	struct vc4_bo *bin_bo;

	/* Size of blocks allocated within bin_bo. */
	uint32_t bin_alloc_size;

	/* Bitmask of the bin_alloc_size chunks in bin_bo that are
	 * used.
	 */
	uint32_t bin_alloc_used;

	/* Bitmask of the current bin_alloc used for overflow memory. */
	uint32_t bin_alloc_overflow;

	/* Incremented when an underrun error happened after an atomic commit.
	 * This is particularly useful to detect when a specific modeset is too
	 * demanding in term of memory or HVS bandwidth which is hard to guess
	 * at atomic check time.
	 */
	atomic_t underrun;

	/* Work item replenishing binner overflow memory. */
	struct work_struct overflow_mem_work;

	/* Number of users keeping V3D powered; protected by power_lock
	 * below.
	 */
	int power_refcount;

	/* Set to true when the load tracker is active. */
	bool load_tracker_enabled;

	/* Mutex controlling the power refcount. */
	struct mutex power_lock;

	/* Hangcheck timer and the GPU reset work it schedules. */
	struct {
		struct timer_list timer;
		struct work_struct reset_work;
	} hangcheck;

	/* Lock serializing access to the CTM (color transform matrix)
	 * state below.
	 */
	struct drm_modeset_lock ctm_state_lock;
	/* Driver-global private atomic state objects. */
	struct drm_private_obj ctm_manager;
	struct drm_private_obj hvs_channels;
	struct drm_private_obj load_tracker;

	/* Mutex for binner bo allocation. */
	struct mutex bin_bo_lock;
	/* Reference count for our binner bo. */
	struct kref bin_bo_kref;
};
234 | |
235 | #define to_vc4_dev(_dev) \ |
236 | container_of_const(_dev, struct vc4_dev, base) |
237 | |
/* A vc4 GEM buffer object, wrapping a DMA-contiguous GEM allocation.
 * Created through vc4_create_object()/vc4_bo_create().
 */
struct vc4_bo {
	struct drm_gem_dma_object base;

	/* seqno of the last job to render using this BO. */
	uint64_t seqno;

	/* seqno of the last job to use the RCL to write to this BO.
	 *
	 * Note that this doesn't include binner overflow memory
	 * writes.
	 */
	uint64_t write_seqno;

	/* Whether the buffer uses the T tiling format (see
	 * vc4_set_tiling_ioctl()).
	 */
	bool t_format;

	/* List entry for the BO's position in either
	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
	 */
	struct list_head unref_head;

	/* Time in jiffies when the BO was put in vc4->bo_cache. */
	unsigned long free_time;

	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
	struct list_head size_head;

	/* Struct for shader validation state, if created by
	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
	 */
	struct vc4_validated_shader_info *validated_shader;

	/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
	 * for user-allocated labels.
	 */
	int label;

	/* Count the number of active users. This is needed to determine
	 * whether we can move the BO to the purgeable list or not (when the BO
	 * is used by the GPU or the display engine we can't purge it).
	 */
	refcount_t usecnt;

	/* Store purgeable/purged state here */
	u32 madv;
	struct mutex madv_lock;
};
284 | |
285 | #define to_vc4_bo(_bo) \ |
286 | container_of_const(to_drm_gem_dma_obj(_bo), struct vc4_bo, base) |
287 | |
/* dma_fence implementation tied to the vc4 job seqno counters. */
struct vc4_fence {
	struct dma_fence base;
	/* Device the fence belongs to. */
	struct drm_device *dev;
	/* vc4 seqno for signaled() test */
	uint64_t seqno;
};
294 | |
295 | #define to_vc4_fence(_fence) \ |
296 | container_of_const(_fence, struct vc4_fence, base) |
297 | |
/* Callback run from a workqueue once a given seqno has passed; queued on
 * vc4_dev->seqno_cb_list.
 */
struct vc4_seqno_cb {
	/* Work used to invoke @func. */
	struct work_struct work;
	/* Seqno this callback is waiting on. */
	uint64_t seqno;
	/* Caller-supplied callback. */
	void (*func)(struct vc4_seqno_cb *cb);
};
303 | |
/* State of the V3D (3D engine) sub-device. */
struct vc4_v3d {
	/* Backpointer to the top-level vc4 device. */
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	/* MMIO mapping of the V3D registers (see V3D_READ/V3D_WRITE). */
	void __iomem *regs;
	struct clk *clk;
	/* Register set exposed through debugfs. */
	struct debugfs_regset32 regset;
};
311 | |
/* State of the HVS (Hardware Video Scaler) display sub-device. */
struct vc4_hvs {
	/* Backpointer to the top-level vc4 device. */
	struct vc4_dev *vc4;
	struct platform_device *pdev;
	/* MMIO mapping of the HVS registers (see HVS_READ/HVS_WRITE). */
	void __iomem *regs;
	/* Mapping of the display list memory. */
	u32 __iomem *dlist;

	struct clk *core_clk;

	/* Highest rate the core clock may be raised to. */
	unsigned long max_core_rate;

	/* Memory manager for CRTCs to allocate space in the display
	 * list. Units are dwords.
	 */
	struct drm_mm dlist_mm;
	/* Memory manager for the LBM memory used by HVS scaling. */
	struct drm_mm lbm_mm;
	/* Protects allocations from the two memory managers above. */
	spinlock_t mm_lock;

	/* Node reserved for the Mitchell/Netravali filter kernel used
	 * when scaling.
	 */
	struct drm_mm_node mitchell_netravali_filter;

	/* Register set exposed through debugfs. */
	struct debugfs_regset32 regset;

	/*
	 * Even if HDMI0 on the RPi4 can output modes requiring a pixel
	 * rate higher than 297MHz, it needs some adjustments in the
	 * config.txt file to be able to do so and thus won't always be
	 * available.
	 */
	bool vc5_hdmi_enable_hdmi_20;

	/*
	 * 4096x2160@60 requires a core overclock to work, so register
	 * whether that is sufficient.
	 */
	bool vc5_hdmi_enable_4096by2160;
};
348 | |
349 | #define HVS_NUM_CHANNELS 3 |
350 | |
/* Global atomic state tracking the HVS channels (FIFOs). */
struct vc4_hvs_state {
	struct drm_private_state base;
	/* HVS core clock rate associated with this state. */
	unsigned long core_clock_rate;

	/* Per-channel (FIFO) state. */
	struct {
		/* Whether a CRTC currently uses this channel. */
		unsigned in_use: 1;
		/* Load this FIFO contributes (see load tracker). */
		unsigned long fifo_load;
		/* Commit still pending on this channel, if any. */
		struct drm_crtc_commit *pending_commit;
	} fifo_state[HVS_NUM_CHANNELS];
};
361 | |
362 | #define to_vc4_hvs_state(_state) \ |
363 | container_of_const(_state, struct vc4_hvs_state, base) |
364 | |
365 | struct vc4_hvs_state *vc4_hvs_get_global_state(struct drm_atomic_state *state); |
366 | struct vc4_hvs_state *vc4_hvs_get_old_global_state(const struct drm_atomic_state *state); |
367 | struct vc4_hvs_state *vc4_hvs_get_new_global_state(const struct drm_atomic_state *state); |
368 | |
/* vc4 wrapper around drm_plane; carries no extra state of its own. */
struct vc4_plane {
	struct drm_plane base;
};
372 | |
373 | #define to_vc4_plane(_plane) \ |
374 | container_of_const(_plane, struct vc4_plane, base) |
375 | |
/* Scaling filter used by the HVS for a plane: none, trapezoid (TPZ) or
 * polyphase filter (PPF).
 */
enum vc4_scaling_mode {
	VC4_SCALING_NONE,
	VC4_SCALING_TPZ,
	VC4_SCALING_PPF,
};
381 | |
struct vc4_plane_state {
	struct drm_plane_state base;
	/* System memory copy of the display list for this element, computed
	 * at atomic_check time.
	 */
	u32 *dlist;
	u32 dlist_size; /* Number of dwords allocated for the display list */
	u32 dlist_count; /* Number of used dwords in the display list. */

	/* Offset in the dlist to various words, for pageflip or
	 * cursor updates.
	 */
	u32 pos0_offset;
	u32 pos2_offset;
	u32 ptr0_offset;
	u32 lbm_offset;

	/* Offset where the plane's dlist was last stored in the
	 * hardware at vc4_crtc_atomic_flush() time.
	 */
	u32 __iomem *hw_dlist;

	/* Clipped coordinates of the plane on the display. */
	int crtc_x, crtc_y, crtc_w, crtc_h;
	/* Clipped area being scanned from in the FB. */
	u32 src_x, src_y;

	/* Clipped source size; [0] is the RGB/Y plane and [1] the
	 * Cb/Cr planes (matching x_scaling/y_scaling below).
	 */
	u32 src_w[2], src_h[2];

	/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
	enum vc4_scaling_mode x_scaling[2], y_scaling[2];
	/* True when no scaling is applied on any plane. */
	bool is_unity;
	/* True when the framebuffer format is YUV. */
	bool is_yuv;

	/* Offset to start scanning out from the start of the plane's
	 * BO.
	 */
	u32 offsets[3];

	/* Our allocation in LBM for temporary storage during scaling. */
	struct drm_mm_node lbm;

	/* Set when the plane has per-pixel alpha content or does not cover
	 * the entire screen. This is a hint to the CRTC that it might need
	 * to enable background color fill.
	 */
	bool needs_bg_fill;

	/* Mark the dlist as initialized. Useful to avoid initializing it twice
	 * when async update is not possible.
	 */
	bool dlist_initialized;

	/* Load of this plane on the HVS block. The load is expressed in HVS
	 * cycles/sec.
	 */
	u64 hvs_load;

	/* Memory bandwidth needed for this plane. This is expressed in
	 * bytes/sec.
	 */
	u64 membus_load;
};
445 | |
446 | #define to_vc4_plane_state(_state) \ |
447 | container_of_const(_state, struct vc4_plane_state, base) |
448 | |
/* Identifies which hardware encoder block a vc4_encoder drives; used to
 * look up an encoder with vc4_find_encoder_by_type().
 */
enum vc4_encoder_type {
	VC4_ENCODER_TYPE_NONE,
	VC4_ENCODER_TYPE_HDMI0,
	VC4_ENCODER_TYPE_HDMI1,
	VC4_ENCODER_TYPE_VEC,
	VC4_ENCODER_TYPE_DSI0,
	VC4_ENCODER_TYPE_DSI1,
	VC4_ENCODER_TYPE_SMI,
	VC4_ENCODER_TYPE_DPI,
	VC4_ENCODER_TYPE_TXP,
};
460 | |
struct vc4_encoder {
	struct drm_encoder base;
	/* Which hardware block this encoder drives. */
	enum vc4_encoder_type type;
	/* Clock select value for this encoder. */
	u32 clock_select;

	/* Optional hooks invoked at various points of the CRTC enable
	 * sequence.
	 */
	void (*pre_crtc_configure)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*pre_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*post_crtc_enable)(struct drm_encoder *encoder, struct drm_atomic_state *state);

	/* Optional hooks invoked at various points of the CRTC disable
	 * sequence.
	 */
	void (*post_crtc_disable)(struct drm_encoder *encoder, struct drm_atomic_state *state);
	void (*post_crtc_powerdown)(struct drm_encoder *encoder, struct drm_atomic_state *state);
};
473 | |
474 | #define to_vc4_encoder(_encoder) \ |
475 | container_of_const(_encoder, struct vc4_encoder, base) |
476 | |
477 | static inline |
478 | struct drm_encoder *vc4_find_encoder_by_type(struct drm_device *drm, |
479 | enum vc4_encoder_type type) |
480 | { |
481 | struct drm_encoder *encoder; |
482 | |
483 | drm_for_each_encoder(encoder, drm) { |
484 | struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); |
485 | |
486 | if (vc4_encoder->type == type) |
487 | return encoder; |
488 | } |
489 | |
490 | return NULL; |
491 | } |
492 | |
/* Hardware description of one CRTC. */
struct vc4_crtc_data {
	/* Human-readable name of this CRTC. */
	const char *name;

	/* Name used for this CRTC's debugfs entry. */
	const char *debugfs_name;

	/* Bitmask of channels (FIFOs) of the HVS that the output can source from */
	unsigned int hvs_available_channels;

	/* Which output of the HVS this pixelvalve sources from. */
	int hvs_output;
};
504 | |
505 | extern const struct vc4_crtc_data vc4_txp_crtc_data; |
506 | |
/* Hardware description of one pixelvalve (PV). */
struct vc4_pv_data {
	struct vc4_crtc_data base;

	/* Depth of the PixelValve FIFO in bytes */
	unsigned int fifo_depth;

	/* Number of pixels output per clock period */
	u8 pixels_per_clock;

	/* Encoder types this pixelvalve can drive. */
	enum vc4_encoder_type encoder_types[4];
};
518 | |
519 | extern const struct vc4_pv_data bcm2835_pv0_data; |
520 | extern const struct vc4_pv_data bcm2835_pv1_data; |
521 | extern const struct vc4_pv_data bcm2835_pv2_data; |
522 | extern const struct vc4_pv_data bcm2711_pv0_data; |
523 | extern const struct vc4_pv_data bcm2711_pv1_data; |
524 | extern const struct vc4_pv_data bcm2711_pv2_data; |
525 | extern const struct vc4_pv_data bcm2711_pv3_data; |
526 | extern const struct vc4_pv_data bcm2711_pv4_data; |
527 | |
struct vc4_crtc {
	struct drm_crtc base;
	/* Platform device backing this CRTC. */
	struct platform_device *pdev;
	/* Hardware description (see vc4_crtc_data/vc4_pv_data). */
	const struct vc4_crtc_data *data;
	/* MMIO registers. */
	void __iomem *regs;

	/* Timestamp at start of vblank irq - unaffected by lock delays. */
	ktime_t t_vblank;

	/* Gamma LUT, one 8-bit entry per channel. */
	u8 lut_r[256];
	u8 lut_g[256];
	u8 lut_b[256];

	/* Pending vblank completion event, if any. */
	struct drm_pending_vblank_event *event;

	/* Register set exposed through debugfs. */
	struct debugfs_regset32 regset;

	/**
	 * @feeds_txp: True if the CRTC feeds our writeback controller.
	 */
	bool feeds_txp;

	/**
	 * @irq_lock: Spinlock protecting the resources shared between
	 * the atomic code and our vblank handler.
	 */
	spinlock_t irq_lock;

	/**
	 * @current_dlist: Start offset of the display list currently
	 * set in the HVS for that CRTC. Protected by @irq_lock, and
	 * copied in vc4_hvs_update_dlist() for the CRTC interrupt
	 * handler to have access to that value.
	 */
	unsigned int current_dlist;

	/**
	 * @current_hvs_channel: HVS channel currently assigned to the
	 * CRTC. Protected by @irq_lock, and copied in
	 * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have
	 * access to that value.
	 */
	unsigned int current_hvs_channel;
};
572 | |
573 | #define to_vc4_crtc(_crtc) \ |
574 | container_of_const(_crtc, struct vc4_crtc, base) |
575 | |
/* Return the hardware description attached to @crtc. */
static inline const struct vc4_crtc_data *
vc4_crtc_to_vc4_crtc_data(const struct vc4_crtc *crtc)
{
	return crtc->data;
}
581 | |
582 | static inline const struct vc4_pv_data * |
583 | vc4_crtc_to_vc4_pv_data(const struct vc4_crtc *crtc) |
584 | { |
585 | const struct vc4_crtc_data *data = vc4_crtc_to_vc4_crtc_data(crtc); |
586 | |
587 | return container_of_const(data, struct vc4_pv_data, base); |
588 | } |
589 | |
590 | struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc, |
591 | struct drm_crtc_state *state); |
592 | |
struct vc4_crtc_state {
	struct drm_crtc_state base;
	/* Dlist area for this CRTC configuration. */
	struct drm_mm_node mm;
	/* Whether the TXP (writeback) has been armed for this state. */
	bool txp_armed;
	/* HVS channel (FIFO) assigned to this CRTC, or
	 * VC4_HVS_CHANNEL_DISABLED.
	 */
	unsigned int assigned_channel;

	/* Output margins, in pixels. */
	struct {
		unsigned int left;
		unsigned int right;
		unsigned int top;
		unsigned int bottom;
	} margins;

	/* Load this CRTC puts on the HVS (see the load tracker). */
	unsigned long hvs_load;

	/* Transitional state below, only valid during atomic commits */
	bool update_muxing;
};
612 | |
/* Sentinel for a CRTC with no HVS channel assigned. */
#define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1)

#define to_vc4_crtc_state(_state) \
	container_of_const(_state, struct vc4_crtc_state, base)
617 | |
/* MMIO accessors for the V3D and HVS register blocks. The
 * kunit_fail_current_test() calls make any stray register access inside
 * a KUnit test fail that test (no hardware is mapped there).
 */
#define V3D_READ(offset)								\
	({										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		readl(vc4->v3d->regs + (offset));						\
	})

#define V3D_WRITE(offset, val)								\
	do {										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		writel(val, vc4->v3d->regs + (offset));					\
	} while (0)

#define HVS_READ(offset)								\
	({										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		readl(hvs->regs + (offset));						\
	})

#define HVS_WRITE(offset, val)								\
	do {										\
		kunit_fail_current_test("Accessing a register in a unit test!\n");	\
		writel(val, hvs->regs + (offset));					\
	} while (0)

/* Build a debugfs_regset32 entry from a register macro: name + offset. */
#define VC4_REG32(reg) { .name = #reg, .offset = reg }
643 | |
/* Tracks one bin/render job submitted through vc4_submit_cl_ioctl(),
 * from BO lookup and command-list validation through completion.
 */
struct vc4_exec_info {
	/* Device the job was submitted on. */
	struct vc4_dev *dev;

	/* Sequence number for this bin/render job. */
	uint64_t seqno;

	/* Latest write_seqno of any BO that binning depends on. */
	uint64_t bin_dep_seqno;

	/* Fence representing completion of this job. */
	struct dma_fence *fence;

	/* Last current addresses the hardware was processing when the
	 * hangcheck timer checked on us.
	 */
	uint32_t last_ct0ca, last_ct1ca;

	/* Kernel-space copy of the ioctl arguments */
	struct drm_vc4_submit_cl *args;

	/* This is the array of BOs that were looked up at the start of exec.
	 * Command validation will use indices into this array.
	 */
	struct drm_gem_object **bo;
	uint32_t bo_count;

	/* List of BOs that are being written by the RCL. Other than
	 * the binner temporary storage, this is all the BOs written
	 * by the job.
	 */
	struct drm_gem_dma_object *rcl_write_bo[4];
	uint32_t rcl_write_bo_count;

	/* Pointers for our position in vc4->job_list */
	struct list_head head;

	/* List of other BOs used in the job that need to be released
	 * once the job is complete.
	 */
	struct list_head unref_list;

	/* Current unvalidated indices into @bo loaded by the non-hardware
	 * VC4_PACKET_GEM_HANDLES.
	 */
	uint32_t bo_index[2];

	/* This is the BO where we store the validated command lists, shader
	 * records, and uniforms.
	 */
	struct drm_gem_dma_object *exec_bo;

	/**
	 * This tracks the per-shader-record state (packet 64) that
	 * determines the length of the shader record and the offset
	 * it's expected to be found at. It gets read in from the
	 * command lists.
	 */
	struct vc4_shader_state {
		uint32_t addr;
		/* Maximum vertex index referenced by any primitive using this
		 * shader state.
		 */
		uint32_t max_index;
	} *shader_state;

	/** How many shader states the user declared they were using. */
	uint32_t shader_state_size;
	/** How many shader state records the validator has seen. */
	uint32_t shader_state_count;

	/* Flags recording which packets the validator has seen in the
	 * bin CL.
	 */
	bool found_tile_binning_mode_config_packet;
	bool found_start_tile_binning_packet;
	bool found_increment_semaphore_packet;
	bool found_flush;
	uint8_t bin_tiles_x, bin_tiles_y;
	/* Physical address of the start of the tile alloc array
	 * (where each tile's binned CL will start)
	 */
	uint32_t tile_alloc_offset;
	/* Bitmask of which binner slots are freed when this job completes. */
	uint32_t bin_slots;

	/**
	 * Computed addresses pointing into exec_bo where we start the
	 * bin thread (ct0) and render thread (ct1).
	 */
	uint32_t ct0ca, ct0ea;
	uint32_t ct1ca, ct1ea;

	/* Pointer to the unvalidated bin CL (if present). */
	void *bin_u;

	/* Pointers to the shader recs. These paddr gets incremented as CL
	 * packets are relocated in validate_gl_shader_state, and the vaddrs
	 * (u and v) get incremented and size decremented as the shader recs
	 * themselves are validated.
	 */
	void *shader_rec_u;
	void *shader_rec_v;
	uint32_t shader_rec_p;
	uint32_t shader_rec_size;

	/* Pointers to the uniform data. These pointers are incremented, and
	 * size decremented, as each batch of uniforms is uploaded.
	 */
	void *uniforms_u;
	void *uniforms_v;
	uint32_t uniforms_p;
	uint32_t uniforms_size;

	/* Pointer to a performance monitor object if the user requested it,
	 * NULL otherwise.
	 */
	struct vc4_perfmon *perfmon;

	/* Whether the exec has taken a reference to the binner BO, which should
	 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
	 */
	bool bin_bo_used;
};
763 | |
764 | /* Per-open file private data. Any driver-specific resource that has to be |
765 | * released when the DRM file is closed should be placed here. |
766 | */ |
struct vc4_file {
	/* Device the file was opened on. */
	struct vc4_dev *dev;

	/* Perfmons created by this client, indexed by ID; lock protects
	 * the IDR.
	 */
	struct {
		struct idr idr;
		struct mutex lock;
	} perfmon;

	/* Whether this client has taken a reference on the binner BO. */
	bool bin_bo_used;
};
777 | |
778 | static inline struct vc4_exec_info * |
779 | vc4_first_bin_job(struct vc4_dev *vc4) |
780 | { |
781 | return list_first_entry_or_null(&vc4->bin_job_list, |
782 | struct vc4_exec_info, head); |
783 | } |
784 | |
785 | static inline struct vc4_exec_info * |
786 | vc4_first_render_job(struct vc4_dev *vc4) |
787 | { |
788 | return list_first_entry_or_null(&vc4->render_job_list, |
789 | struct vc4_exec_info, head); |
790 | } |
791 | |
792 | static inline struct vc4_exec_info * |
793 | vc4_last_render_job(struct vc4_dev *vc4) |
794 | { |
795 | if (list_empty(head: &vc4->render_job_list)) |
796 | return NULL; |
797 | return list_last_entry(&vc4->render_job_list, |
798 | struct vc4_exec_info, head); |
799 | } |
800 | |
801 | /** |
802 | * struct vc4_texture_sample_info - saves the offsets into the UBO for texture |
803 | * setup parameters. |
804 | * |
805 | * This will be used at draw time to relocate the reference to the texture |
806 | * contents in p0, and validate that the offset combined with |
807 | * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO. |
808 | * Note that the hardware treats unprovided config parameters as 0, so not all |
809 | * of them need to be set up for every texure sample, and we'll store ~0 as |
810 | * the offset to mark the unused ones. |
811 | * |
812 | * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit |
813 | * Setup") for definitions of the texture parameters. |
814 | */ |
struct vc4_texture_sample_info {
	/* Whether this is a direct texture lookup (as opposed to the
	 * full p0-p3 setup).
	 */
	bool is_direct;
	/* Offsets of the p0-p3 setup words in the uniform stream; ~0
	 * marks a parameter that was not provided (see above).
	 */
	uint32_t p_offset[4];
};
819 | |
820 | /** |
821 | * struct vc4_validated_shader_info - information about validated shaders that |
822 | * needs to be used from command list validation. |
823 | * |
824 | * For a given shader, each time a shader state record references it, we need |
825 | * to verify that the shader doesn't read more uniforms than the shader state |
826 | * record's uniform BO pointer can provide, and we need to apply relocations |
827 | * and validate the shader state record's uniforms that define the texture |
828 | * samples. |
829 | */ |
struct vc4_validated_shader_info {
	/* Sizes of the uniform stream (validated destination and
	 * source).
	 */
	uint32_t uniforms_size;
	uint32_t uniforms_src_size;
	/* Array of num_texture_samples texture-sample descriptors
	 * found in the shader.
	 */
	uint32_t num_texture_samples;
	struct vc4_texture_sample_info *texture_samples;

	/* Offsets of uniform-address updates within the stream. */
	uint32_t num_uniform_addr_offsets;
	uint32_t *uniform_addr_offsets;

	/* Whether the shader uses the threaded execution mode. */
	bool is_threaded;
};
841 | |
842 | /** |
843 | * __wait_for - magic wait macro |
844 | * |
845 | * Macro to help avoid open coding check/wait/timeout patterns. Note that it's |
846 | * important that we check the condition again after having timed out, since the |
847 | * timeout could be due to preemption or similar and we've never had a chance to |
848 | * check the condition before the timeout. |
849 | */ |
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
	int ret__; \
	might_sleep(); \
	for (;;) { \
		/* Sample expiry before OP/COND so COND still gets one
		 * check after the timeout has elapsed. */ \
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		/* OP: caller-supplied statement run on each poll. */ \
		OP; \
		/* Guarantee COND check prior to timeout */ \
		barrier(); \
		if (COND) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) { \
			ret__ = -ETIMEDOUT; \
			break; \
		} \
		/* Exponential backoff between Wmin and Wmax microseconds. */ \
		usleep_range(wait__, wait__ * 2); \
		if (wait__ < (Wmax)) \
			wait__ <<= 1; \
	} \
	ret__; \
})
874 | |
/* Wait for COND with no per-iteration operation. */
#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
						   (Wmax))
/* Wait up to MS milliseconds for COND, polling from 10us up to 1ms. */
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
878 | |
879 | /* vc4_bo.c */ |
880 | struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); |
881 | struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, |
882 | bool from_cache, enum vc4_kernel_bo_type type); |
883 | int vc4_bo_dumb_create(struct drm_file *file_priv, |
884 | struct drm_device *dev, |
885 | struct drm_mode_create_dumb *args); |
886 | int vc4_create_bo_ioctl(struct drm_device *dev, void *data, |
887 | struct drm_file *file_priv); |
888 | int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, |
889 | struct drm_file *file_priv); |
890 | int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, |
891 | struct drm_file *file_priv); |
892 | int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, |
893 | struct drm_file *file_priv); |
894 | int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, |
895 | struct drm_file *file_priv); |
896 | int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, |
897 | struct drm_file *file_priv); |
898 | int vc4_label_bo_ioctl(struct drm_device *dev, void *data, |
899 | struct drm_file *file_priv); |
900 | int vc4_bo_cache_init(struct drm_device *dev); |
901 | int vc4_bo_inc_usecnt(struct vc4_bo *bo); |
902 | void vc4_bo_dec_usecnt(struct vc4_bo *bo); |
903 | void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo); |
904 | void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo); |
905 | int vc4_bo_debugfs_init(struct drm_minor *minor); |
906 | |
/* vc4_crtc.c */
extern struct platform_driver vc4_crtc_driver;
/* Disables a CRTC at boot time (per the name, likely one the firmware
 * left enabled -- confirm in vc4_crtc.c).
 */
int vc4_crtc_disable_at_boot(struct drm_crtc *crtc);
/* Low-level CRTC registration taking an explicit primary plane;
 * vc4_crtc_init() below is the variant without one.  @feeds_txp marks
 * a CRTC that feeds the transposer (TXP) writeback block.
 */
int __vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
struct drm_plane *primary_plane,
const struct drm_crtc_funcs *crtc_funcs,
const struct drm_crtc_helper_funcs *crtc_helper_funcs,
bool feeds_txp);
int vc4_crtc_init(struct drm_device *drm, struct platform_device *pdev,
struct vc4_crtc *vc4_crtc, const struct vc4_crtc_data *data,
const struct drm_crtc_funcs *crtc_funcs,
const struct drm_crtc_helper_funcs *crtc_helper_funcs,
bool feeds_txp);
/* Callbacks matching the drm_crtc_funcs / drm_crtc_helper_funcs
 * signatures, shared between the CRTC variants.
 */
int vc4_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t flags,
struct drm_modeset_acquire_ctx *ctx);
int vc4_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_atomic_state *state);
struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc);
void vc4_crtc_destroy_state(struct drm_crtc *crtc,
struct drm_crtc_state *state);
void vc4_crtc_reset(struct drm_crtc *crtc);
/* Vblank handling; _send_ delivers a vblank event for @crtc. */
void vc4_crtc_handle_vblank(struct vc4_crtc *crtc);
void vc4_crtc_send_vblank(struct drm_crtc *crtc);
int vc4_crtc_late_register(struct drm_crtc *crtc);
/* Returns the four overscan margins of @state through the out
 * parameters.
 */
void vc4_crtc_get_margins(struct drm_crtc_state *state,
unsigned int *left, unsigned int *right,
unsigned int *top, unsigned int *bottom);
938 | |
/* vc4_debugfs.c */
void vc4_debugfs_init(struct drm_minor *minor);
#ifdef CONFIG_DEBUG_FS
/* Registers a debugfs file named @filename exposing the 32-bit
 * register set described by @regset.
 */
void vc4_debugfs_add_regset32(struct drm_device *drm,
const char *filename,
struct debugfs_regset32 *regset);
#else

/* Empty inline stub so callers need no #ifdef of their own when
 * debugfs support is compiled out.
 */
static inline void vc4_debugfs_add_regset32(struct drm_device *drm,
const char *filename,
struct debugfs_regset32 *regset)
{}
#endif
952 | |
/* vc4_drv.c */
/* ioremaps register resource @index of platform device @dev. */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
/* Normalizes pitch/size fields of dumb-buffer allocation arguments. */
int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args);
956 | |
/* vc4_dpi.c */
extern struct platform_driver vc4_dpi_driver;

/* vc4_dsi.c */
extern struct platform_driver vc4_dsi_driver;

/* vc4_fence.c */
/* dma_fence ops used for V3D job-completion fences. */
extern const struct dma_fence_ops vc4_fence_ops;
965 | |
/* vc4_gem.c */
int vc4_gem_init(struct drm_device *dev);
/* Command-list submission and seqno/BO wait IOCTLs. */
int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
/* Job-queue management: push the next queued binner/renderer job to
 * the hardware, or move a binned job onto the render queue.
 */
void vc4_submit_next_bin_job(struct drm_device *dev);
void vc4_submit_next_render_job(struct drm_device *dev);
void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
/* Waits up to @timeout_ns for @seqno to complete; @interruptible
 * selects whether signals may abort the wait.
 */
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
/* Queues @cb so that @func is invoked once @seqno has passed. */
int vc4_queue_seqno_cb(struct drm_device *dev,
struct vc4_seqno_cb *cb, uint64_t seqno,
void (*func)(struct vc4_seqno_cb *cb));
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
985 | |
/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;

/* vc4_vec.c */
extern struct platform_driver vc4_vec_driver;

/* vc4_txp.c */
extern struct platform_driver vc4_txp_driver;

/* vc4_irq.c */
/* V3D interrupt management: enable/disable, (un)install the handler,
 * and reset interrupt state.
 */
void vc4_irq_enable(struct drm_device *dev);
void vc4_irq_disable(struct drm_device *dev);
int vc4_irq_install(struct drm_device *dev, int irq);
void vc4_irq_uninstall(struct drm_device *dev);
void vc4_irq_reset(struct drm_device *dev);
1001 | |
/* vc4_hvs.c */
extern struct platform_driver vc4_hvs_driver;
struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, struct platform_device *pdev);
/* FIFO/channel management: stop an output's channel, map an output to
 * its FIFO, and read a FIFO's frame count.
 */
void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
/* Atomic-modeset hooks for the HVS side of a CRTC (signatures match
 * the drm_crtc_helper_funcs callbacks).
 */
int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state);
void vc4_hvs_dump_state(struct vc4_hvs *hvs);
/* Mask/unmask the underrun interrupt for @channel. */
void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel);
void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel);
int vc4_hvs_debugfs_init(struct drm_minor *minor);
1017 | |
/* vc4_kms.c */
int vc4_kms_load(struct drm_device *dev);

/* vc4_plane.c */
/* Creates one plane of the given @type usable on @possible_crtcs. */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
enum drm_plane_type type,
uint32_t possible_crtcs);
int vc4_plane_create_additional_planes(struct drm_device *dev);
/* HVS display-list handling: write a plane's dlist entries to
 * @dlist, returning the number of u32 words written; _size_ reports
 * how many words a state would need.
 */
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(const struct drm_plane_state *state);
/* Swaps the framebuffer used by an async (cursor) update. */
void vc4_plane_async_set_fb(struct drm_plane *plane,
struct drm_framebuffer *fb);
1030 | |
/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
extern const struct of_device_id vc4_v3d_dt_match[];
int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
/* Get/put of the shared binner BO; *@used reports whether it was
 * already held on _get (NOTE(review): inferred from the out-param
 * name -- confirm in vc4_v3d.c).
 */
int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used);
void vc4_v3d_bin_bo_put(struct vc4_dev *vc4);
/* Runtime-PM style get/put of the V3D power domain. */
int vc4_v3d_pm_get(struct vc4_dev *vc4);
void vc4_v3d_pm_put(struct vc4_dev *vc4);
int vc4_v3d_debugfs_init(struct drm_minor *minor);
1040 | |
1041 | /* vc4_validate.c */ |
1042 | int |
1043 | vc4_validate_bin_cl(struct drm_device *dev, |
1044 | void *validated, |
1045 | void *unvalidated, |
1046 | struct vc4_exec_info *exec); |
1047 | |
1048 | int |
1049 | vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); |
1050 | |
1051 | struct drm_gem_dma_object *vc4_use_bo(struct vc4_exec_info *exec, |
1052 | uint32_t hindex); |
1053 | |
1054 | int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); |
1055 | |
1056 | bool vc4_check_tex_size(struct vc4_exec_info *exec, |
1057 | struct drm_gem_dma_object *fbo, |
1058 | uint32_t offset, uint8_t tiling_format, |
1059 | uint32_t width, uint32_t height, uint8_t cpp); |
1060 | |
/* vc4_validate_shader.c */
/* Validates a user shader BO; returns a kmalloc'd description on
 * success (ownership transfers to the caller).  NOTE(review): the
 * NULL-on-failure and ownership contract is inferred from the
 * pointer return -- confirm in vc4_validate_shader.c.
 */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_dma_object *shader_obj);
1064 | |
/* vc4_perfmon.c */
/* Refcounting of a perfmon (see struct vc4_perfmon::refcnt): _put
 * drops a reference, destroying the object when the count hits zero.
 */
void vc4_perfmon_get(struct vc4_perfmon *perfmon);
void vc4_perfmon_put(struct vc4_perfmon *perfmon);
/* Activates/deactivates the HW counters around a job; @capture
 * selects whether counter values are read back on stop.
 */
void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
bool capture);
/* Looks up the perfmon with user-visible @id for this file. */
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
/* Per-file open/close hooks managing the file's perfmon bookkeeping. */
void vc4_perfmon_open_file(struct vc4_file *vc4file);
void vc4_perfmon_close_file(struct vc4_file *vc4file);
/* Userspace lifetime control of perfmons (see the struct vc4_perfmon
 * comment near the top of this header).
 */
int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
1080 | |
1081 | #endif /* _VC4_DRV_H_ */ |
1082 | |