1 | /* |
2 | * Copyright © 2014 Broadcom |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | */ |
23 | |
24 | /** |
25 | * DOC: Command list validator for VC4. |
26 | * |
27 | * Since the VC4 has no IOMMU between it and system memory, a user |
28 | * with access to execute command lists could escalate privilege by |
29 | * overwriting system memory (drawing to it as a framebuffer) or |
30 | * reading system memory it shouldn't (reading it as a vertex buffer |
31 | * or index buffer) |
32 | * |
33 | * We validate binner command lists to ensure that all accesses are |
34 | * within the bounds of the GEM objects referenced by the submitted |
35 | * job. It explicitly whitelists packets, and looks at the offsets in |
36 | * any address fields to make sure they're contained within the BOs |
37 | * they reference. |
38 | * |
39 | * Note that because CL validation is already reading the |
40 | * user-submitted CL and writing the validated copy out to the memory |
41 | * that the GPU will actually read, this is also where GEM relocation |
42 | * processing (turning BO references into actual addresses for the GPU |
43 | * to use) happens. |
44 | */ |
45 | |
46 | #include "uapi/drm/vc4_drm.h" |
47 | #include "vc4_drv.h" |
48 | #include "vc4_packet.h" |
49 | |
/*
 * Parameter list shared by the per-packet validation callbacks below:
 * the job being validated, the destination for the validated packet
 * contents, and the user-supplied packet contents.
 */
#define VALIDATE_ARGS					\
	struct vc4_exec_info *exec,			\
	void *validated,				\
	void *untrusted
54 | |
/**
 * utile_width() - Width in pixels of a 64-byte microtile.
 * @cpp: Bytes per pixel of the pixel format
 *
 * An unrecognized @cpp is reported through DRM_ERROR() and yields 1.
 */
static uint32_t
utile_width(int cpp)
{
	if (cpp == 1 || cpp == 2)
		return 8;
	if (cpp == 4)
		return 4;
	if (cpp == 8)
		return 2;

	DRM_ERROR("unknown cpp: %d\n", cpp);
	return 1;
}
72 | |
/**
 * utile_height() - Height in pixels of a 64-byte microtile.
 * @cpp: Bytes per pixel of the pixel format
 *
 * An unrecognized @cpp is reported through DRM_ERROR() and yields 1.
 */
static uint32_t
utile_height(int cpp)
{
	if (cpp == 1)
		return 8;
	if (cpp == 2 || cpp == 4 || cpp == 8)
		return 4;

	DRM_ERROR("unknown cpp: %d\n", cpp);
	return 1;
}
89 | |
/**
 * size_is_lt() - Tells whether a miplevel of the given size is laid out
 * in the lineartile (LT) format instead of the regular T format.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 *
 * Return: true when either dimension is at most four microtiles.
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	if (width <= 4 * utile_width(cpp))
		return true;

	return height <= 4 * utile_height(cpp);
}
104 | |
105 | struct drm_gem_dma_object * |
106 | vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex) |
107 | { |
108 | struct vc4_dev *vc4 = exec->dev; |
109 | struct drm_gem_dma_object *obj; |
110 | struct vc4_bo *bo; |
111 | |
112 | if (WARN_ON_ONCE(vc4->is_vc5)) |
113 | return NULL; |
114 | |
115 | if (hindex >= exec->bo_count) { |
116 | DRM_DEBUG("BO index %d greater than BO count %d\n" , |
117 | hindex, exec->bo_count); |
118 | return NULL; |
119 | } |
120 | obj = to_drm_gem_dma_obj(exec->bo[hindex]); |
121 | bo = to_vc4_bo(&obj->base); |
122 | |
123 | if (bo->validated_shader) { |
124 | DRM_DEBUG("Trying to use shader BO as something other than " |
125 | "a shader\n" ); |
126 | return NULL; |
127 | } |
128 | |
129 | return obj; |
130 | } |
131 | |
132 | static struct drm_gem_dma_object * |
133 | vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index) |
134 | { |
135 | return vc4_use_bo(exec, hindex: exec->bo_index[gem_handles_packet_index]); |
136 | } |
137 | |
138 | static bool |
139 | validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos) |
140 | { |
141 | /* Note that the untrusted pointer passed to these functions is |
142 | * incremented past the packet byte. |
143 | */ |
144 | return (untrusted - 1 == exec->bin_u + pos); |
145 | } |
146 | |
/*
 * Size in bytes of a GL shader record, derived from the low bits of the
 * shader state reference: bits 0-2 hold the attribute count (0 encodes
 * 8) and bit 3 selects the extended record layout.
 */
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t nr_attrs = pointer_bits & 7;

	if (!nr_attrs)
		nr_attrs = 8;

	if (pointer_bits & 8)
		return 100 + nr_attrs * 4;

	return 36 + nr_attrs * 8;
}
161 | |
162 | bool |
163 | vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_dma_object *fbo, |
164 | uint32_t offset, uint8_t tiling_format, |
165 | uint32_t width, uint32_t height, uint8_t cpp) |
166 | { |
167 | struct vc4_dev *vc4 = exec->dev; |
168 | uint32_t aligned_width, aligned_height, stride, size; |
169 | uint32_t utile_w = utile_width(cpp); |
170 | uint32_t utile_h = utile_height(cpp); |
171 | |
172 | if (WARN_ON_ONCE(vc4->is_vc5)) |
173 | return false; |
174 | |
175 | /* The shaded vertex format stores signed 12.4 fixed point |
176 | * (-2048,2047) offsets from the viewport center, so we should |
177 | * never have a render target larger than 4096. The texture |
178 | * unit can only sample from 2048x2048, so it's even more |
179 | * restricted. This lets us avoid worrying about overflow in |
180 | * our math. |
181 | */ |
182 | if (width > 4096 || height > 4096) { |
183 | DRM_DEBUG("Surface dimensions (%d,%d) too large" , |
184 | width, height); |
185 | return false; |
186 | } |
187 | |
188 | switch (tiling_format) { |
189 | case VC4_TILING_FORMAT_LINEAR: |
190 | aligned_width = round_up(width, utile_w); |
191 | aligned_height = height; |
192 | break; |
193 | case VC4_TILING_FORMAT_T: |
194 | aligned_width = round_up(width, utile_w * 8); |
195 | aligned_height = round_up(height, utile_h * 8); |
196 | break; |
197 | case VC4_TILING_FORMAT_LT: |
198 | aligned_width = round_up(width, utile_w); |
199 | aligned_height = round_up(height, utile_h); |
200 | break; |
201 | default: |
202 | DRM_DEBUG("buffer tiling %d unsupported\n" , tiling_format); |
203 | return false; |
204 | } |
205 | |
206 | stride = aligned_width * cpp; |
207 | size = stride * aligned_height; |
208 | |
209 | if (size + offset < size || |
210 | size + offset > fbo->base.size) { |
211 | DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n" , |
212 | width, height, |
213 | aligned_width, aligned_height, |
214 | size, offset, fbo->base.size); |
215 | return false; |
216 | } |
217 | |
218 | return true; |
219 | } |
220 | |
221 | static int |
222 | validate_flush(VALIDATE_ARGS) |
223 | { |
224 | if (!validate_bin_pos(exec, untrusted, pos: exec->args->bin_cl_size - 1)) { |
225 | DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n" ); |
226 | return -EINVAL; |
227 | } |
228 | exec->found_flush = true; |
229 | |
230 | return 0; |
231 | } |
232 | |
233 | static int |
234 | validate_start_tile_binning(VALIDATE_ARGS) |
235 | { |
236 | if (exec->found_start_tile_binning_packet) { |
237 | DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n" ); |
238 | return -EINVAL; |
239 | } |
240 | exec->found_start_tile_binning_packet = true; |
241 | |
242 | if (!exec->found_tile_binning_mode_config_packet) { |
243 | DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n" ); |
244 | return -EINVAL; |
245 | } |
246 | |
247 | return 0; |
248 | } |
249 | |
250 | static int |
251 | validate_increment_semaphore(VALIDATE_ARGS) |
252 | { |
253 | if (!validate_bin_pos(exec, untrusted, pos: exec->args->bin_cl_size - 2)) { |
254 | DRM_DEBUG("Bin CL must end with " |
255 | "VC4_PACKET_INCREMENT_SEMAPHORE\n" ); |
256 | return -EINVAL; |
257 | } |
258 | exec->found_increment_semaphore_packet = true; |
259 | |
260 | return 0; |
261 | } |
262 | |
263 | static int |
264 | validate_indexed_prim_list(VALIDATE_ARGS) |
265 | { |
266 | struct drm_gem_dma_object *ib; |
267 | uint32_t length = *(uint32_t *)(untrusted + 1); |
268 | uint32_t offset = *(uint32_t *)(untrusted + 5); |
269 | uint32_t max_index = *(uint32_t *)(untrusted + 9); |
270 | uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1; |
271 | struct vc4_shader_state *shader_state; |
272 | |
273 | /* Check overflow condition */ |
274 | if (exec->shader_state_count == 0) { |
275 | DRM_DEBUG("shader state must precede primitives\n" ); |
276 | return -EINVAL; |
277 | } |
278 | shader_state = &exec->shader_state[exec->shader_state_count - 1]; |
279 | |
280 | if (max_index > shader_state->max_index) |
281 | shader_state->max_index = max_index; |
282 | |
283 | ib = vc4_use_handle(exec, gem_handles_packet_index: 0); |
284 | if (!ib) |
285 | return -EINVAL; |
286 | |
287 | exec->bin_dep_seqno = max(exec->bin_dep_seqno, |
288 | to_vc4_bo(&ib->base)->write_seqno); |
289 | |
290 | if (offset > ib->base.size || |
291 | (ib->base.size - offset) / index_size < length) { |
292 | DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n" , |
293 | offset, length, index_size, ib->base.size); |
294 | return -EINVAL; |
295 | } |
296 | |
297 | *(uint32_t *)(validated + 5) = ib->dma_addr + offset; |
298 | |
299 | return 0; |
300 | } |
301 | |
302 | static int |
303 | validate_gl_array_primitive(VALIDATE_ARGS) |
304 | { |
305 | uint32_t length = *(uint32_t *)(untrusted + 1); |
306 | uint32_t base_index = *(uint32_t *)(untrusted + 5); |
307 | uint32_t max_index; |
308 | struct vc4_shader_state *shader_state; |
309 | |
310 | /* Check overflow condition */ |
311 | if (exec->shader_state_count == 0) { |
312 | DRM_DEBUG("shader state must precede primitives\n" ); |
313 | return -EINVAL; |
314 | } |
315 | shader_state = &exec->shader_state[exec->shader_state_count - 1]; |
316 | |
317 | if (length + base_index < length) { |
318 | DRM_DEBUG("primitive vertex count overflow\n" ); |
319 | return -EINVAL; |
320 | } |
321 | max_index = length + base_index - 1; |
322 | |
323 | if (max_index > shader_state->max_index) |
324 | shader_state->max_index = max_index; |
325 | |
326 | return 0; |
327 | } |
328 | |
329 | static int |
330 | validate_gl_shader_state(VALIDATE_ARGS) |
331 | { |
332 | uint32_t i = exec->shader_state_count++; |
333 | |
334 | if (i >= exec->shader_state_size) { |
335 | DRM_DEBUG("More requests for shader states than declared\n" ); |
336 | return -EINVAL; |
337 | } |
338 | |
339 | exec->shader_state[i].addr = *(uint32_t *)untrusted; |
340 | exec->shader_state[i].max_index = 0; |
341 | |
342 | if (exec->shader_state[i].addr & ~0xf) { |
343 | DRM_DEBUG("high bits set in GL shader rec reference\n" ); |
344 | return -EINVAL; |
345 | } |
346 | |
347 | *(uint32_t *)validated = (exec->shader_rec_p + |
348 | exec->shader_state[i].addr); |
349 | |
350 | exec->shader_rec_p += |
351 | roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16); |
352 | |
353 | return 0; |
354 | } |
355 | |
356 | static int |
357 | validate_tile_binning_config(VALIDATE_ARGS) |
358 | { |
359 | struct drm_device *dev = exec->exec_bo->base.dev; |
360 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
361 | uint8_t flags; |
362 | uint32_t tile_state_size; |
363 | uint32_t tile_count, bin_addr; |
364 | int bin_slot; |
365 | |
366 | if (exec->found_tile_binning_mode_config_packet) { |
367 | DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n" ); |
368 | return -EINVAL; |
369 | } |
370 | exec->found_tile_binning_mode_config_packet = true; |
371 | |
372 | exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); |
373 | exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); |
374 | tile_count = exec->bin_tiles_x * exec->bin_tiles_y; |
375 | flags = *(uint8_t *)(untrusted + 14); |
376 | |
377 | if (exec->bin_tiles_x == 0 || |
378 | exec->bin_tiles_y == 0) { |
379 | DRM_DEBUG("Tile binning config of %dx%d too small\n" , |
380 | exec->bin_tiles_x, exec->bin_tiles_y); |
381 | return -EINVAL; |
382 | } |
383 | |
384 | if (flags & (VC4_BIN_CONFIG_DB_NON_MS | |
385 | VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) { |
386 | DRM_DEBUG("unsupported binning config flags 0x%02x\n" , flags); |
387 | return -EINVAL; |
388 | } |
389 | |
390 | bin_slot = vc4_v3d_get_bin_slot(vc4); |
391 | if (bin_slot < 0) { |
392 | if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) { |
393 | DRM_ERROR("Failed to allocate binner memory: %d\n" , |
394 | bin_slot); |
395 | } |
396 | return bin_slot; |
397 | } |
398 | |
399 | /* The slot we allocated will only be used by this job, and is |
400 | * free when the job completes rendering. |
401 | */ |
402 | exec->bin_slots |= BIT(bin_slot); |
403 | bin_addr = vc4->bin_bo->base.dma_addr + bin_slot * vc4->bin_alloc_size; |
404 | |
405 | /* The tile state data array is 48 bytes per tile, and we put it at |
406 | * the start of a BO containing both it and the tile alloc. |
407 | */ |
408 | tile_state_size = 48 * tile_count; |
409 | |
410 | /* Since the tile alloc array will follow us, align. */ |
411 | exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096); |
412 | |
413 | *(uint8_t *)(validated + 14) = |
414 | ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | |
415 | VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | |
416 | VC4_BIN_CONFIG_AUTO_INIT_TSDA | |
417 | VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, |
418 | VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | |
419 | VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, |
420 | VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); |
421 | |
422 | /* tile alloc address. */ |
423 | *(uint32_t *)(validated + 0) = exec->tile_alloc_offset; |
424 | /* tile alloc size. */ |
425 | *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size - |
426 | exec->tile_alloc_offset); |
427 | /* tile state address. */ |
428 | *(uint32_t *)(validated + 8) = bin_addr; |
429 | |
430 | return 0; |
431 | } |
432 | |
433 | static int |
434 | validate_gem_handles(VALIDATE_ARGS) |
435 | { |
436 | memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); |
437 | return 0; |
438 | } |
439 | |
440 | #define VC4_DEFINE_PACKET(packet, func) \ |
441 | [packet] = { packet ## _SIZE, #packet, func } |
442 | |
443 | static const struct cmd_info { |
444 | uint16_t len; |
445 | const char *name; |
446 | int (*func)(struct vc4_exec_info *exec, void *validated, |
447 | void *untrusted); |
448 | } cmd_info[] = { |
449 | VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL), |
450 | VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL), |
451 | VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush), |
452 | VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL), |
453 | VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, |
454 | validate_start_tile_binning), |
455 | VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, |
456 | validate_increment_semaphore), |
457 | |
458 | VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, |
459 | validate_indexed_prim_list), |
460 | VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, |
461 | validate_gl_array_primitive), |
462 | |
463 | VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL), |
464 | |
465 | VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state), |
466 | |
467 | VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL), |
468 | VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL), |
469 | VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL), |
470 | VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL), |
471 | VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL), |
472 | VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL), |
473 | VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL), |
474 | VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL), |
475 | VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), |
476 | /* Note: The docs say this was also 105, but it was 106 in the |
477 | * initial userland code drop. |
478 | */ |
479 | VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL), |
480 | |
481 | VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, |
482 | validate_tile_binning_config), |
483 | |
484 | VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles), |
485 | }; |
486 | |
487 | int |
488 | vc4_validate_bin_cl(struct drm_device *dev, |
489 | void *validated, |
490 | void *unvalidated, |
491 | struct vc4_exec_info *exec) |
492 | { |
493 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
494 | uint32_t len = exec->args->bin_cl_size; |
495 | uint32_t dst_offset = 0; |
496 | uint32_t src_offset = 0; |
497 | |
498 | if (WARN_ON_ONCE(vc4->is_vc5)) |
499 | return -ENODEV; |
500 | |
501 | while (src_offset < len) { |
502 | void *dst_pkt = validated + dst_offset; |
503 | void *src_pkt = unvalidated + src_offset; |
504 | u8 cmd = *(uint8_t *)src_pkt; |
505 | const struct cmd_info *info; |
506 | |
507 | if (cmd >= ARRAY_SIZE(cmd_info)) { |
508 | DRM_DEBUG("0x%08x: packet %d out of bounds\n" , |
509 | src_offset, cmd); |
510 | return -EINVAL; |
511 | } |
512 | |
513 | info = &cmd_info[cmd]; |
514 | if (!info->name) { |
515 | DRM_DEBUG("0x%08x: packet %d invalid\n" , |
516 | src_offset, cmd); |
517 | return -EINVAL; |
518 | } |
519 | |
520 | if (src_offset + info->len > len) { |
521 | DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x " |
522 | "exceeds bounds (0x%08x)\n" , |
523 | src_offset, cmd, info->name, info->len, |
524 | src_offset + len); |
525 | return -EINVAL; |
526 | } |
527 | |
528 | if (cmd != VC4_PACKET_GEM_HANDLES) |
529 | memcpy(dst_pkt, src_pkt, info->len); |
530 | |
531 | if (info->func && info->func(exec, |
532 | dst_pkt + 1, |
533 | src_pkt + 1)) { |
534 | DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n" , |
535 | src_offset, cmd, info->name); |
536 | return -EINVAL; |
537 | } |
538 | |
539 | src_offset += info->len; |
540 | /* GEM handle loading doesn't produce HW packets. */ |
541 | if (cmd != VC4_PACKET_GEM_HANDLES) |
542 | dst_offset += info->len; |
543 | |
544 | /* When the CL hits halt, it'll stop reading anything else. */ |
545 | if (cmd == VC4_PACKET_HALT) |
546 | break; |
547 | } |
548 | |
549 | exec->ct0ea = exec->ct0ca + dst_offset; |
550 | |
551 | if (!exec->found_start_tile_binning_packet) { |
552 | DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n" ); |
553 | return -EINVAL; |
554 | } |
555 | |
556 | /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The |
557 | * semaphore is used to trigger the render CL to start up, and the |
558 | * FLUSH is what caps the bin lists with |
559 | * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main |
560 | * render CL when they get called to) and actually triggers the queued |
561 | * semaphore increment. |
562 | */ |
563 | if (!exec->found_increment_semaphore_packet || !exec->found_flush) { |
564 | DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + " |
565 | "VC4_PACKET_FLUSH\n" ); |
566 | return -EINVAL; |
567 | } |
568 | |
569 | return 0; |
570 | } |
571 | |
572 | static bool |
573 | reloc_tex(struct vc4_exec_info *exec, |
574 | void *uniform_data_u, |
575 | struct vc4_texture_sample_info *sample, |
576 | uint32_t texture_handle_index, bool is_cs) |
577 | { |
578 | struct drm_gem_dma_object *tex; |
579 | uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); |
580 | uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]); |
581 | uint32_t p2 = (sample->p_offset[2] != ~0 ? |
582 | *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0); |
583 | uint32_t p3 = (sample->p_offset[3] != ~0 ? |
584 | *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0); |
585 | uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; |
586 | uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK; |
587 | uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS); |
588 | uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH); |
589 | uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT); |
590 | uint32_t cpp, tiling_format, utile_w, utile_h; |
591 | uint32_t i; |
592 | uint32_t cube_map_stride = 0; |
593 | enum vc4_texture_data_type type; |
594 | |
595 | tex = vc4_use_bo(exec, hindex: texture_handle_index); |
596 | if (!tex) |
597 | return false; |
598 | |
599 | if (sample->is_direct) { |
600 | uint32_t remaining_size = tex->base.size - p0; |
601 | |
602 | if (p0 > tex->base.size - 4) { |
603 | DRM_DEBUG("UBO offset greater than UBO size\n" ); |
604 | goto fail; |
605 | } |
606 | if (p1 > remaining_size - 4) { |
607 | DRM_DEBUG("UBO clamp would allow reads " |
608 | "outside of UBO\n" ); |
609 | goto fail; |
610 | } |
611 | *validated_p0 = tex->dma_addr + p0; |
612 | return true; |
613 | } |
614 | |
615 | if (width == 0) |
616 | width = 2048; |
617 | if (height == 0) |
618 | height = 2048; |
619 | |
620 | if (p0 & VC4_TEX_P0_CMMODE_MASK) { |
621 | if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) == |
622 | VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) |
623 | cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; |
624 | if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) == |
625 | VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { |
626 | if (cube_map_stride) { |
627 | DRM_DEBUG("Cube map stride set twice\n" ); |
628 | goto fail; |
629 | } |
630 | |
631 | cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK; |
632 | } |
633 | if (!cube_map_stride) { |
634 | DRM_DEBUG("Cube map stride not set\n" ); |
635 | goto fail; |
636 | } |
637 | } |
638 | |
639 | type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) | |
640 | (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4)); |
641 | |
642 | switch (type) { |
643 | case VC4_TEXTURE_TYPE_RGBA8888: |
644 | case VC4_TEXTURE_TYPE_RGBX8888: |
645 | case VC4_TEXTURE_TYPE_RGBA32R: |
646 | cpp = 4; |
647 | break; |
648 | case VC4_TEXTURE_TYPE_RGBA4444: |
649 | case VC4_TEXTURE_TYPE_RGBA5551: |
650 | case VC4_TEXTURE_TYPE_RGB565: |
651 | case VC4_TEXTURE_TYPE_LUMALPHA: |
652 | case VC4_TEXTURE_TYPE_S16F: |
653 | case VC4_TEXTURE_TYPE_S16: |
654 | cpp = 2; |
655 | break; |
656 | case VC4_TEXTURE_TYPE_LUMINANCE: |
657 | case VC4_TEXTURE_TYPE_ALPHA: |
658 | case VC4_TEXTURE_TYPE_S8: |
659 | cpp = 1; |
660 | break; |
661 | case VC4_TEXTURE_TYPE_ETC1: |
662 | /* ETC1 is arranged as 64-bit blocks, where each block is 4x4 |
663 | * pixels. |
664 | */ |
665 | cpp = 8; |
666 | width = (width + 3) >> 2; |
667 | height = (height + 3) >> 2; |
668 | break; |
669 | case VC4_TEXTURE_TYPE_BW1: |
670 | case VC4_TEXTURE_TYPE_A4: |
671 | case VC4_TEXTURE_TYPE_A1: |
672 | case VC4_TEXTURE_TYPE_RGBA64: |
673 | case VC4_TEXTURE_TYPE_YUV422R: |
674 | default: |
675 | DRM_DEBUG("Texture format %d unsupported\n" , type); |
676 | goto fail; |
677 | } |
678 | utile_w = utile_width(cpp); |
679 | utile_h = utile_height(cpp); |
680 | |
681 | if (type == VC4_TEXTURE_TYPE_RGBA32R) { |
682 | tiling_format = VC4_TILING_FORMAT_LINEAR; |
683 | } else { |
684 | if (size_is_lt(width, height, cpp)) |
685 | tiling_format = VC4_TILING_FORMAT_LT; |
686 | else |
687 | tiling_format = VC4_TILING_FORMAT_T; |
688 | } |
689 | |
690 | if (!vc4_check_tex_size(exec, fbo: tex, offset: offset + cube_map_stride * 5, |
691 | tiling_format, width, height, cpp)) { |
692 | goto fail; |
693 | } |
694 | |
695 | /* The mipmap levels are stored before the base of the texture. Make |
696 | * sure there is actually space in the BO. |
697 | */ |
698 | for (i = 1; i <= miplevels; i++) { |
699 | uint32_t level_width = max(width >> i, 1u); |
700 | uint32_t level_height = max(height >> i, 1u); |
701 | uint32_t aligned_width, aligned_height; |
702 | uint32_t level_size; |
703 | |
704 | /* Once the levels get small enough, they drop from T to LT. */ |
705 | if (tiling_format == VC4_TILING_FORMAT_T && |
706 | size_is_lt(width: level_width, height: level_height, cpp)) { |
707 | tiling_format = VC4_TILING_FORMAT_LT; |
708 | } |
709 | |
710 | switch (tiling_format) { |
711 | case VC4_TILING_FORMAT_T: |
712 | aligned_width = round_up(level_width, utile_w * 8); |
713 | aligned_height = round_up(level_height, utile_h * 8); |
714 | break; |
715 | case VC4_TILING_FORMAT_LT: |
716 | aligned_width = round_up(level_width, utile_w); |
717 | aligned_height = round_up(level_height, utile_h); |
718 | break; |
719 | default: |
720 | aligned_width = round_up(level_width, utile_w); |
721 | aligned_height = level_height; |
722 | break; |
723 | } |
724 | |
725 | level_size = aligned_width * cpp * aligned_height; |
726 | |
727 | if (offset < level_size) { |
728 | DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db " |
729 | "overflowed buffer bounds (offset %d)\n" , |
730 | i, level_width, level_height, |
731 | aligned_width, aligned_height, |
732 | level_size, offset); |
733 | goto fail; |
734 | } |
735 | |
736 | offset -= level_size; |
737 | } |
738 | |
739 | *validated_p0 = tex->dma_addr + p0; |
740 | |
741 | if (is_cs) { |
742 | exec->bin_dep_seqno = max(exec->bin_dep_seqno, |
743 | to_vc4_bo(&tex->base)->write_seqno); |
744 | } |
745 | |
746 | return true; |
747 | fail: |
748 | DRM_INFO("Texture p0 at %d: 0x%08x\n" , sample->p_offset[0], p0); |
749 | DRM_INFO("Texture p1 at %d: 0x%08x\n" , sample->p_offset[1], p1); |
750 | DRM_INFO("Texture p2 at %d: 0x%08x\n" , sample->p_offset[2], p2); |
751 | DRM_INFO("Texture p3 at %d: 0x%08x\n" , sample->p_offset[3], p3); |
752 | return false; |
753 | } |
754 | |
755 | static int |
756 | validate_gl_shader_rec(struct drm_device *dev, |
757 | struct vc4_exec_info *exec, |
758 | struct vc4_shader_state *state) |
759 | { |
760 | uint32_t *src_handles; |
761 | void *pkt_u, *pkt_v; |
762 | static const uint32_t shader_reloc_offsets[] = { |
763 | 4, /* fs */ |
764 | 16, /* vs */ |
765 | 28, /* cs */ |
766 | }; |
767 | uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets); |
768 | struct drm_gem_dma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8]; |
769 | uint32_t nr_attributes, nr_relocs, packet_size; |
770 | int i; |
771 | |
772 | nr_attributes = state->addr & 0x7; |
773 | if (nr_attributes == 0) |
774 | nr_attributes = 8; |
775 | packet_size = gl_shader_rec_size(pointer_bits: state->addr); |
776 | |
777 | nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes; |
778 | if (nr_relocs * 4 > exec->shader_rec_size) { |
779 | DRM_DEBUG("overflowed shader recs reading %d handles " |
780 | "from %d bytes left\n" , |
781 | nr_relocs, exec->shader_rec_size); |
782 | return -EINVAL; |
783 | } |
784 | src_handles = exec->shader_rec_u; |
785 | exec->shader_rec_u += nr_relocs * 4; |
786 | exec->shader_rec_size -= nr_relocs * 4; |
787 | |
788 | if (packet_size > exec->shader_rec_size) { |
789 | DRM_DEBUG("overflowed shader recs copying %db packet " |
790 | "from %d bytes left\n" , |
791 | packet_size, exec->shader_rec_size); |
792 | return -EINVAL; |
793 | } |
794 | pkt_u = exec->shader_rec_u; |
795 | pkt_v = exec->shader_rec_v; |
796 | memcpy(pkt_v, pkt_u, packet_size); |
797 | exec->shader_rec_u += packet_size; |
798 | /* Shader recs have to be aligned to 16 bytes (due to the attribute |
799 | * flags being in the low bytes), so round the next validated shader |
800 | * rec address up. This should be safe, since we've got so many |
801 | * relocations in a shader rec packet. |
802 | */ |
803 | BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4); |
804 | exec->shader_rec_v += roundup(packet_size, 16); |
805 | exec->shader_rec_size -= packet_size; |
806 | |
807 | for (i = 0; i < shader_reloc_count; i++) { |
808 | if (src_handles[i] > exec->bo_count) { |
809 | DRM_DEBUG("Shader handle %d too big\n" , src_handles[i]); |
810 | return -EINVAL; |
811 | } |
812 | |
813 | bo[i] = to_drm_gem_dma_obj(exec->bo[src_handles[i]]); |
814 | if (!bo[i]) |
815 | return -EINVAL; |
816 | } |
817 | for (i = shader_reloc_count; i < nr_relocs; i++) { |
818 | bo[i] = vc4_use_bo(exec, hindex: src_handles[i]); |
819 | if (!bo[i]) |
820 | return -EINVAL; |
821 | } |
822 | |
823 | if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) != |
824 | to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) { |
825 | DRM_DEBUG("Thread mode of CL and FS do not match\n" ); |
826 | return -EINVAL; |
827 | } |
828 | |
829 | if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded || |
830 | to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) { |
831 | DRM_DEBUG("cs and vs cannot be threaded\n" ); |
832 | return -EINVAL; |
833 | } |
834 | |
835 | for (i = 0; i < shader_reloc_count; i++) { |
836 | struct vc4_validated_shader_info *validated_shader; |
837 | uint32_t o = shader_reloc_offsets[i]; |
838 | uint32_t src_offset = *(uint32_t *)(pkt_u + o); |
839 | uint32_t *texture_handles_u; |
840 | void *uniform_data_u; |
841 | uint32_t tex, uni; |
842 | |
843 | *(uint32_t *)(pkt_v + o) = bo[i]->dma_addr + src_offset; |
844 | |
845 | if (src_offset != 0) { |
846 | DRM_DEBUG("Shaders must be at offset 0 of " |
847 | "the BO.\n" ); |
848 | return -EINVAL; |
849 | } |
850 | |
851 | validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader; |
852 | if (!validated_shader) |
853 | return -EINVAL; |
854 | |
855 | if (validated_shader->uniforms_src_size > |
856 | exec->uniforms_size) { |
857 | DRM_DEBUG("Uniforms src buffer overflow\n" ); |
858 | return -EINVAL; |
859 | } |
860 | |
861 | texture_handles_u = exec->uniforms_u; |
862 | uniform_data_u = (texture_handles_u + |
863 | validated_shader->num_texture_samples); |
864 | |
865 | memcpy(exec->uniforms_v, uniform_data_u, |
866 | validated_shader->uniforms_size); |
867 | |
868 | for (tex = 0; |
869 | tex < validated_shader->num_texture_samples; |
870 | tex++) { |
871 | if (!reloc_tex(exec, |
872 | uniform_data_u, |
873 | sample: &validated_shader->texture_samples[tex], |
874 | texture_handle_index: texture_handles_u[tex], |
875 | is_cs: i == 2)) { |
876 | return -EINVAL; |
877 | } |
878 | } |
879 | |
880 | /* Fill in the uniform slots that need this shader's |
881 | * start-of-uniforms address (used for resetting the uniform |
882 | * stream in the presence of control flow). |
883 | */ |
884 | for (uni = 0; |
885 | uni < validated_shader->num_uniform_addr_offsets; |
886 | uni++) { |
887 | uint32_t o = validated_shader->uniform_addr_offsets[uni]; |
888 | ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p; |
889 | } |
890 | |
891 | *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; |
892 | |
893 | exec->uniforms_u += validated_shader->uniforms_src_size; |
894 | exec->uniforms_v += validated_shader->uniforms_size; |
895 | exec->uniforms_p += validated_shader->uniforms_size; |
896 | } |
897 | |
898 | for (i = 0; i < nr_attributes; i++) { |
899 | struct drm_gem_dma_object *vbo = |
900 | bo[ARRAY_SIZE(shader_reloc_offsets) + i]; |
901 | uint32_t o = 36 + i * 8; |
902 | uint32_t offset = *(uint32_t *)(pkt_u + o + 0); |
903 | uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1; |
904 | uint32_t stride = *(uint8_t *)(pkt_u + o + 5); |
905 | uint32_t max_index; |
906 | |
907 | exec->bin_dep_seqno = max(exec->bin_dep_seqno, |
908 | to_vc4_bo(&vbo->base)->write_seqno); |
909 | |
910 | if (state->addr & 0x8) |
911 | stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; |
912 | |
913 | if (vbo->base.size < offset || |
914 | vbo->base.size - offset < attr_size) { |
915 | DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n" , |
916 | offset, attr_size, vbo->base.size); |
917 | return -EINVAL; |
918 | } |
919 | |
920 | if (stride != 0) { |
921 | max_index = ((vbo->base.size - offset - attr_size) / |
922 | stride); |
923 | if (state->max_index > max_index) { |
924 | DRM_DEBUG("primitives use index %d out of " |
925 | "supplied %d\n" , |
926 | state->max_index, max_index); |
927 | return -EINVAL; |
928 | } |
929 | } |
930 | |
931 | *(uint32_t *)(pkt_v + o) = vbo->dma_addr + offset; |
932 | } |
933 | |
934 | return 0; |
935 | } |
936 | |
937 | int |
938 | vc4_validate_shader_recs(struct drm_device *dev, |
939 | struct vc4_exec_info *exec) |
940 | { |
941 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
942 | uint32_t i; |
943 | int ret = 0; |
944 | |
945 | if (WARN_ON_ONCE(vc4->is_vc5)) |
946 | return -ENODEV; |
947 | |
948 | for (i = 0; i < exec->shader_state_count; i++) { |
949 | ret = validate_gl_shader_rec(dev, exec, state: &exec->shader_state[i]); |
950 | if (ret) |
951 | return ret; |
952 | } |
953 | |
954 | return ret; |
955 | } |
956 | |