/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * Since the VC4 has no IOMMU between it and system memory, a user
 * with access to execute shaders could escalate privilege by
 * overwriting system memory (using the VPM write address register in
 * the general-purpose DMA mode) or by reading system memory it
 * shouldn't (reading it as a texture, as uniform data, or through a
 * direct-addressed TMU lookup).
 *
 * The shader validator walks over a shader's BO, ensuring that its
 * accesses are appropriately bounded, and recording where texture
 * accesses are made so that we can do relocations for them in the
 * uniform stream.
 *
 * Shader BOs are immutable for their lifetimes (enforced by not
 * allowing mmaps, GEM prime export, or rendering to them from a CL),
 * so this validation is only performed at BO creation time.
 */

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

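/* One live-value slot per writable location we track: 32 registers in
 * each of the A and B register files, plus the four accumulators r0-r3
 * (see waddr_to_live_reg_index() for the index layout).
 */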
#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found. This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch. If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in
	 * the stream, even if the shader didn't need to read uniforms in
	 * later basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction writing the top half of the
	 * register files. If we allowed writing the unusable regs in
	 * a threaded shader, then the clamp validation of the other
	 * shader running on our QPU would be invalid.
	 */
	bool all_registers_used;
};
99 | |
100 | static uint32_t |
101 | waddr_to_live_reg_index(uint32_t waddr, bool is_b) |
102 | { |
103 | if (waddr < 32) { |
104 | if (is_b) |
105 | return 32 + waddr; |
106 | else |
107 | return waddr; |
108 | } else if (waddr <= QPU_W_ACC3) { |
109 | return 64 + waddr - QPU_W_ACC0; |
110 | } else { |
111 | return ~0; |
112 | } |
113 | } |
114 | |
115 | static uint32_t |
116 | raddr_add_a_to_live_reg_index(uint64_t inst) |
117 | { |
118 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); |
119 | uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); |
120 | uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); |
121 | uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); |
122 | |
123 | if (add_a == QPU_MUX_A) |
124 | return raddr_a; |
125 | else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) |
126 | return 32 + raddr_b; |
127 | else if (add_a <= QPU_MUX_R3) |
128 | return 64 + add_a; |
129 | else |
130 | return ~0; |
131 | } |
132 | |
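/* In threaded mode, the two threads sharing a QPU each get only the
 * bottom half (registers 0-15) of the A and B register files; these are
 * the live-reg indices that fall in the unusable top half.
 */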
static bool
live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
	       (lri >= 32 + 16 && lri < 32 + 32);
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_DEBUG("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_DEBUG("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
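		/* A hedged sketch of the accepted shape, in illustrative
		 * QPU assembly (register names are hypothetical):
		 *
		 *	max	ra0, x, 0	; tracked by track_live_clamps()
		 *	min	ra1, ra0, unif	; unif = UBO size bound
		 *	add	t0s, ra1, unif	; unif = UBO base address
		 *
		 * The two clamp steps were recorded earlier; here we verify
		 * the final add.
		 */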
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_DEBUG("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads().
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

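/* Notes in the validated shader info that a uniforms-stream address must
 * be supplied at the current uniform offset; the render job validation in
 * vc4_validate.c uses these recorded offsets to patch the real address in
 * when copying the user's uniform stream.
 */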
static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side. However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */
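	/* Illustrative shape of an accepted reset (hypothetical QPU
	 * assembly); the immediate must name the offset of the uniform
	 * right after the new-address uniform itself:
	 *
	 *	load_imm	ra0, <uniforms_size + 4>
	 *	add		unif_addr, ra0, unif
	 */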
	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
	case QPU_SIG_NONE:
	case QPU_SIG_SCOREBOARD_UNLOCK:
	case QPU_SIG_COLOR_LOAD:
	case QPU_SIG_LOAD_TMU0:
	case QPU_SIG_LOAD_TMU1:
		break;
	default:
		DRM_DEBUG("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_DEBUG("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_DEBUG("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_DEBUG("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_DEBUG("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_DEBUG("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_DEBUG("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_DEBUG("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_DEBUG("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

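/* Propagates the live clamp state that direct-addressed TMU validation
 * relies on: a MAX(x, 0) flags its destination in live_max_clamp_regs[],
 * and a following MIN(that, uniform) records the uniform's offset in
 * live_min_clamp_offsets[]. Any other write to a register kills its
 * tracked state.
 */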
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_DEBUG("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_DEBUG("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}

/* Make sure that all branches are absolute and point within the shader, and
 * note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	int ip;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished. validation_state->max_ip is the
			 * instruction after the last valid instruction in the
			 * program.
			 */
			validation_state->max_ip = ip + 3;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_DEBUG("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_DEBUG("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_DEBUG("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction. Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_DEBUG("branch target not aligned\n");
			return false;
		}

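		/* Worked example: a branch at ip 10 with branch_imm -64
		 * bytes has after_delay_ip 14 and targets instruction
		 * 14 + (-64 / 8) = 6.
		 */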
		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_DEBUG("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);
	}

	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_DEBUG("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_DEBUG("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before
	 * any uniforms are read. We require that after any branch point,
	 * the next uniform to be loaded is a uniform address offset. That
	 * uniform's offset will be marked by the uniform address register
	 * write validation, or by a one-off check at the end of the program.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}
777 | |
778 | struct vc4_validated_shader_info * |
779 | vc4_validate_shader(struct drm_gem_dma_object *shader_obj) |
780 | { |
781 | struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev); |
782 | bool found_shader_end = false; |
783 | int shader_end_ip = 0; |
784 | uint32_t last_thread_switch_ip = -3; |
785 | uint32_t ip; |
786 | struct vc4_validated_shader_info *validated_shader = NULL; |
787 | struct vc4_shader_validation_state validation_state; |
788 | |
789 | if (WARN_ON_ONCE(vc4->is_vc5)) |
790 | return NULL; |
791 | |
792 | memset(&validation_state, 0, sizeof(validation_state)); |
793 | validation_state.shader = shader_obj->vaddr; |
794 | validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); |
795 | |
796 | reset_validation_state(validation_state: &validation_state); |
797 | |
798 | validation_state.branch_targets = |
799 | kcalloc(BITS_TO_LONGS(validation_state.max_ip), |
800 | size: sizeof(unsigned long), GFP_KERNEL); |
801 | if (!validation_state.branch_targets) |
802 | goto fail; |
803 | |
804 | validated_shader = kcalloc(n: 1, size: sizeof(*validated_shader), GFP_KERNEL); |
805 | if (!validated_shader) |
806 | goto fail; |
807 | |
808 | if (!vc4_validate_branches(validation_state: &validation_state)) |
809 | goto fail; |
810 | |
	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
			/* Reset r0-r3 live clamp data */
			int i;

			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_DEBUG("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_DEBUG("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_DEBUG("Branch in thread switch at ip %d",
					  ip);
				goto fail;
			}

			break;
		default:
			DRM_DEBUG("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_DEBUG("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Might corrupt other thread */
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_DEBUG("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->uniform_addr_offsets);
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}
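
/*
 * A minimal sketch of the expected calling pattern (hedged; the in-tree
 * caller is the shader BO creation path in vc4_bo.c):
 *
 *	struct vc4_validated_shader_info *info;
 *
 *	info = vc4_validate_shader(&bo->base);
 *	if (!info)
 *		return -EINVAL;
 *
 * On success the caller owns @info and must eventually free its
 * texture_samples and uniform_addr_offsets arrays along with the struct
 * itself.
 */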
955 | |