region-model-asm.cc source code [gcc/analyzer/region-model-asm.cc]

/* Handling inline asm in the analyzer.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   Contributed by David Malcolm <dmalcolm@redhat.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20
21	#include "config.h"
22	#define INCLUDE_MEMORY
23	#include "system.h"
24	#include "coretypes.h"
25	#include "tree.h"
26	#include "function.h"
27	#include "basic-block.h"
28	#include "gimple.h"
29	#include "gimple-iterator.h"
30	#include "diagnostic-core.h"
31	#include "pretty-print.h"
32	#include "analyzer/analyzer.h"
33	#include "analyzer/analyzer-logging.h"
34	#include "options.h"
35	#include "analyzer/call-string.h"
36	#include "analyzer/program-point.h"
37	#include "analyzer/store.h"
38	#include "analyzer/region-model.h"
39	#include "analyzer/region-model-reachability.h"
40	#include "stmt.h"
41
42	#if ENABLE_ANALYZER
43
44	namespace ana {
45
/* Minimal asm support for the analyzer.

   The objective of this code is to:
   - minimize false positives from the analyzer on the Linux kernel
     (which makes heavy use of inline asm), whilst
   - avoiding having to "teach" the compiler anything about specific strings
     in asm statements.

   Specifically, we want to:

   (a) mark asm outputs and certain other regions as having been written to,
       to avoid false positives from -Wanalyzer-use-of-uninitialized-value.

   (b) identify some of these stmts as "deterministic" so that we can
       write consistent outputs given consistent inputs, so that we can
       avoid false positives for paths in which an asm is invoked twice
       with the same inputs and is expected to emit the same output.

   This file implements heuristics for achieving the above.  */
65
66	/ Determine if ASM_STMT is deterministic, in the sense of (b) above.*
67
68	Consider this x86 function taken from the Linux kernel
69	(arch/x86/include/asm/barrier.h):
70
71	static inline unsigned long array_index_mask_nospec(unsigned long index,
72	unsigned long size)
73	{
74	unsigned long mask;
75
76	asm volatile ("cmp %1,%2; sbb %0,%0;"
77	:"=r" (mask)
78	:"g"(size),"r" (index)
79	:"cc");
80	return mask;
81	}
82
83	The above is a mitigation for Spectre-variant-1 attacks, for clamping
84	an array access to within the range of [0, size] if the CPU speculates
85	past the array bounds.
86
87	However, it is ultimately used to implement wdev_to_wvif:
88
89	static inline struct wfx_vif *
90	wdev_to_wvif(struct wfx_dev wdev, int vif_id)*
91	{
92	vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
93	if (!wdev->vif[vif_id]) {
94	return NULL;
95	}
96	return (struct wfx_vif )wdev->vif[vif_id]->drv_priv;*
97	}
98
99	which is used by:
100
101	if (wdev_to_wvif(wvif->wdev, 1))
102	return wdev_to_wvif(wvif->wdev, 1)->vif;
103
104	The code has been written to assume that wdev_to_wvif is deterministic,
105	and won't change from returning non-NULL at the "if" clause to
106	returning NULL at the "->vif" dereference.
107
108	By treating the above specific "asm volatile" as deterministic we avoid
109	a false positive from -Wanalyzer-null-dereference. /*
110
111	static bool
112	deterministic_p (const gasm *asm_stmt)
113	{
114	/ Assume something volatile with no inputs is querying*
115	changeable state e.g. rdtsc. /*
116	if (gimple_asm_ninputs (asm_stmt) == `0`
117	&& gimple_asm_volatile_p (asm_stmt))
118	return false;
119
120	/ Otherwise assume it's purely a function of its inputs. /
121	return true;
122	}
123
124	/ Update this model for the asm STMT, using CTXT to report any*
125	diagnostics.
126
127	Compare with cfgexpand.cc: expand_asm_stmt. /*
128
129	void
130	region_model::on_asm_stmt (const gasm stmt, region_model_context ctxt)
131	{
132	logger *logger = ctxt ? ctxt->get_logger () : NULL;
133	LOG_SCOPE (logger);
134
135	const unsigned noutputs = gimple_asm_noutputs (asm_stmt: stmt);
136	const unsigned ninputs = gimple_asm_ninputs (asm_stmt: stmt);
137
138	auto_vec<tree> output_tvec;
139	auto_vec<tree> input_tvec;
140	auto_vec<const char *> constraints;
141
142	/ Copy the gimple vectors into new vectors that we can manipulate. /
143	output_tvec.safe_grow (len: noutputs, exact: true);
144	input_tvec.safe_grow (len: ninputs, exact: true);
145	constraints.safe_grow (len: noutputs + ninputs, exact: true);
146
147	for (unsigned i = `0`; i < noutputs; ++i)
148	{
149	tree t = gimple_asm_output_op (asm_stmt: stmt, index: i);
150	output_tvec [i] = TREE_VALUE (t);
151	constraints [i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
152	}
153	for (unsigned i = `0`; i < ninputs; i++)
154	{
155	tree t = gimple_asm_input_op (asm_stmt: stmt, index: i);
156	input_tvec [i] = TREE_VALUE (t);
157	constraints [i + noutputs]
158	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
159	}
160
161	/ Determine which regions are reachable from the inputs*
162	to this stmt. /*
163	reachable_regions reachable_regs (this);
164
165	int num_errors = `0`;
166
167	auto_vec<const region *> output_regions (noutputs);
168	for (unsigned i = `0`; i < noutputs; ++i)
169	{
170	tree val = output_tvec [i];
171	const char *constraint;
172	bool is_inout;
173	bool allows_reg;
174	bool allows_mem;
175
176	const region *dst_reg = get_lvalue (expr: val, ctxt);
177	output_regions.quick_push (obj: dst_reg);
178	reachable_regs.add (reg: dst_reg, is_mutable: true);
179
180	/ Try to parse the output constraint. If that fails, there's*
181	no point in going further. /*
182	constraint = constraints [i];
183	if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
184	&allows_mem, &allows_reg, &is_inout))
185	{
186	if (logger)
187	logger->log (fmt: "error parsing constraint for output %i: %qs",
188	i, constraint);
189	num_errors++;
190	continue;
191	}
192
193	if (logger)
194	{
195	logger->log (fmt: "output %i: %qs %qE"
196	" is_inout: %i allows_reg: %i allows_mem: %i",
197	i, constraint, val,
198	(int)is_inout, (int)allows_reg, (int)allows_mem);
199	logger->start_log_line ();
200	logger->log_partial (fmt: " region: ");
201	dst_reg->dump_to_pp (pp: logger->get_printer (), simple: true);
202	logger->end_log_line ();
203	}
204
205	}
206
207	/ Ideally should combine with inout_svals to determine the*
208	"effective inputs" and use this for the asm_output_svalue. /*
209
210	auto_vec<const svalue *> input_svals (ninputs);
211	for (unsigned i = `0`; i < ninputs; i++)
212	{
213	tree val = input_tvec [i];
214	const char *constraint = constraints [i + noutputs];
215	bool allows_reg, allows_mem;
216	if (! parse_input_constraint (&constraint, i, ninputs, noutputs, `0`,
217	constraints.address (),
218	&allows_mem, &allows_reg))
219	{
220	if (logger)
221	logger->log (fmt: "error parsing constraint for input %i: %qs",
222	i, constraint);
223	num_errors++;
224	continue;
225	}
226
227	tree src_expr = input_tvec [i];
228	const svalue *src_sval = get_rvalue (expr: src_expr, ctxt);
229	check_for_poison (sval: src_sval, expr: src_expr, NULL, ctxt);
230	input_svals.quick_push (obj: src_sval);
231	reachable_regs.handle_sval (sval: src_sval);
232
233	if (logger)
234	{
235	logger->log (fmt: "input %i: %qs %qE"
236	" allows_reg: %i allows_mem: %i",
237	i, constraint, val,
238	(int)allows_reg, (int)allows_mem);
239	logger->start_log_line ();
240	logger->log_partial (fmt: " sval: ");
241	src_sval->dump_to_pp (pp: logger->get_printer (), simple: true);
242	logger->end_log_line ();
243	}
244	}
245
246	if (num_errors > `0`)
247	gcc_unreachable ();
248
249	if (logger)
250	{
251	logger->log (fmt: "reachability: ");
252	reachable_regs.dump_to_pp (pp: logger->get_printer ());
253	logger->end_log_line ();
254	}
255
256	/ Given the regions that were reachable from the inputs we*
257	want to clobber them.
258	This is similar to region_model::handle_unrecognized_call,
259	but the unknown call policies seems too aggressive (e.g. purging state
260	from anything that's ever escaped). Instead, clobber any clusters
261	that were reachable in this* asm stmt, rather than those that*
262	escaped, and we don't treat the values as having escaped.
263	We also assume that asm stmts don't affect sm-state. /*
264	for (auto iter = reachable_regs.begin_mutable_base_regs ();
265	iter != reachable_regs.end_mutable_base_regs (); ++iter)
266	{
267	const region base_reg = iter;
268	if (base_reg->symbolic_for_unknown_ptr_p ()
269	\|\| !base_reg->tracked_p ())
270	continue;
271
272	binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
273	cluster->on_asm (stmt, mgr: m_mgr->get_store_manager (),
274	p: conjured_purge (this, ctxt));
275	}
276
277	/ Update the outputs. /
278	for (unsigned output_idx = `0`; output_idx < noutputs; output_idx++)
279	{
280	tree dst_expr = output_tvec [output_idx];
281	const region *dst_reg = output_regions [output_idx];
282
283	const svalue *sval;
284	if (deterministic_p (asm_stmt: stmt)
285	&& input_svals.length () <= asm_output_svalue::MAX_INPUTS)
286	sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
287	asm_stmt: stmt,
288	output_idx,
289	inputs: input_svals);
290	else
291	{
292	sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
293	stmt,
294	id_reg: dst_reg,
295	p: conjured_purge (this,
296	ctxt));
297	}
298	set_value (lhs_reg: dst_reg, rhs_sval: sval, ctxt);
299	}
300	}
301
302	} // namespace ana
303
304	#endif /* #if ENABLE_ANALYZER */
305

source code of gcc/analyzer/region-model-asm.cc