1/* Handling inline asm in the analyzer.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#define INCLUDE_MEMORY
23#include "system.h"
24#include "coretypes.h"
25#include "tree.h"
26#include "function.h"
27#include "basic-block.h"
28#include "gimple.h"
29#include "gimple-iterator.h"
30#include "diagnostic-core.h"
31#include "pretty-print.h"
32#include "analyzer/analyzer.h"
33#include "analyzer/analyzer-logging.h"
34#include "options.h"
35#include "analyzer/call-string.h"
36#include "analyzer/program-point.h"
37#include "analyzer/store.h"
38#include "analyzer/region-model.h"
39#include "analyzer/region-model-reachability.h"
40#include "stmt.h"
41
42#if ENABLE_ANALYZER
43
44namespace ana {
45
46/* Minimal asm support for the analyzer.
47
48 The objective of this code is to:
49 - minimize false positives from the analyzer on the Linux kernel
50 (which makes heavy use of inline asm), whilst
51 - avoiding having to "teach" the compiler anything about specific strings
52 in asm statements.
53
54 Specifically, we want to:
55
   (a) mark asm outputs and certain other regions as having been written to,
       to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
58
59 (b) identify some of these stmts as "deterministic" so that we can
60 write consistent outputs given consistent inputs, so that we can
61 avoid false positives for paths in which an asm is invoked twice
62 with the same inputs and is expected to emit the same output.
63
64 This file implements heuristics for achieving the above. */
65
66/* Determine if ASM_STMT is deterministic, in the sense of (b) above.
67
68 Consider this x86 function taken from the Linux kernel
69 (arch/x86/include/asm/barrier.h):
70
71 static inline unsigned long array_index_mask_nospec(unsigned long index,
72 unsigned long size)
73 {
74 unsigned long mask;
75
76 asm volatile ("cmp %1,%2; sbb %0,%0;"
77 :"=r" (mask)
78 :"g"(size),"r" (index)
79 :"cc");
80 return mask;
81 }
82
   The above is a mitigation for Spectre-variant-1 attacks, for clamping
   an array access to within the range of [0, size) if the CPU speculates
   past the array bounds.
86
87 However, it is ultimately used to implement wdev_to_wvif:
88
89 static inline struct wfx_vif *
90 wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
91 {
92 vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
93 if (!wdev->vif[vif_id]) {
94 return NULL;
95 }
96 return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
97 }
98
99 which is used by:
100
101 if (wdev_to_wvif(wvif->wdev, 1))
102 return wdev_to_wvif(wvif->wdev, 1)->vif;
103
104 The code has been written to assume that wdev_to_wvif is deterministic,
105 and won't change from returning non-NULL at the "if" clause to
106 returning NULL at the "->vif" dereference.
107
108 By treating the above specific "asm volatile" as deterministic we avoid
109 a false positive from -Wanalyzer-null-dereference. */
110
111static bool
112deterministic_p (const gasm *asm_stmt)
113{
114 /* Assume something volatile with no inputs is querying
115 changeable state e.g. rdtsc. */
116 if (gimple_asm_ninputs (asm_stmt) == 0
117 && gimple_asm_volatile_p (asm_stmt))
118 return false;
119
120 /* Otherwise assume it's purely a function of its inputs. */
121 return true;
122}
123
124/* Update this model for the asm STMT, using CTXT to report any
125 diagnostics.
126
127 Compare with cfgexpand.cc: expand_asm_stmt. */
128
129void
130region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
131{
132 logger *logger = ctxt ? ctxt->get_logger () : NULL;
133 LOG_SCOPE (logger);
134
135 const unsigned noutputs = gimple_asm_noutputs (asm_stmt: stmt);
136 const unsigned ninputs = gimple_asm_ninputs (asm_stmt: stmt);
137
138 auto_vec<tree> output_tvec;
139 auto_vec<tree> input_tvec;
140 auto_vec<const char *> constraints;
141
142 /* Copy the gimple vectors into new vectors that we can manipulate. */
143 output_tvec.safe_grow (len: noutputs, exact: true);
144 input_tvec.safe_grow (len: ninputs, exact: true);
145 constraints.safe_grow (len: noutputs + ninputs, exact: true);
146
147 for (unsigned i = 0; i < noutputs; ++i)
148 {
149 tree t = gimple_asm_output_op (asm_stmt: stmt, index: i);
150 output_tvec[i] = TREE_VALUE (t);
151 constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
152 }
153 for (unsigned i = 0; i < ninputs; i++)
154 {
155 tree t = gimple_asm_input_op (asm_stmt: stmt, index: i);
156 input_tvec[i] = TREE_VALUE (t);
157 constraints[i + noutputs]
158 = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
159 }
160
161 /* Determine which regions are reachable from the inputs
162 to this stmt. */
163 reachable_regions reachable_regs (this);
164
165 int num_errors = 0;
166
167 auto_vec<const region *> output_regions (noutputs);
168 for (unsigned i = 0; i < noutputs; ++i)
169 {
170 tree val = output_tvec[i];
171 const char *constraint;
172 bool is_inout;
173 bool allows_reg;
174 bool allows_mem;
175
176 const region *dst_reg = get_lvalue (expr: val, ctxt);
177 output_regions.quick_push (obj: dst_reg);
178 reachable_regs.add (reg: dst_reg, is_mutable: true);
179
180 /* Try to parse the output constraint. If that fails, there's
181 no point in going further. */
182 constraint = constraints[i];
183 if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
184 &allows_mem, &allows_reg, &is_inout))
185 {
186 if (logger)
187 logger->log (fmt: "error parsing constraint for output %i: %qs",
188 i, constraint);
189 num_errors++;
190 continue;
191 }
192
193 if (logger)
194 {
195 logger->log (fmt: "output %i: %qs %qE"
196 " is_inout: %i allows_reg: %i allows_mem: %i",
197 i, constraint, val,
198 (int)is_inout, (int)allows_reg, (int)allows_mem);
199 logger->start_log_line ();
200 logger->log_partial (fmt: " region: ");
201 dst_reg->dump_to_pp (pp: logger->get_printer (), simple: true);
202 logger->end_log_line ();
203 }
204
205 }
206
207 /* Ideally should combine with inout_svals to determine the
208 "effective inputs" and use this for the asm_output_svalue. */
209
210 auto_vec<const svalue *> input_svals (ninputs);
211 for (unsigned i = 0; i < ninputs; i++)
212 {
213 tree val = input_tvec[i];
214 const char *constraint = constraints[i + noutputs];
215 bool allows_reg, allows_mem;
216 if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
217 constraints.address (),
218 &allows_mem, &allows_reg))
219 {
220 if (logger)
221 logger->log (fmt: "error parsing constraint for input %i: %qs",
222 i, constraint);
223 num_errors++;
224 continue;
225 }
226
227 tree src_expr = input_tvec[i];
228 const svalue *src_sval = get_rvalue (expr: src_expr, ctxt);
229 check_for_poison (sval: src_sval, expr: src_expr, NULL, ctxt);
230 input_svals.quick_push (obj: src_sval);
231 reachable_regs.handle_sval (sval: src_sval);
232
233 if (logger)
234 {
235 logger->log (fmt: "input %i: %qs %qE"
236 " allows_reg: %i allows_mem: %i",
237 i, constraint, val,
238 (int)allows_reg, (int)allows_mem);
239 logger->start_log_line ();
240 logger->log_partial (fmt: " sval: ");
241 src_sval->dump_to_pp (pp: logger->get_printer (), simple: true);
242 logger->end_log_line ();
243 }
244 }
245
246 if (num_errors > 0)
247 gcc_unreachable ();
248
249 if (logger)
250 {
251 logger->log (fmt: "reachability: ");
252 reachable_regs.dump_to_pp (pp: logger->get_printer ());
253 logger->end_log_line ();
254 }
255
256 /* Given the regions that were reachable from the inputs we
257 want to clobber them.
258 This is similar to region_model::handle_unrecognized_call,
259 but the unknown call policies seems too aggressive (e.g. purging state
260 from anything that's ever escaped). Instead, clobber any clusters
261 that were reachable in *this* asm stmt, rather than those that
262 escaped, and we don't treat the values as having escaped.
263 We also assume that asm stmts don't affect sm-state. */
264 for (auto iter = reachable_regs.begin_mutable_base_regs ();
265 iter != reachable_regs.end_mutable_base_regs (); ++iter)
266 {
267 const region *base_reg = *iter;
268 if (base_reg->symbolic_for_unknown_ptr_p ()
269 || !base_reg->tracked_p ())
270 continue;
271
272 binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
273 cluster->on_asm (stmt, mgr: m_mgr->get_store_manager (),
274 p: conjured_purge (this, ctxt));
275 }
276
277 /* Update the outputs. */
278 for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
279 {
280 tree dst_expr = output_tvec[output_idx];
281 const region *dst_reg = output_regions[output_idx];
282
283 const svalue *sval;
284 if (deterministic_p (asm_stmt: stmt)
285 && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
286 sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
287 asm_stmt: stmt,
288 output_idx,
289 inputs: input_svals);
290 else
291 {
292 sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
293 stmt,
294 id_reg: dst_reg,
295 p: conjured_purge (this,
296 ctxt));
297 }
298 set_value (lhs_reg: dst_reg, rhs_sval: sval, ctxt);
299 }
300}
301
302} // namespace ana
303
304#endif /* #if ENABLE_ANALYZER */
305

/* Source: gcc/analyzer/region-model-asm.cc.  */