1 | /* Handling inline asm in the analyzer. |
2 | Copyright (C) 2021-2024 Free Software Foundation, Inc. |
3 | Contributed by David Malcolm <dmalcolm@redhat.com>. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it |
8 | under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) |
10 | any later version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #define INCLUDE_MEMORY |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "tree.h" |
26 | #include "function.h" |
27 | #include "basic-block.h" |
28 | #include "gimple.h" |
29 | #include "gimple-iterator.h" |
30 | #include "diagnostic-core.h" |
31 | #include "pretty-print.h" |
32 | #include "analyzer/analyzer.h" |
33 | #include "analyzer/analyzer-logging.h" |
34 | #include "options.h" |
35 | #include "analyzer/call-string.h" |
36 | #include "analyzer/program-point.h" |
37 | #include "analyzer/store.h" |
38 | #include "analyzer/region-model.h" |
39 | #include "analyzer/region-model-reachability.h" |
40 | #include "stmt.h" |
41 | |
42 | #if ENABLE_ANALYZER |
43 | |
44 | namespace ana { |
45 | |
46 | /* Minimal asm support for the analyzer. |
47 | |
48 | The objective of this code is to: |
49 | - minimize false positives from the analyzer on the Linux kernel |
50 | (which makes heavy use of inline asm), whilst |
51 | - avoiding having to "teach" the compiler anything about specific strings |
52 | in asm statements. |
53 | |
54 | Specifically, we want to: |
55 | |
56 | (a) mark asm outputs and certain other regions as having been written to, |
57 | to avoid false positives from -Wanalyzer-use-of-uninitialized-value. |
58 | |
59 | (b) identify some of these stmts as "deterministic" so that we can |
60 | write consistent outputs given consistent inputs, so that we can |
61 | avoid false positives for paths in which an asm is invoked twice |
62 | with the same inputs and is expected to emit the same output. |
63 | |
64 | This file implements heuristics for achieving the above. */ |
65 | |
66 | /* Determine if ASM_STMT is deterministic, in the sense of (b) above. |
67 | |
68 | Consider this x86 function taken from the Linux kernel |
69 | (arch/x86/include/asm/barrier.h): |
70 | |
71 | static inline unsigned long array_index_mask_nospec(unsigned long index, |
72 | unsigned long size) |
73 | { |
74 | unsigned long mask; |
75 | |
76 | asm volatile ("cmp %1,%2; sbb %0,%0;" |
77 | :"=r" (mask) |
78 | :"g"(size),"r" (index) |
79 | :"cc"); |
80 | return mask; |
81 | } |
82 | |
83 | The above is a mitigation for Spectre-variant-1 attacks, for clamping |
84 | an array access to within the range of [0, size) if the CPU speculates |
85 | past the array bounds. |
86 | |
87 | However, it is ultimately used to implement wdev_to_wvif: |
88 | |
89 | static inline struct wfx_vif * |
90 | wdev_to_wvif(struct wfx_dev *wdev, int vif_id) |
91 | { |
92 | vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif)); |
93 | if (!wdev->vif[vif_id]) { |
94 | return NULL; |
95 | } |
96 | return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv; |
97 | } |
98 | |
99 | which is used by: |
100 | |
101 | if (wdev_to_wvif(wvif->wdev, 1)) |
102 | return wdev_to_wvif(wvif->wdev, 1)->vif; |
103 | |
104 | The code has been written to assume that wdev_to_wvif is deterministic, |
105 | and won't change from returning non-NULL at the "if" clause to |
106 | returning NULL at the "->vif" dereference. |
107 | |
108 | By treating the above specific "asm volatile" as deterministic we avoid |
109 | a false positive from -Wanalyzer-null-dereference. */ |
110 | |
111 | static bool |
112 | deterministic_p (const gasm *asm_stmt) |
113 | { |
114 | /* Assume something volatile with no inputs is querying |
115 | changeable state e.g. rdtsc. */ |
116 | if (gimple_asm_ninputs (asm_stmt) == 0 |
117 | && gimple_asm_volatile_p (asm_stmt)) |
118 | return false; |
119 | |
120 | /* Otherwise assume it's purely a function of its inputs. */ |
121 | return true; |
122 | } |
123 | |
124 | /* Update this model for the asm STMT, using CTXT to report any |
125 | diagnostics. |
126 | |
127 | Compare with cfgexpand.cc: expand_asm_stmt. */ |
128 | |
129 | void |
130 | region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt) |
131 | { |
132 | logger *logger = ctxt ? ctxt->get_logger () : NULL; |
133 | LOG_SCOPE (logger); |
134 | |
135 | const unsigned noutputs = gimple_asm_noutputs (asm_stmt: stmt); |
136 | const unsigned ninputs = gimple_asm_ninputs (asm_stmt: stmt); |
137 | |
138 | auto_vec<tree> output_tvec; |
139 | auto_vec<tree> input_tvec; |
140 | auto_vec<const char *> constraints; |
141 | |
142 | /* Copy the gimple vectors into new vectors that we can manipulate. */ |
143 | output_tvec.safe_grow (len: noutputs, exact: true); |
144 | input_tvec.safe_grow (len: ninputs, exact: true); |
145 | constraints.safe_grow (len: noutputs + ninputs, exact: true); |
146 | |
147 | for (unsigned i = 0; i < noutputs; ++i) |
148 | { |
149 | tree t = gimple_asm_output_op (asm_stmt: stmt, index: i); |
150 | output_tvec[i] = TREE_VALUE (t); |
151 | constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))); |
152 | } |
153 | for (unsigned i = 0; i < ninputs; i++) |
154 | { |
155 | tree t = gimple_asm_input_op (asm_stmt: stmt, index: i); |
156 | input_tvec[i] = TREE_VALUE (t); |
157 | constraints[i + noutputs] |
158 | = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))); |
159 | } |
160 | |
161 | /* Determine which regions are reachable from the inputs |
162 | to this stmt. */ |
163 | reachable_regions reachable_regs (this); |
164 | |
165 | int num_errors = 0; |
166 | |
167 | auto_vec<const region *> output_regions (noutputs); |
168 | for (unsigned i = 0; i < noutputs; ++i) |
169 | { |
170 | tree val = output_tvec[i]; |
171 | const char *constraint; |
172 | bool is_inout; |
173 | bool allows_reg; |
174 | bool allows_mem; |
175 | |
176 | const region *dst_reg = get_lvalue (expr: val, ctxt); |
177 | output_regions.quick_push (obj: dst_reg); |
178 | reachable_regs.add (reg: dst_reg, is_mutable: true); |
179 | |
180 | /* Try to parse the output constraint. If that fails, there's |
181 | no point in going further. */ |
182 | constraint = constraints[i]; |
183 | if (!parse_output_constraint (&constraint, i, ninputs, noutputs, |
184 | &allows_mem, &allows_reg, &is_inout)) |
185 | { |
186 | if (logger) |
187 | logger->log (fmt: "error parsing constraint for output %i: %qs" , |
188 | i, constraint); |
189 | num_errors++; |
190 | continue; |
191 | } |
192 | |
193 | if (logger) |
194 | { |
195 | logger->log (fmt: "output %i: %qs %qE" |
196 | " is_inout: %i allows_reg: %i allows_mem: %i" , |
197 | i, constraint, val, |
198 | (int)is_inout, (int)allows_reg, (int)allows_mem); |
199 | logger->start_log_line (); |
200 | logger->log_partial (fmt: " region: " ); |
201 | dst_reg->dump_to_pp (pp: logger->get_printer (), simple: true); |
202 | logger->end_log_line (); |
203 | } |
204 | |
205 | } |
206 | |
207 | /* Ideally should combine with inout_svals to determine the |
208 | "effective inputs" and use this for the asm_output_svalue. */ |
209 | |
210 | auto_vec<const svalue *> input_svals (ninputs); |
211 | for (unsigned i = 0; i < ninputs; i++) |
212 | { |
213 | tree val = input_tvec[i]; |
214 | const char *constraint = constraints[i + noutputs]; |
215 | bool allows_reg, allows_mem; |
216 | if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0, |
217 | constraints.address (), |
218 | &allows_mem, &allows_reg)) |
219 | { |
220 | if (logger) |
221 | logger->log (fmt: "error parsing constraint for input %i: %qs" , |
222 | i, constraint); |
223 | num_errors++; |
224 | continue; |
225 | } |
226 | |
227 | tree src_expr = input_tvec[i]; |
228 | const svalue *src_sval = get_rvalue (expr: src_expr, ctxt); |
229 | check_for_poison (sval: src_sval, expr: src_expr, NULL, ctxt); |
230 | input_svals.quick_push (obj: src_sval); |
231 | reachable_regs.handle_sval (sval: src_sval); |
232 | |
233 | if (logger) |
234 | { |
235 | logger->log (fmt: "input %i: %qs %qE" |
236 | " allows_reg: %i allows_mem: %i" , |
237 | i, constraint, val, |
238 | (int)allows_reg, (int)allows_mem); |
239 | logger->start_log_line (); |
240 | logger->log_partial (fmt: " sval: " ); |
241 | src_sval->dump_to_pp (pp: logger->get_printer (), simple: true); |
242 | logger->end_log_line (); |
243 | } |
244 | } |
245 | |
246 | if (num_errors > 0) |
247 | gcc_unreachable (); |
248 | |
249 | if (logger) |
250 | { |
251 | logger->log (fmt: "reachability: " ); |
252 | reachable_regs.dump_to_pp (pp: logger->get_printer ()); |
253 | logger->end_log_line (); |
254 | } |
255 | |
256 | /* Given the regions that were reachable from the inputs we |
257 | want to clobber them. |
258 | This is similar to region_model::handle_unrecognized_call, |
259 | but the unknown call policies seems too aggressive (e.g. purging state |
260 | from anything that's ever escaped). Instead, clobber any clusters |
261 | that were reachable in *this* asm stmt, rather than those that |
262 | escaped, and we don't treat the values as having escaped. |
263 | We also assume that asm stmts don't affect sm-state. */ |
264 | for (auto iter = reachable_regs.begin_mutable_base_regs (); |
265 | iter != reachable_regs.end_mutable_base_regs (); ++iter) |
266 | { |
267 | const region *base_reg = *iter; |
268 | if (base_reg->symbolic_for_unknown_ptr_p () |
269 | || !base_reg->tracked_p ()) |
270 | continue; |
271 | |
272 | binding_cluster *cluster = m_store.get_or_create_cluster (base_reg); |
273 | cluster->on_asm (stmt, mgr: m_mgr->get_store_manager (), |
274 | p: conjured_purge (this, ctxt)); |
275 | } |
276 | |
277 | /* Update the outputs. */ |
278 | for (unsigned output_idx = 0; output_idx < noutputs; output_idx++) |
279 | { |
280 | tree dst_expr = output_tvec[output_idx]; |
281 | const region *dst_reg = output_regions[output_idx]; |
282 | |
283 | const svalue *sval; |
284 | if (deterministic_p (asm_stmt: stmt) |
285 | && input_svals.length () <= asm_output_svalue::MAX_INPUTS) |
286 | sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr), |
287 | asm_stmt: stmt, |
288 | output_idx, |
289 | inputs: input_svals); |
290 | else |
291 | { |
292 | sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr), |
293 | stmt, |
294 | id_reg: dst_reg, |
295 | p: conjured_purge (this, |
296 | ctxt)); |
297 | } |
298 | set_value (lhs_reg: dst_reg, rhs_sval: sval, ctxt); |
299 | } |
300 | } |
301 | |
302 | } // namespace ana |
303 | |
304 | #endif /* #if ENABLE_ANALYZER */ |
305 | |