1 | /* Detection of infinite loops. |
2 | Copyright (C) 2022-2024 Free Software Foundation, Inc. |
3 | Contributed by David Malcolm <dmalcolm@redhat.com>. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it |
8 | under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) |
10 | any later version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #define INCLUDE_MEMORY |
23 | #define INCLUDE_VECTOR |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "tree.h" |
27 | #include "fold-const.h" |
28 | #include "gcc-rich-location.h" |
29 | #include "alloc-pool.h" |
30 | #include "fibonacci_heap.h" |
31 | #include "shortest-paths.h" |
32 | #include "diagnostic-core.h" |
33 | #include "diagnostic-event-id.h" |
34 | #include "diagnostic-path.h" |
35 | #include "function.h" |
36 | #include "pretty-print.h" |
37 | #include "sbitmap.h" |
38 | #include "bitmap.h" |
39 | #include "tristate.h" |
40 | #include "ordered-hash-map.h" |
41 | #include "selftest.h" |
42 | #include "json.h" |
43 | #include "analyzer/analyzer.h" |
44 | #include "analyzer/analyzer-logging.h" |
45 | #include "analyzer/call-string.h" |
46 | #include "analyzer/program-point.h" |
47 | #include "analyzer/store.h" |
48 | #include "analyzer/region-model.h" |
49 | #include "analyzer/constraint-manager.h" |
50 | #include "analyzer/sm.h" |
51 | #include "analyzer/pending-diagnostic.h" |
52 | #include "analyzer/diagnostic-manager.h" |
53 | #include "cfg.h" |
54 | #include "basic-block.h" |
55 | #include "gimple.h" |
56 | #include "gimple-iterator.h" |
57 | #include "gimple-pretty-print.h" |
58 | #include "cgraph.h" |
59 | #include "digraph.h" |
60 | #include "analyzer/supergraph.h" |
61 | #include "analyzer/program-state.h" |
62 | #include "analyzer/exploded-graph.h" |
63 | #include "analyzer/checker-path.h" |
64 | #include "analyzer/feasible-graph.h" |
65 | #include "make-unique.h" |
66 | #include "diagnostic-format-sarif.h" |
67 | |
68 | /* A bundle of data characterizing a particular infinite loop |
69 | identified within the exploded graph. */ |
70 | |
71 | struct infinite_loop |
72 | { |
73 | infinite_loop (const exploded_node &enode, |
74 | location_t loc, |
75 | std::vector<const exploded_edge *> &&eedges, |
76 | logger *logger) |
77 | : m_enode (enode), |
78 | m_loc (loc), |
79 | m_eedge_vec (eedges) |
80 | { |
81 | LOG_SCOPE (logger); |
82 | if (logger) |
83 | { |
84 | logger->start_log_line (); |
85 | logger->log_partial (fmt: "infinite loop: EN: %i" , m_enode.m_index); |
86 | for (auto eedge : m_eedge_vec) |
87 | { |
88 | logger->log_partial (fmt: " ->" ); |
89 | if (const superedge *sedge = eedge->m_sedge) |
90 | { |
91 | sedge->dump_label_to_pp (pp: logger->get_printer (), user_facing: false); |
92 | } |
93 | logger->log_partial (fmt: " EN: %i" , eedge->m_dest->m_index); |
94 | } |
95 | logger->end_log_line (); |
96 | } |
97 | } |
98 | |
99 | bool |
100 | operator== (const infinite_loop &other) const |
101 | { |
102 | /* Compare underlying supernode, rather than enodes, so that |
103 | we don't get duplicates in functions that are called from |
104 | elsewhere. */ |
105 | return (m_enode.get_supernode () == other.m_enode.get_supernode () |
106 | && m_loc == other.m_loc); |
107 | } |
108 | |
109 | json::object * |
110 | to_json () const |
111 | { |
112 | json::object *loop_obj = new json::object (); |
113 | loop_obj->set_integer (key: "enode" , v: m_enode.m_index); |
114 | json::array *edge_arr = new json::array (); |
115 | for (auto eedge : m_eedge_vec) |
116 | edge_arr->append (v: eedge->to_json ()); |
117 | loop_obj->set (key: "eedges" , v: edge_arr); |
118 | return loop_obj; |
119 | } |
120 | |
121 | const exploded_node &m_enode; |
122 | location_t m_loc; |
123 | std::vector<const exploded_edge *> m_eedge_vec; |
124 | }; |
125 | |
126 | /* A custom subclass of start_cfg_edge_event that rewords the |
127 | message to indicate that the CFG edge is *always* taken on |
128 | subsequent iterations, assuming it's been taken once. */ |
129 | |
130 | class perpetual_start_cfg_edge_event : public start_cfg_edge_event |
131 | { |
132 | public: |
133 | perpetual_start_cfg_edge_event (const exploded_edge &eedge, |
134 | const event_loc_info &loc_info) |
135 | : start_cfg_edge_event (eedge, loc_info) |
136 | { |
137 | } |
138 | |
139 | label_text get_desc (bool can_colorize) const final override |
140 | { |
141 | bool user_facing = !flag_analyzer_verbose_edges; |
142 | label_text edge_desc (m_sedge->get_description (user_facing)); |
143 | if (user_facing) |
144 | { |
145 | if (edge_desc.get () && strlen (s: edge_desc.get ()) > 0) |
146 | { |
147 | label_text cond_desc = maybe_describe_condition (can_colorize); |
148 | label_text result; |
149 | if (cond_desc.get ()) |
150 | return make_label_text |
151 | (can_colorize, |
152 | fmt: "%s: always following %qs branch..." , |
153 | cond_desc.get (), edge_desc.get ()); |
154 | else |
155 | return make_label_text |
156 | (can_colorize, |
157 | fmt: "if it ever follows %qs branch, it will always do so..." , |
158 | edge_desc.get ()); |
159 | } |
160 | } |
161 | return start_cfg_edge_event::get_desc (can_colorize); |
162 | } |
163 | }; |
164 | |
165 | /* A subclass of pending_diagnostic for complaining about suspected |
166 | infinite loops. */ |
167 | |
168 | class infinite_loop_diagnostic |
169 | : public pending_diagnostic_subclass<infinite_loop_diagnostic> |
170 | { |
171 | public: |
172 | infinite_loop_diagnostic (std::unique_ptr<infinite_loop> inf_loop) |
173 | : m_inf_loop (std::move (inf_loop)) |
174 | { |
175 | gcc_assert (m_inf_loop != nullptr); |
176 | } |
177 | |
178 | const char *get_kind () const final override |
179 | { |
180 | return "infinite_loop_diagnostic" ; |
181 | } |
182 | |
183 | bool operator== (const infinite_loop_diagnostic &other) const |
184 | { |
185 | return *m_inf_loop == *other.m_inf_loop; |
186 | } |
187 | |
188 | int get_controlling_option () const final override |
189 | { |
190 | return OPT_Wanalyzer_infinite_loop; |
191 | } |
192 | |
193 | bool emit (diagnostic_emission_context &ctxt) final override |
194 | { |
195 | /* "CWE-835: Loop with Unreachable Exit Condition ('Infinite Loop')". */ |
196 | ctxt.add_cwe (cwe: 835); |
197 | return ctxt.warn ("infinite loop" ); |
198 | } |
199 | |
200 | bool maybe_add_custom_events_for_superedge (const exploded_edge &, |
201 | checker_path *) |
202 | final override |
203 | { |
204 | /* Don't add any regular events; instead we add them after pruning as |
205 | part of the "final" warning. */ |
206 | return true; |
207 | } |
208 | |
209 | label_text describe_final_event (const evdesc::final_event &ev) final override |
210 | { |
211 | return ev.formatted_print (fmt: "infinite loop here" ); |
212 | } |
213 | |
214 | /* Customize the location where the warning_event appears. */ |
215 | void add_final_event (const state_machine *, |
216 | const exploded_node *enode, |
217 | const gimple *, |
218 | tree, |
219 | state_machine::state_t, |
220 | checker_path *emission_path) final override |
221 | { |
222 | emission_path->add_event |
223 | (event: make_unique<warning_event> |
224 | (args: event_loc_info (m_inf_loop->m_loc, |
225 | enode->get_function ()->decl, |
226 | enode->get_stack_depth ()), |
227 | args&: enode, |
228 | NULL, NULL, NULL)); |
229 | |
230 | logger *logger = emission_path->get_logger (); |
231 | |
232 | /* EMISSION_PATH has the path to the entry of the infinite loop. |
233 | Add extra edges showing the loop itself. */ |
234 | for (auto eedge : m_inf_loop->m_eedge_vec) |
235 | { |
236 | if (logger) |
237 | logger->log (fmt: "EN: %i -> EN: %i" , |
238 | eedge->m_src->m_index, |
239 | eedge->m_dest->m_index); |
240 | if (!eedge->m_sedge) |
241 | continue; |
242 | |
243 | const cfg_superedge *cfg_sedge |
244 | = eedge->m_sedge->dyn_cast_cfg_superedge (); |
245 | if (!cfg_sedge) |
246 | continue; |
247 | |
248 | const exploded_node *src_node = eedge->m_src; |
249 | const program_point &src_point = src_node->get_point (); |
250 | const exploded_node *dst_node = eedge->m_dest; |
251 | const program_point &dst_point = dst_node->get_point (); |
252 | const int src_stack_depth = src_point.get_stack_depth (); |
253 | const int dst_stack_depth = dst_point.get_stack_depth (); |
254 | const gimple *last_stmt = src_point.get_supernode ()->get_last_stmt (); |
255 | |
256 | event_loc_info loc_info_from |
257 | (last_stmt ? last_stmt->location : cfg_sedge->get_goto_locus (), |
258 | src_point.get_fndecl (), |
259 | src_stack_depth); |
260 | event_loc_info loc_info_to |
261 | (dst_point.get_supernode ()->get_start_location (), |
262 | dst_point.get_fndecl (), |
263 | dst_stack_depth); |
264 | |
265 | if (const switch_cfg_superedge *switch_cfg_sedge |
266 | = cfg_sedge->dyn_cast_switch_cfg_superedge ()) |
267 | { |
268 | if (switch_cfg_sedge->implicitly_created_default_p ()) |
269 | { |
270 | emission_path->add_event |
271 | (event: make_unique<perpetual_start_cfg_edge_event> (args: *eedge, |
272 | args&: loc_info_from)); |
273 | emission_path->add_event |
274 | (event: make_unique<end_cfg_edge_event> |
275 | (args: *eedge, |
276 | args&: loc_info_to)); |
277 | } |
278 | } |
279 | |
280 | if (cfg_sedge->true_value_p ()) |
281 | { |
282 | emission_path->add_event |
283 | (event: make_unique<perpetual_start_cfg_edge_event> (args: *eedge, |
284 | args&: loc_info_from)); |
285 | emission_path->add_event |
286 | (event: make_unique<end_cfg_edge_event> |
287 | (args: *eedge, |
288 | args&: loc_info_to)); |
289 | } |
290 | else if (cfg_sedge->false_value_p ()) |
291 | { |
292 | emission_path->add_event |
293 | (event: make_unique<perpetual_start_cfg_edge_event> (args: *eedge, |
294 | args&: loc_info_from)); |
295 | emission_path->add_event |
296 | (event: make_unique<end_cfg_edge_event> |
297 | (args: *eedge, |
298 | args&: loc_info_to)); |
299 | } |
300 | else if (cfg_sedge->back_edge_p ()) |
301 | { |
302 | emission_path->add_event |
303 | (event: make_unique<precanned_custom_event> |
304 | (args&: loc_info_from, args: "looping back..." )); |
305 | emission_path->add_event |
306 | (event: make_unique<end_cfg_edge_event> |
307 | (args: *eedge, |
308 | args&: loc_info_to)); |
309 | } |
310 | } |
311 | } |
312 | |
313 | void maybe_add_sarif_properties (sarif_object &result_obj) |
314 | const final override |
315 | { |
316 | sarif_property_bag &props = result_obj.get_or_create_properties (); |
317 | #define PROPERTY_PREFIX "gcc/analyzer/infinite_loop_diagnostic/" |
318 | props.set (PROPERTY_PREFIX "inf_loop" , v: m_inf_loop->to_json ()); |
319 | #undef PROPERTY_PREFIX |
320 | } |
321 | |
322 | private: |
323 | std::unique_ptr<infinite_loop> m_inf_loop; |
324 | }; |
325 | |
326 | /* If ENODE has an in-edge corresponding to a CFG backedge, return that |
327 | exploded in-edge. |
328 | Otherwise, return nullptr. */ |
329 | |
330 | static const exploded_edge * |
331 | get_in_edge_back_edge (const exploded_node &enode) |
332 | { |
333 | for (auto in_edge : enode.m_preds) |
334 | { |
335 | const superedge *sedge = in_edge->m_sedge; |
336 | if (!sedge) |
337 | continue; |
338 | const cfg_superedge *cfg_sedge = sedge->dyn_cast_cfg_superedge (); |
339 | if (!cfg_sedge) |
340 | continue; |
341 | if (cfg_sedge->back_edge_p ()) |
342 | return in_edge; |
343 | } |
344 | return nullptr; |
345 | } |
346 | |
347 | /* Subclass of region_model_context that rejects conditional branches that |
348 | aren't known for definite. */ |
349 | |
350 | class infinite_loop_checking_context : public noop_region_model_context |
351 | { |
352 | public: |
353 | infinite_loop_checking_context () : m_unusable (false) {} |
354 | |
355 | bool checking_for_infinite_loop_p () const override { return true; } |
356 | void on_unusable_in_infinite_loop () override { m_unusable = true; } |
357 | |
358 | bool unusable_p () const { return m_unusable; } |
359 | |
360 | private: |
361 | bool m_unusable; |
362 | }; |
363 | |
364 | /* Determine if an infinite loop starts at ENODE. |
365 | Return the loop if it is found, nullptr otherwise. |
366 | |
367 | Look for cycles in the exploded graph in which: |
368 | - no externally visible work occurs |
369 | - no escape from the cycle |
370 | - the program state is "sufficiently concrete" at each step: |
371 | - no unknown activity could be occurring |
372 | - the worklist was fully drained for each enode in the cycle |
373 | i.e. every enode in the cycle is processed. */ |
374 | |
375 | static std::unique_ptr<infinite_loop> |
376 | starts_infinite_loop_p (const exploded_node &enode, |
377 | const exploded_graph &eg, |
378 | logger *logger) |
379 | { |
380 | LOG_FUNC_1 (logger, "considering EN: %i" , enode.m_index); |
381 | |
382 | /* Only consider enodes that have a CFG back edge as an in-edge. */ |
383 | if (const exploded_edge *back_edge = get_in_edge_back_edge (enode)) |
384 | { |
385 | if (logger) |
386 | logger->log (fmt: "got backedge from EN: %i" , |
387 | back_edge->m_src->m_index); |
388 | } |
389 | else |
390 | { |
391 | if (logger) |
392 | logger->log (fmt: "rejecting: no backedge in in-edges" ); |
393 | return nullptr; |
394 | } |
395 | |
396 | /* Support for dumping an .infinite-loop.dot file visualizing the |
397 | traversal for this enode. */ |
398 | std::unique_ptr<feasible_graph> fg; |
399 | feasible_node *curr_fnode = nullptr; |
400 | |
401 | if (flag_dump_analyzer_infinite_loop) |
402 | fg = ::make_unique<feasible_graph> (); |
403 | |
404 | location_t first_loc = UNKNOWN_LOCATION; |
405 | const exploded_node *iter = &enode; |
406 | feasibility_state state (*enode.get_state ().m_region_model, |
407 | eg.get_supergraph ()); |
408 | |
409 | if (fg) |
410 | curr_fnode = fg->add_node (enode: &enode, state, path_length: 0); |
411 | |
412 | hash_set<const exploded_node *> visited; |
413 | std::vector<const exploded_edge *> eedges; |
414 | while (1) |
415 | { |
416 | if (logger) |
417 | logger->log (fmt: "iter: EN: %i" , iter->m_index); |
418 | /* Analysis bailed out before processing this node. */ |
419 | if (iter->get_status () == exploded_node::STATUS_WORKLIST) |
420 | { |
421 | if (logger) |
422 | logger->log (fmt: "rejecting: EN: %i is still in worklist" , |
423 | iter->m_index); |
424 | return nullptr; |
425 | } |
426 | if (visited.contains (k: iter)) |
427 | { |
428 | /* We've looped back on ourselves. ENODE is in the loop |
429 | itself if ENODE is the first place we looped back, |
430 | as opposed to being on a path to a loop. */ |
431 | if (iter == &enode) |
432 | { |
433 | if (logger) |
434 | logger->log (fmt: "accepting: looped back to EN: %i" , |
435 | iter->m_index); |
436 | if (fg) |
437 | { |
438 | auto_timevar tv (TV_ANALYZER_DUMP); |
439 | pretty_printer pp; |
440 | pp_printf (&pp, "%s.en%i.infinite-loop.dot" , |
441 | dump_base_name, enode.m_index); |
442 | char *filename = xstrdup (pp_formatted_text (&pp)); |
443 | feasible_graph::dump_args_t dump_args (eg); |
444 | fg->dump_dot (path: filename, root_cluster: nullptr, args: dump_args); |
445 | free (ptr: filename); |
446 | } |
447 | return ::make_unique<infinite_loop> (args: enode, |
448 | args&: first_loc, |
449 | args: std::move (eedges), |
450 | args&: logger); |
451 | } |
452 | else |
453 | { |
454 | if (logger) |
455 | logger->log (fmt: "rejecting: looped back to EN: %i, not to EN: %i" , |
456 | iter->m_index, enode.m_index); |
457 | return nullptr; |
458 | } |
459 | } |
460 | visited.add (k: iter); |
461 | if (first_loc == UNKNOWN_LOCATION) |
462 | { |
463 | location_t enode_loc = iter->get_point ().get_location (); |
464 | if (enode_loc != UNKNOWN_LOCATION) |
465 | first_loc = enode_loc; |
466 | } |
467 | |
468 | /* Find the out-edges that are feasible, given the |
469 | constraints here. */ |
470 | typedef std::pair<feasibility_state, const exploded_edge *> pair_t; |
471 | std::vector<pair_t> succs; |
472 | for (auto out_edge : iter->m_succs) |
473 | { |
474 | log_scope s (logger, "considering out-edge" , |
475 | "EN:%i -> EN:%i" , |
476 | out_edge->m_src->m_index, |
477 | out_edge->m_dest->m_index); |
478 | feasibility_state next_state (state); |
479 | |
480 | /* Use this context to require edge conditions to be known, |
481 | rather than be merely "possible". */ |
482 | infinite_loop_checking_context ctxt; |
483 | if (next_state.maybe_update_for_edge (logger, |
484 | eedge: out_edge, |
485 | ctxt: &ctxt, |
486 | out_rc: nullptr)) |
487 | succs.push_back (x: pair_t (next_state, out_edge)); |
488 | if (ctxt.unusable_p ()) |
489 | { |
490 | /* If we get here, then we have e.g. a gcond where |
491 | the condition is UNKNOWN, or a condition |
492 | based on a widening_svalue. Reject such paths. */ |
493 | if (logger) |
494 | logger->log (fmt: "rejecting: unusable" ); |
495 | return nullptr; |
496 | } |
497 | } |
498 | |
499 | if (succs.size () != 1) |
500 | { |
501 | if (logger) |
502 | logger->log (fmt: "rejecting: %i feasible successors" , |
503 | (int)succs.size ()); |
504 | return nullptr; |
505 | } |
506 | const feasibility_state &next_state = succs[0].first; |
507 | const exploded_edge *out_eedge = succs[0].second; |
508 | if (out_eedge->could_do_work_p ()) |
509 | { |
510 | if (logger) |
511 | logger->log (fmt: "rejecting: edge could do work" ); |
512 | return nullptr; |
513 | } |
514 | if (fg) |
515 | { |
516 | feasible_node *next_fnode = fg->add_node (enode: out_eedge->m_dest, |
517 | state: next_state, |
518 | path_length: fg->m_nodes.length ()); |
519 | fg->add_edge (edge: new feasible_edge (curr_fnode, next_fnode, out_eedge)); |
520 | curr_fnode = next_fnode; |
521 | } |
522 | state = next_state; |
523 | eedges.push_back (x: out_eedge); |
524 | if (first_loc == UNKNOWN_LOCATION) |
525 | { |
526 | if (out_eedge->m_sedge) |
527 | if (::edge cfg_edge = out_eedge->m_sedge->get_any_cfg_edge ()) |
528 | if (cfg_edge->goto_locus > BUILTINS_LOCATION) |
529 | first_loc = cfg_edge->goto_locus; |
530 | } |
531 | iter = out_eedge->m_dest; |
532 | } |
533 | } |
534 | |
535 | /* Implementation of -Wanalyzer-infinite-loop. */ |
536 | |
537 | void |
538 | exploded_graph::detect_infinite_loops () |
539 | { |
540 | LOG_FUNC (get_logger ()); |
541 | auto_timevar tv (TV_ANALYZER_INFINITE_LOOPS); |
542 | |
543 | /* Track all enodes we've warned for; both the loop entrypoints |
544 | and all the enodes within those loops. */ |
545 | hash_set<const exploded_node *> warned_for; |
546 | |
547 | for (auto enode : m_nodes) |
548 | { |
549 | if (get_logger ()) |
550 | get_logger ()->log (fmt: "visited: %i out of %i" , |
551 | (int)warned_for.elements (), m_nodes.length ()); |
552 | |
553 | /* Only warn about the first enode we encounter in each cycle. */ |
554 | if (warned_for.contains(k: enode)) |
555 | continue; |
556 | |
557 | if (std::unique_ptr<infinite_loop> inf_loop |
558 | = starts_infinite_loop_p (enode: *enode, eg: *this, logger: get_logger ())) |
559 | { |
560 | const supernode *snode = enode->get_supernode (); |
561 | |
562 | if (get_logger ()) |
563 | get_logger ()->log (fmt: "EN: %i from starts_infinite_loop_p" , |
564 | enode->m_index); |
565 | |
566 | for (auto iter : inf_loop->m_eedge_vec) |
567 | warned_for.add (k: iter->m_src); |
568 | gcc_assert (warned_for.contains(enode)); |
569 | |
570 | if (inf_loop->m_loc == UNKNOWN_LOCATION) |
571 | { |
572 | if (get_logger ()) |
573 | get_logger ()->log |
574 | (fmt: "no location available for reporting infinite loop" ); |
575 | continue; |
576 | } |
577 | |
578 | pending_location ploc (enode, snode, inf_loop->m_loc); |
579 | auto d |
580 | = ::make_unique<infinite_loop_diagnostic> (args: std::move (inf_loop)); |
581 | get_diagnostic_manager ().add_diagnostic (ploc, d: std::move (d)); |
582 | } |
583 | } |
584 | } |
585 | |