1/* A state machine for detecting misuses of <stdio.h>'s FILE * API.
2 Copyright (C) 2019-2024 Free Software Foundation, Inc.
3 Contributed by David Malcolm <dmalcolm@redhat.com>.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#define INCLUDE_MEMORY
23#include "system.h"
24#include "coretypes.h"
25#include "make-unique.h"
26#include "tree.h"
27#include "function.h"
28#include "basic-block.h"
29#include "gimple.h"
30#include "options.h"
31#include "diagnostic-path.h"
32#include "analyzer/analyzer.h"
33#include "diagnostic-event-id.h"
34#include "analyzer/analyzer-logging.h"
35#include "analyzer/sm.h"
36#include "analyzer/pending-diagnostic.h"
37#include "analyzer/function-set.h"
38#include "analyzer/analyzer-selftests.h"
39#include "selftest.h"
40#include "analyzer/call-string.h"
41#include "analyzer/program-point.h"
42#include "analyzer/store.h"
43#include "analyzer/region-model.h"
44#include "analyzer/call-details.h"
45
46#if ENABLE_ANALYZER
47
48namespace ana {
49
50namespace {
51
52/* A state machine for detecting misuses of <stdio.h>'s FILE * API. */
53
54class fileptr_state_machine : public state_machine
55{
56public:
57 fileptr_state_machine (logger *logger);
58
59 bool inherited_state_p () const final override { return false; }
60
61 state_machine::state_t
62 get_default_state (const svalue *sval) const final override
63 {
64 if (tree cst = sval->maybe_get_constant ())
65 {
66 if (zerop (cst))
67 return m_null;
68 }
69 return m_start;
70 }
71
72 bool on_stmt (sm_context *sm_ctxt,
73 const supernode *node,
74 const gimple *stmt) const final override;
75
76 void on_condition (sm_context *sm_ctxt,
77 const supernode *node,
78 const gimple *stmt,
79 const svalue *lhs,
80 enum tree_code op,
81 const svalue *rhs) const final override;
82
83 bool can_purge_p (state_t s) const final override;
84 std::unique_ptr<pending_diagnostic> on_leak (tree var) const final override;
85
86 /* State for a FILE * returned from fopen that hasn't been checked for
87 NULL.
88 It could be an open stream, or could be NULL. */
89 state_t m_unchecked;
90
91 /* State for a FILE * that's known to be NULL. */
92 state_t m_null;
93
94 /* State for a FILE * that's known to be a non-NULL open stream. */
95 state_t m_nonnull;
96
97 /* State for a FILE * that's had fclose called on it. */
98 state_t m_closed;
99
100 /* Stop state, for a FILE * we don't want to track any more. */
101 state_t m_stop;
102};
103
104/* Base class for diagnostics relative to fileptr_state_machine. */
105
106class file_diagnostic : public pending_diagnostic
107{
108public:
109 file_diagnostic (const fileptr_state_machine &sm, tree arg)
110 : m_sm (sm), m_arg (arg)
111 {}
112
113 bool subclass_equal_p (const pending_diagnostic &base_other) const override
114 {
115 return same_tree_p (t1: m_arg, t2: ((const file_diagnostic &)base_other).m_arg);
116 }
117
118 label_text describe_state_change (const evdesc::state_change &change)
119 override
120 {
121 if (change.m_old_state == m_sm.get_start_state ()
122 && change.m_new_state == m_sm.m_unchecked)
123 // TODO: verify that it's the fopen stmt, not a copy
124 return label_text::borrow (buffer: "opened here");
125 if (change.m_old_state == m_sm.m_unchecked
126 && change.m_new_state == m_sm.m_nonnull)
127 {
128 if (change.m_expr)
129 return change.formatted_print (fmt: "assuming %qE is non-NULL",
130 change.m_expr);
131 else
132 return change.formatted_print (fmt: "assuming FILE * is non-NULL");
133 }
134 if (change.m_new_state == m_sm.m_null)
135 {
136 if (change.m_expr)
137 return change.formatted_print (fmt: "assuming %qE is NULL",
138 change.m_expr);
139 else
140 return change.formatted_print (fmt: "assuming FILE * is NULL");
141 }
142 return label_text ();
143 }
144
145 diagnostic_event::meaning
146 get_meaning_for_state_change (const evdesc::state_change &change)
147 const final override
148 {
149 if (change.m_old_state == m_sm.get_start_state ()
150 && change.m_new_state == m_sm.m_unchecked)
151 return diagnostic_event::meaning (diagnostic_event::VERB_acquire,
152 diagnostic_event::NOUN_resource);
153 if (change.m_new_state == m_sm.m_closed)
154 return diagnostic_event::meaning (diagnostic_event::VERB_release,
155 diagnostic_event::NOUN_resource);
156 return diagnostic_event::meaning ();
157 }
158
159protected:
160 const fileptr_state_machine &m_sm;
161 tree m_arg;
162};
163
164class double_fclose : public file_diagnostic
165{
166public:
167 double_fclose (const fileptr_state_machine &sm, tree arg)
168 : file_diagnostic (sm, arg)
169 {}
170
171 const char *get_kind () const final override { return "double_fclose"; }
172
173 int get_controlling_option () const final override
174 {
175 return OPT_Wanalyzer_double_fclose;
176 }
177
178 bool emit (diagnostic_emission_context &ctxt) final override
179 {
180 /* CWE-1341: Multiple Releases of Same Resource or Handle. */
181 ctxt.add_cwe (cwe: 1341);
182 return ctxt.warn ("double %<fclose%> of FILE %qE",
183 m_arg);
184 }
185
186 label_text describe_state_change (const evdesc::state_change &change)
187 override
188 {
189 if (change.m_new_state == m_sm.m_closed)
190 {
191 m_first_fclose_event = change.m_event_id;
192 return change.formatted_print (fmt: "first %qs here", "fclose");
193 }
194 return file_diagnostic::describe_state_change (change);
195 }
196
197 label_text describe_final_event (const evdesc::final_event &ev) final override
198 {
199 if (m_first_fclose_event.known_p ())
200 return ev.formatted_print (fmt: "second %qs here; first %qs was at %@",
201 "fclose", "fclose",
202 &m_first_fclose_event);
203 return ev.formatted_print (fmt: "second %qs here", "fclose");
204 }
205
206private:
207 diagnostic_event_id_t m_first_fclose_event;
208};
209
210class file_leak : public file_diagnostic
211{
212public:
213 file_leak (const fileptr_state_machine &sm, tree arg)
214 : file_diagnostic (sm, arg)
215 {}
216
217 const char *get_kind () const final override { return "file_leak"; }
218
219 int get_controlling_option () const final override
220 {
221 return OPT_Wanalyzer_file_leak;
222 }
223
224 bool emit (diagnostic_emission_context &ctxt) final override
225 {
226 /* CWE-775: "Missing Release of File Descriptor or Handle after
227 Effective Lifetime". */
228 ctxt.add_cwe (cwe: 775);
229 if (m_arg)
230 return ctxt.warn ("leak of FILE %qE", m_arg);
231 else
232 return ctxt.warn ("leak of FILE");
233 }
234
235 label_text describe_state_change (const evdesc::state_change &change)
236 final override
237 {
238 if (change.m_new_state == m_sm.m_unchecked)
239 {
240 m_fopen_event = change.m_event_id;
241 return label_text::borrow (buffer: "opened here");
242 }
243 return file_diagnostic::describe_state_change (change);
244 }
245
246 label_text describe_final_event (const evdesc::final_event &ev) final override
247 {
248 if (m_fopen_event.known_p ())
249 {
250 if (ev.m_expr)
251 return ev.formatted_print (fmt: "%qE leaks here; was opened at %@",
252 ev.m_expr, &m_fopen_event);
253 else
254 return ev.formatted_print (fmt: "leaks here; was opened at %@",
255 &m_fopen_event);
256 }
257 else
258 {
259 if (ev.m_expr)
260 return ev.formatted_print (fmt: "%qE leaks here", ev.m_expr);
261 else
262 return ev.formatted_print (fmt: "leaks here");
263 }
264 }
265
266private:
267 diagnostic_event_id_t m_fopen_event;
268};
269
270/* fileptr_state_machine's ctor. */
271
272fileptr_state_machine::fileptr_state_machine (logger *logger)
273: state_machine ("file", logger),
274 m_unchecked (add_state (name: "unchecked")),
275 m_null (add_state (name: "null")),
276 m_nonnull (add_state (name: "nonnull")),
277 m_closed (add_state (name: "closed")),
278 m_stop (add_state (name: "stop"))
279{
280}
281
282/* Get a set of functions that are known to take a FILE * that must be open,
283 and are known to not close it. */
284
285static function_set
286get_file_using_fns ()
287{
288 // TODO: populate this list more fully
289 static const char * const funcnames[] = {
290 /* This array must be kept sorted. */
291 "__fbufsize",
292 "__flbf",
293 "__fpending",
294 "__fpurge",
295 "__freadable",
296 "__freading",
297 "__fsetlocking",
298 "__fwritable",
299 "__fwriting",
300 "clearerr",
301 "clearerr_unlocked",
302 "feof",
303 "feof_unlocked",
304 "ferror",
305 "ferror_unlocked",
306 "fflush", // safe to call with NULL
307 "fflush_unlocked", // safe to call with NULL
308 "fgetc",
309 "fgetc_unlocked",
310 "fgetpos",
311 "fgets",
312 "fgets_unlocked",
313 "fgetwc_unlocked",
314 "fgetws_unlocked",
315 "fileno",
316 "fileno_unlocked",
317 "fprintf",
318 "fputc",
319 "fputc_unlocked",
320 "fputs",
321 "fputs_unlocked",
322 "fputwc_unlocked",
323 "fputws_unlocked",
324 "fread_unlocked",
325 "fseek",
326 "fsetpos",
327 "ftell",
328 "fwrite_unlocked",
329 "getc",
330 "getc_unlocked",
331 "getwc_unlocked",
332 "putc",
333 "putc_unlocked",
334 "rewind",
335 "setbuf",
336 "setbuffer",
337 "setlinebuf",
338 "setvbuf",
339 "ungetc",
340 "vfprintf"
341 };
342 const size_t count = ARRAY_SIZE (funcnames);
343 function_set fs (funcnames, count);
344 return fs;
345}
346
347/* Return true if FNDECL is known to require an open FILE *, and is known
348 to not close it. */
349
350static bool
351is_file_using_fn_p (tree fndecl)
352{
353 function_set fs = get_file_using_fns ();
354 if (fs.contains_decl_p (fndecl))
355 return true;
356
357 /* Also support variants of these names prefixed with "_IO_". */
358 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
359 if (startswith (str: name, prefix: "_IO_") && fs.contains_name_p (name: name + 4))
360 return true;
361
362 return false;
363}
364
365/* Implementation of state_machine::on_stmt vfunc for fileptr_state_machine. */
366
367bool
368fileptr_state_machine::on_stmt (sm_context *sm_ctxt,
369 const supernode *node,
370 const gimple *stmt) const
371{
372 if (const gcall *call = dyn_cast <const gcall *> (p: stmt))
373 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
374 {
375 if (is_named_call_p (fndecl: callee_fndecl, funcname: "fopen", call, num_args: 2))
376 {
377 tree lhs = gimple_call_lhs (gs: call);
378 if (lhs)
379 sm_ctxt->on_transition (node, stmt, var: lhs, from: m_start, to: m_unchecked);
380 else
381 {
382 /* TODO: report leak. */
383 }
384 return true;
385 }
386
387 if (is_named_call_p (fndecl: callee_fndecl, funcname: "fclose", call, num_args: 1))
388 {
389 tree arg = gimple_call_arg (gs: call, index: 0);
390
391 sm_ctxt->on_transition (node, stmt, var: arg, from: m_start, to: m_closed);
392
393 // TODO: is it safe to call fclose (NULL) ?
394 sm_ctxt->on_transition (node, stmt, var: arg, from: m_unchecked, to: m_closed);
395 sm_ctxt->on_transition (node, stmt, var: arg, from: m_null, to: m_closed);
396
397 sm_ctxt->on_transition (node, stmt , var: arg, from: m_nonnull, to: m_closed);
398
399 if (sm_ctxt->get_state (stmt, var: arg) == m_closed)
400 {
401 tree diag_arg = sm_ctxt->get_diagnostic_tree (expr: arg);
402 sm_ctxt->warn (node, stmt, var: arg,
403 d: make_unique<double_fclose> (args: *this, args&: diag_arg));
404 sm_ctxt->set_next_state (stmt, var: arg, to: m_stop);
405 }
406 return true;
407 }
408
409 if (is_file_using_fn_p (fndecl: callee_fndecl))
410 {
411 // TODO: operations on unchecked file
412 return true;
413 }
414 // etc
415 }
416
417 return false;
418}
419
420/* Implementation of state_machine::on_condition vfunc for
421 fileptr_state_machine.
422 Potentially transition state 'unchecked' to 'nonnull' or to 'null'. */
423
424void
425fileptr_state_machine::on_condition (sm_context *sm_ctxt,
426 const supernode *node,
427 const gimple *stmt,
428 const svalue *lhs,
429 enum tree_code op,
430 const svalue *rhs) const
431{
432 if (!rhs->all_zeroes_p ())
433 return;
434
435 // TODO: has to be a FILE *, specifically
436 if (!any_pointer_p (sval: lhs))
437 return;
438 // TODO: has to be a FILE *, specifically
439 if (!any_pointer_p (sval: rhs))
440 return;
441
442 if (op == NE_EXPR)
443 {
444 log (fmt: "got 'ARG != 0' match");
445 sm_ctxt->on_transition (node, stmt,
446 var: lhs, from: m_unchecked, to: m_nonnull);
447 }
448 else if (op == EQ_EXPR)
449 {
450 log (fmt: "got 'ARG == 0' match");
451 sm_ctxt->on_transition (node, stmt,
452 var: lhs, from: m_unchecked, to: m_null);
453 }
454}
455
456/* Implementation of state_machine::can_purge_p vfunc for fileptr_state_machine.
457 Don't allow purging of pointers in state 'unchecked' or 'nonnull'
458 (to avoid false leak reports). */
459
460bool
461fileptr_state_machine::can_purge_p (state_t s) const
462{
463 return s != m_unchecked && s != m_nonnull;
464}
465
466/* Implementation of state_machine::on_leak vfunc for
467 fileptr_state_machine, for complaining about leaks of FILE * in
468 state 'unchecked' and 'nonnull'. */
469
470std::unique_ptr<pending_diagnostic>
471fileptr_state_machine::on_leak (tree var) const
472{
473 return make_unique<file_leak> (args: *this, args&: var);
474}
475
476} // anonymous namespace
477
478/* Internal interface to this file. */
479
480state_machine *
481make_fileptr_state_machine (logger *logger)
482{
483 return new fileptr_state_machine (logger);
484}
485
486/* Handler for various stdio-related builtins that merely have external
487 effects that are out of scope for the analyzer: we only want to model
488 the effects on the return value. */
489
490class kf_stdio_output_fn : public pure_known_function_with_default_return
491{
492public:
493 bool matches_call_types_p (const call_details &) const final override
494 {
495 return true;
496 }
497
498 /* A no-op; we just want the conjured return value. */
499};
500
501/* Handler for "ferror"". */
502
503class kf_ferror : public pure_known_function_with_default_return
504{
505public:
506 bool matches_call_types_p (const call_details &cd) const final override
507 {
508 return (cd.num_args () == 1
509 && cd.arg_is_pointer_p (idx: 0));
510 }
511
512 /* No side effects. */
513};
514
515/* Handler for "fileno"". */
516
517class kf_fileno : public pure_known_function_with_default_return
518{
519public:
520 bool matches_call_types_p (const call_details &cd) const final override
521 {
522 return (cd.num_args () == 1
523 && cd.arg_is_pointer_p (idx: 0));
524 }
525
526 /* No side effects. */
527};
528
529/* Handler for "fgets" and "fgets_unlocked". */
530
531class kf_fgets : public known_function
532{
533public:
534 bool matches_call_types_p (const call_details &cd) const final override
535 {
536 return (cd.num_args () == 3
537 && cd.arg_is_pointer_p (idx: 0)
538 && cd.arg_is_pointer_p (idx: 2));
539 }
540
541 void impl_call_pre (const call_details &cd) const final override
542 {
543 /* Ideally we would bifurcate state here between the
544 error vs no error cases. */
545 region_model *model = cd.get_model ();
546 const svalue *ptr_sval = cd.get_arg_svalue (idx: 0);
547 if (const region *reg = ptr_sval->maybe_get_region ())
548 {
549 const region *base_reg = reg->get_base_region ();
550 const svalue *new_sval = cd.get_or_create_conjured_svalue (base_reg);
551 model->set_value (lhs_reg: base_reg, rhs_sval: new_sval, ctxt: cd.get_ctxt ());
552 }
553 cd.set_any_lhs_with_defaults ();
554 }
555};
556
557/* Handler for "fread".
558 size_t fread(void *restrict buffer, size_t size, size_t count,
559 FILE *restrict stream);
560 See e.g. https://en.cppreference.com/w/c/io/fread
561 and https://www.man7.org/linux/man-pages/man3/fread.3.html */
562
563class kf_fread : public known_function
564{
565public:
566 bool matches_call_types_p (const call_details &cd) const final override
567 {
568 return (cd.num_args () == 4
569 && cd.arg_is_pointer_p (idx: 0)
570 && cd.arg_is_size_p (idx: 1)
571 && cd.arg_is_size_p (idx: 2)
572 && cd.arg_is_pointer_p (idx: 3));
573 }
574
575 /* For now, assume that any call to "fread" fully clobbers the buffer
576 passed in. This isn't quite correct (e.g. errors, partial reads;
577 see PR analyzer/108689), but at least stops us falsely complaining
578 about the buffer being uninitialized. */
579 void impl_call_pre (const call_details &cd) const final override
580 {
581 region_model *model = cd.get_model ();
582 const svalue *ptr_sval = cd.get_arg_svalue (idx: 0);
583 if (const region *reg = ptr_sval->maybe_get_region ())
584 {
585 const region *base_reg = reg->get_base_region ();
586 const svalue *new_sval = cd.get_or_create_conjured_svalue (base_reg);
587 model->set_value (lhs_reg: base_reg, rhs_sval: new_sval, ctxt: cd.get_ctxt ());
588 }
589 cd.set_any_lhs_with_defaults ();
590 }
591};
592
593/* Handler for "getc"". */
594
595class kf_getc : public pure_known_function_with_default_return
596{
597public:
598 bool matches_call_types_p (const call_details &cd) const final override
599 {
600 return (cd.num_args () == 1
601 && cd.arg_is_pointer_p (idx: 0));
602 }
603};
604
605/* Handler for "getchar"". */
606
607class kf_getchar : public pure_known_function_with_default_return
608{
609public:
610 bool matches_call_types_p (const call_details &cd) const final override
611 {
612 return cd.num_args () == 0;
613 }
614
615 /* Empty. No side-effects (tracking stream state is out-of-scope
616 for the analyzer). */
617};
618
619/* Populate KFM with instances of known functions relating to
620 stdio streams. */
621
622void
623register_known_file_functions (known_function_manager &kfm)
624{
625 kfm.add (name: BUILT_IN_FPRINTF, kf: make_unique<kf_stdio_output_fn> ());
626 kfm.add (name: BUILT_IN_FPRINTF_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
627 kfm.add (name: BUILT_IN_FPUTC, kf: make_unique<kf_stdio_output_fn> ());
628 kfm.add (name: BUILT_IN_FPUTC_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
629 kfm.add (name: BUILT_IN_FPUTS, kf: make_unique<kf_stdio_output_fn> ());
630 kfm.add (name: BUILT_IN_FPUTS_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
631 kfm.add (name: BUILT_IN_FWRITE, kf: make_unique<kf_stdio_output_fn> ());
632 kfm.add (name: BUILT_IN_FWRITE_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
633 kfm.add (name: BUILT_IN_PRINTF, kf: make_unique<kf_stdio_output_fn> ());
634 kfm.add (name: BUILT_IN_PRINTF_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
635 kfm.add (name: BUILT_IN_PUTC, kf: make_unique<kf_stdio_output_fn> ());
636 kfm.add (name: BUILT_IN_PUTCHAR, kf: make_unique<kf_stdio_output_fn> ());
637 kfm.add (name: BUILT_IN_PUTCHAR_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
638 kfm.add (name: BUILT_IN_PUTC_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
639 kfm.add (name: BUILT_IN_PUTS, kf: make_unique<kf_stdio_output_fn> ());
640 kfm.add (name: BUILT_IN_PUTS_UNLOCKED, kf: make_unique<kf_stdio_output_fn> ());
641 kfm.add (name: BUILT_IN_VFPRINTF, kf: make_unique<kf_stdio_output_fn> ());
642 kfm.add (name: BUILT_IN_VPRINTF, kf: make_unique<kf_stdio_output_fn> ());
643
644 kfm.add (name: "ferror", kf: make_unique<kf_ferror> ());
645 kfm.add (name: "fgets", kf: make_unique<kf_fgets> ());
646 kfm.add (name: "fgets_unlocked", kf: make_unique<kf_fgets> ()); // non-standard
647 kfm.add (name: "fileno", kf: make_unique<kf_fileno> ());
648 kfm.add (name: "fread", kf: make_unique<kf_fread> ());
649 kfm.add (name: "getc", kf: make_unique<kf_getc> ());
650 kfm.add (name: "getchar", kf: make_unique<kf_getchar> ());
651}
652
653#if CHECKING_P
654
655namespace selftest {
656
657/* Run all of the selftests within this file. */
658
659void
660analyzer_sm_file_cc_tests ()
661{
662 function_set fs = get_file_using_fns ();
663 fs.assert_sorted ();
664 fs.assert_sane ();
665}
666
667} // namespace selftest
668
669#endif /* CHECKING_P */
670
671} // namespace ana
672
673#endif /* #if ENABLE_ANALYZER */
674

source code of gcc/analyzer/sm-file.cc