1/* A state machine for tracking "taint": unsanitized uses
2 of data potentially under an attacker's control.
3
4 Copyright (C) 2019-2024 Free Software Foundation, Inc.
5 Contributed by David Malcolm <dmalcolm@redhat.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 3, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful, but
15WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#define INCLUDE_MEMORY
25#include "system.h"
26#include "coretypes.h"
27#include "make-unique.h"
28#include "tree.h"
29#include "function.h"
30#include "basic-block.h"
31#include "gimple.h"
32#include "options.h"
33#include "diagnostic-path.h"
34#include "analyzer/analyzer.h"
35#include "analyzer/analyzer-logging.h"
36#include "gimple-iterator.h"
37#include "ordered-hash-map.h"
38#include "cgraph.h"
39#include "cfg.h"
40#include "digraph.h"
41#include "stringpool.h"
42#include "attribs.h"
43#include "fold-const.h"
44#include "analyzer/supergraph.h"
45#include "analyzer/call-string.h"
46#include "analyzer/program-point.h"
47#include "analyzer/store.h"
48#include "analyzer/region-model.h"
49#include "analyzer/sm.h"
50#include "analyzer/program-state.h"
51#include "analyzer/pending-diagnostic.h"
52#include "analyzer/constraint-manager.h"
53#include "diagnostic-format-sarif.h"
54
55#if ENABLE_ANALYZER
56
57namespace ana {
58
59namespace {
60
61/* An enum for describing tainted values. */
62
63enum bounds
64{
65 /* This tainted value has no upper or lower bound. */
66 BOUNDS_NONE,
67
68 /* This tainted value has an upper bound but no lower bound. */
69 BOUNDS_UPPER,
70
71 /* This tainted value has a lower bound but no upper bound. */
72 BOUNDS_LOWER
73};
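
/* E.g. on a path where a check such as "idx < LIMIT" (placeholder names)
   has held, a tainted signed "idx" would be reported with BOUNDS_UPPER:
   it has an upper bound but could still be negative.  */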
74
75static const char *
76bounds_to_str (enum bounds b)
77{
78 switch (b)
79 {
80 default:
81 gcc_unreachable ();
82 case BOUNDS_NONE:
83 return "BOUNDS_NONE";
84 case BOUNDS_UPPER:
85 return "BOUNDS_UPPER";
86 case BOUNDS_LOWER:
87 return "BOUNDS_LOWER";
88 }
89}
90
91/* An experimental state machine, for tracking "taint": unsanitized uses
92 of data potentially under an attacker's control. */
93
94class taint_state_machine : public state_machine
95{
96public:
97 taint_state_machine (logger *logger);
98
99 bool inherited_state_p () const final override { return true; }
100
101 state_t alt_get_inherited_state (const sm_state_map &map,
102 const svalue *sval,
103 const extrinsic_state &ext_state)
104 const final override;
105
106 bool
107 has_alt_get_inherited_state_p () const final override
108 {
109 return true;
110 }
111
112 bool on_stmt (sm_context *sm_ctxt,
113 const supernode *node,
114 const gimple *stmt) const final override;
115
116 void on_condition (sm_context *sm_ctxt,
117 const supernode *node,
118 const gimple *stmt,
119 const svalue *lhs,
120 enum tree_code op,
121 const svalue *rhs) const final override;
122 void on_bounded_ranges (sm_context *sm_ctxt,
123 const supernode *node,
124 const gimple *stmt,
125 const svalue &sval,
126 const bounded_ranges &ranges) const final override;
127
128 bool can_purge_p (state_t s) const final override;
129
130 bool get_taint (state_t s, tree type, enum bounds *out) const;
131
132 state_t combine_states (state_t s0, state_t s1) const;
133
134private:
135 void check_control_flow_arg_for_taint (sm_context *sm_ctxt,
136 const gimple *stmt,
137 tree expr) const;
138
139 void check_for_tainted_size_arg (sm_context *sm_ctxt,
140 const supernode *node,
141 const gcall *call,
142 tree callee_fndecl) const;
143 void check_for_tainted_divisor (sm_context *sm_ctxt,
144 const supernode *node,
145 const gassign *assign) const;
146
147public:
148 /* State for a "tainted" value: unsanitized data potentially under an
149 attacker's control. */
150 state_t m_tainted;
151
152 /* State for a "tainted" value that has a lower bound. */
153 state_t m_has_lb;
154
155 /* State for a "tainted" value that has an upper bound. */
156 state_t m_has_ub;
157
158 /* Stop state, for a value we don't want to track any more. */
159 state_t m_stop;
160
161 /* Global state, for when the last condition had tainted arguments. */
162 state_t m_tainted_control_flow;
163};
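
/* A hypothetical sketch of how these states are typically used
   (all identifiers below are placeholders, not analyzer API):

     int idx;
     fread (&idx, sizeof (idx), 1, f);  // idx: start -> tainted
     arr[idx] = 0;                      // tainted_array_index warning
     if (idx >= 0 && idx < N)           // idx: tainted -> has_lb -> stop
       arr[idx] = 0;                    // sanitized: no warning
   */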
164
165/* Class for diagnostics relating to taint_state_machine. */
166
167class taint_diagnostic : public pending_diagnostic
168{
169public:
170 taint_diagnostic (const taint_state_machine &sm, tree arg,
171 enum bounds has_bounds)
172 : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
173 {}
174
175 bool subclass_equal_p (const pending_diagnostic &base_other) const override
176 {
177 const taint_diagnostic &other = (const taint_diagnostic &)base_other;
178 return (same_tree_p (t1: m_arg, t2: other.m_arg)
179 && m_has_bounds == other.m_has_bounds);
180 }
181
182 label_text describe_state_change (const evdesc::state_change &change) override
183 {
184 if (change.m_new_state == m_sm.m_tainted)
185 {
186 if (change.m_origin)
187 return change.formatted_print (fmt: "%qE has an unchecked value here"
188 " (from %qE)",
189 change.m_expr, change.m_origin);
190 else
191 return change.formatted_print (fmt: "%qE gets an unchecked value here",
192 change.m_expr);
193 }
194 else if (change.m_new_state == m_sm.m_has_lb)
195 return change.formatted_print (fmt: "%qE has its lower bound checked here",
196 change.m_expr);
197 else if (change.m_new_state == m_sm.m_has_ub)
198 return change.formatted_print (fmt: "%qE has its upper bound checked here",
199 change.m_expr);
200 return label_text ();
201 }
202
203 diagnostic_event::meaning
204 get_meaning_for_state_change (const evdesc::state_change &change)
205 const final override
206 {
207 if (change.m_new_state == m_sm.m_tainted)
208 return diagnostic_event::meaning (diagnostic_event::VERB_acquire,
209 diagnostic_event::NOUN_taint);
210 return diagnostic_event::meaning ();
211 }
212
213 void maybe_add_sarif_properties (sarif_object &result_obj)
214 const override
215 {
216 sarif_property_bag &props = result_obj.get_or_create_properties ();
217#define PROPERTY_PREFIX "gcc/analyzer/taint_diagnostic/"
218 props.set (PROPERTY_PREFIX "arg", v: tree_to_json (node: m_arg));
219 props.set_string (PROPERTY_PREFIX "has_bounds",
220 utf8_value: bounds_to_str (b: m_has_bounds));
221#undef PROPERTY_PREFIX
222 }
223
224protected:
225 const taint_state_machine &m_sm;
226 tree m_arg;
227 enum bounds m_has_bounds;
228};
229
230/* Concrete taint_diagnostic subclass for reporting attacker-controlled
231 array index. */
232
233class tainted_array_index : public taint_diagnostic
234{
235public:
236 tainted_array_index (const taint_state_machine &sm, tree arg,
237 enum bounds has_bounds)
238 : taint_diagnostic (sm, arg, has_bounds)
239 {}
240
241 const char *get_kind () const final override { return "tainted_array_index"; }
242
243 int get_controlling_option () const final override
244 {
245 return OPT_Wanalyzer_tainted_array_index;
246 }
247
248 bool emit (diagnostic_emission_context &ctxt) final override
249 {
250 /* CWE-129: "Improper Validation of Array Index". */
251 ctxt.add_cwe (cwe: 129);
252 if (m_arg)
253 switch (m_has_bounds)
254 {
255 default:
256 gcc_unreachable ();
257 case BOUNDS_NONE:
258 return ctxt.warn ("use of attacker-controlled value %qE"
259 " in array lookup without bounds checking",
260 m_arg);
261 break;
262 case BOUNDS_UPPER:
263 return ctxt.warn ("use of attacker-controlled value %qE"
264 " in array lookup without checking for negative",
265 m_arg);
266 break;
267 case BOUNDS_LOWER:
268 return ctxt.warn ("use of attacker-controlled value %qE"
269 " in array lookup without upper-bounds checking",
270 m_arg);
271 break;
272 }
273 else
274 switch (m_has_bounds)
275 {
276 default:
277 gcc_unreachable ();
278 case BOUNDS_NONE:
279 return ctxt.warn ("use of attacker-controlled value"
280 " in array lookup without bounds checking");
281 break;
282 case BOUNDS_UPPER:
283 return ctxt.warn ("use of attacker-controlled value"
284 " in array lookup without checking for"
285 " negative");
286 break;
287 case BOUNDS_LOWER:
288 return ctxt.warn ("use of attacker-controlled value"
289 " in array lookup without upper-bounds"
290 " checking");
291 break;
292 }
293 }
294
295 label_text describe_final_event (const evdesc::final_event &ev) final override
296 {
297 if (m_arg)
298 switch (m_has_bounds)
299 {
300 default:
301 gcc_unreachable ();
302 case BOUNDS_NONE:
303 return ev.formatted_print
304 (fmt: "use of attacker-controlled value %qE in array lookup"
305 " without bounds checking",
306 m_arg);
307 case BOUNDS_UPPER:
308 return ev.formatted_print
309 (fmt: "use of attacker-controlled value %qE"
310 " in array lookup without checking for negative",
311 m_arg);
312 case BOUNDS_LOWER:
313 return ev.formatted_print
314 (fmt: "use of attacker-controlled value %qE"
315 " in array lookup without upper-bounds checking",
316 m_arg);
317 }
318 else
319 switch (m_has_bounds)
320 {
321 default:
322 gcc_unreachable ();
323 case BOUNDS_NONE:
324 return ev.formatted_print
325 (fmt: "use of attacker-controlled value in array lookup"
326 " without bounds checking");
327 case BOUNDS_UPPER:
328 return ev.formatted_print
329 (fmt: "use of attacker-controlled value"
330 " in array lookup without checking for negative");
331 case BOUNDS_LOWER:
332 return ev.formatted_print
333 (fmt: "use of attacker-controlled value"
334 " in array lookup without upper-bounds checking");
335 }
336 }
337};
338
339/* Concrete taint_diagnostic subclass for reporting attacker-controlled
340 pointer offset. */
341
342class tainted_offset : public taint_diagnostic
343{
344public:
345 tainted_offset (const taint_state_machine &sm, tree arg,
346 enum bounds has_bounds,
347 const svalue *offset)
348 : taint_diagnostic (sm, arg, has_bounds),
349 m_offset (offset)
350 {}
351
352 const char *get_kind () const final override { return "tainted_offset"; }
353
354 int get_controlling_option () const final override
355 {
356 return OPT_Wanalyzer_tainted_offset;
357 }
358
359 bool emit (diagnostic_emission_context &ctxt) final override
360 {
361 /* CWE-823: "Use of Out-of-range Pointer Offset". */
362 ctxt.add_cwe (cwe: 823);
363 if (m_arg)
364 switch (m_has_bounds)
365 {
366 default:
367 gcc_unreachable ();
368 case BOUNDS_NONE:
369 return ctxt.warn ("use of attacker-controlled value %qE as offset"
370 " without bounds checking",
371 m_arg);
372 break;
373 case BOUNDS_UPPER:
374 return ctxt.warn ("use of attacker-controlled value %qE as offset"
375 " without lower-bounds checking",
376 m_arg);
377 break;
378 case BOUNDS_LOWER:
379 return ctxt.warn ("use of attacker-controlled value %qE as offset"
380 " without upper-bounds checking",
381 m_arg);
382 break;
383 }
384 else
385 switch (m_has_bounds)
386 {
387 default:
388 gcc_unreachable ();
389 case BOUNDS_NONE:
390 return ctxt.warn ("use of attacker-controlled value as offset"
391 " without bounds checking");
392 break;
393 case BOUNDS_UPPER:
394 return ctxt.warn ("use of attacker-controlled value as offset"
395 " without lower-bounds checking");
396 break;
397 case BOUNDS_LOWER:
398 return ctxt.warn ("use of attacker-controlled value as offset"
399 " without upper-bounds checking");
400 break;
401 }
402 }
403
404 label_text describe_final_event (const evdesc::final_event &ev) final override
405 {
406 if (m_arg)
407 switch (m_has_bounds)
408 {
409 default:
410 gcc_unreachable ();
411 case BOUNDS_NONE:
412 return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
413 " as offset without bounds checking",
414 m_arg);
415 case BOUNDS_UPPER:
416 return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
417 " as offset without lower-bounds checking",
418 m_arg);
419 case BOUNDS_LOWER:
420 return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
421 " as offset without upper-bounds checking",
422 m_arg);
423 }
424 else
425 switch (m_has_bounds)
426 {
427 default:
428 gcc_unreachable ();
429 case BOUNDS_NONE:
430 return ev.formatted_print (fmt: "use of attacker-controlled value"
431 " as offset without bounds checking");
432 case BOUNDS_UPPER:
433 return ev.formatted_print (fmt: "use of attacker-controlled value"
434 " as offset without lower-bounds"
435 " checking");
436 case BOUNDS_LOWER:
437 return ev.formatted_print (fmt: "use of attacker-controlled value"
438 " as offset without upper-bounds"
439 " checking");
440 }
441 }
442
443 void maybe_add_sarif_properties (sarif_object &result_obj)
444 const final override
445 {
446 taint_diagnostic::maybe_add_sarif_properties (result_obj);
447 sarif_property_bag &props = result_obj.get_or_create_properties ();
448#define PROPERTY_PREFIX "gcc/analyzer/tainted_offset/"
449 props.set (PROPERTY_PREFIX "offset", v: m_offset->to_json ());
450#undef PROPERTY_PREFIX
451 }
452
453private:
454 const svalue *m_offset;
455};
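
/* For instance (placeholder code): with "idx" read from an untrusted
   source and never compared against anything,

     char *p = buf + idx;
     *p = 0;

   would be reported as use of an attacker-controlled value as an offset
   without bounds checking (CWE-823).  */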
456
457/* Concrete taint_diagnostic subclass for reporting attacker-controlled
458 size. */
459
460class tainted_size : public taint_diagnostic
461{
462public:
463 tainted_size (const taint_state_machine &sm, tree arg,
464 enum bounds has_bounds)
465 : taint_diagnostic (sm, arg, has_bounds)
466 {}
467
468 const char *get_kind () const override { return "tainted_size"; }
469
470 int get_controlling_option () const final override
471 {
472 return OPT_Wanalyzer_tainted_size;
473 }
474
475 bool emit (diagnostic_emission_context &ctxt) override
476 {
477 /* "CWE-129: Improper Validation of Array Index". */
478 ctxt.add_cwe (cwe: 129);
479 if (m_arg)
480 switch (m_has_bounds)
481 {
482 default:
483 gcc_unreachable ();
484 case BOUNDS_NONE:
485 return ctxt.warn ("use of attacker-controlled value %qE as size"
486 " without bounds checking",
487 m_arg);
488 break;
489 case BOUNDS_UPPER:
490 return ctxt.warn ("use of attacker-controlled value %qE as size"
491 " without lower-bounds checking",
492 m_arg);
493 break;
494 case BOUNDS_LOWER:
495 return ctxt.warn ("use of attacker-controlled value %qE as size"
496 " without upper-bounds checking",
497 m_arg);
498 break;
499 }
500 else
501 switch (m_has_bounds)
502 {
503 default:
504 gcc_unreachable ();
505 case BOUNDS_NONE:
506 return ctxt.warn ("use of attacker-controlled value as size"
507 " without bounds checking");
508 break;
509 case BOUNDS_UPPER:
510 return ctxt.warn ("use of attacker-controlled value as size"
511 " without lower-bounds checking");
512 break;
513 case BOUNDS_LOWER:
514 return ctxt.warn ("use of attacker-controlled value as size"
515 " without upper-bounds checking");
516 break;
517 }
518 }
519
520 label_text describe_final_event (const evdesc::final_event &ev) final override
521 {
522 if (m_arg)
523 switch (m_has_bounds)
524 {
525 default:
526 gcc_unreachable ();
527 case BOUNDS_NONE:
528 return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
529 " as size without bounds checking",
530 m_arg);
531 case BOUNDS_UPPER:
532 return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
533 " as size without lower-bounds checking",
534 m_arg);
535 case BOUNDS_LOWER:
536 return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
537 " as size without upper-bounds checking",
538 m_arg);
539 }
540 else
541 switch (m_has_bounds)
542 {
543 default:
544 gcc_unreachable ();
545 case BOUNDS_NONE:
546 return ev.formatted_print (fmt: "use of attacker-controlled value"
547 " as size without bounds checking");
548 case BOUNDS_UPPER:
549 return ev.formatted_print (fmt: "use of attacker-controlled value"
550 " as size without lower-bounds checking");
551 case BOUNDS_LOWER:
552 return ev.formatted_print (fmt: "use of attacker-controlled value"
553 " as size without upper-bounds checking");
554 }
555 }
556};
557
558/* Subclass of tainted_size for reporting on tainted size values
559 passed to an external function annotated with attribute "access". */
560
561class tainted_access_attrib_size : public tainted_size
562{
563public:
564 tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
565 enum bounds has_bounds, tree callee_fndecl,
566 unsigned size_argno, const char *access_str)
567 : tainted_size (sm, arg, has_bounds),
568 m_callee_fndecl (callee_fndecl),
569 m_size_argno (size_argno), m_access_str (access_str)
570 {
571 }
572
573 const char *get_kind () const override
574 {
575 return "tainted_access_attrib_size";
576 }
577
578 bool emit (diagnostic_emission_context &ctxt) final override
579 {
580 bool warned = tainted_size::emit (ctxt);
581 if (warned)
582 {
583 inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
584 "parameter %i of %qD marked as a size via attribute %qs",
585 m_size_argno + 1, m_callee_fndecl, m_access_str);
586 }
587 return warned;
588 }
589
590private:
591 tree m_callee_fndecl;
592 unsigned m_size_argno;
593 const char *m_access_str;
594};
595
596/* Concrete taint_diagnostic subclass for reporting attacker-controlled
597 divisor (so that an attacker can trigger a divide by zero). */
598
599class tainted_divisor : public taint_diagnostic
600{
601public:
602 tainted_divisor (const taint_state_machine &sm, tree arg,
603 enum bounds has_bounds)
604 : taint_diagnostic (sm, arg, has_bounds)
605 {}
606
607 const char *get_kind () const final override { return "tainted_divisor"; }
608
609 int get_controlling_option () const final override
610 {
611 return OPT_Wanalyzer_tainted_divisor;
612 }
613
614 bool emit (diagnostic_emission_context &ctxt) final override
615 {
616 /* CWE-369: "Divide By Zero". */
617 ctxt.add_cwe (cwe: 369);
618 if (m_arg)
619 return ctxt.warn ("use of attacker-controlled value %qE as divisor"
620 " without checking for zero",
621 m_arg);
622 else
623 return ctxt.warn ("use of attacker-controlled value as divisor"
624 " without checking for zero");
625 }
626
627 label_text describe_final_event (const evdesc::final_event &ev) final override
628 {
629 if (m_arg)
630 return ev.formatted_print
631 (fmt: "use of attacker-controlled value %qE as divisor"
632 " without checking for zero",
633 m_arg);
634 else
635 return ev.formatted_print
636 (fmt: "use of attacker-controlled value as divisor"
637 " without checking for zero");
638 }
639};
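
/* E.g. (a sketch, with placeholder names):

     fread (&n, sizeof (n), 1, f);
     return total / n;   // tainted_divisor: n might be zero (CWE-369)

   whereas an earlier "if (n == 0) return -1;" lets the constraint
   manager prove n != 0 on the remaining path, suppressing the
   warning.  */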
640
641/* Concrete taint_diagnostic subclass for reporting attacker-controlled
642 size of a dynamic allocation. */
643
644class tainted_allocation_size : public taint_diagnostic
645{
646public:
647 tainted_allocation_size (const taint_state_machine &sm, tree arg,
648 const svalue *size_in_bytes,
649 enum bounds has_bounds, enum memory_space mem_space)
650 : taint_diagnostic (sm, arg, has_bounds),
651 m_size_in_bytes (size_in_bytes),
652 m_mem_space (mem_space)
653 {
654 }
655
656 const char *get_kind () const final override
657 {
658 return "tainted_allocation_size";
659 }
660
661 bool subclass_equal_p (const pending_diagnostic &base_other) const override
662 {
663 if (!taint_diagnostic::subclass_equal_p (base_other))
664 return false;
665 const tainted_allocation_size &other
666 = (const tainted_allocation_size &)base_other;
667 return m_mem_space == other.m_mem_space;
668 }
669
670 int get_controlling_option () const final override
671 {
672 return OPT_Wanalyzer_tainted_allocation_size;
673 }
674
675 bool emit (diagnostic_emission_context &ctxt) final override
676 {
677 /* "CWE-789: Memory Allocation with Excessive Size Value". */
678 ctxt.add_cwe (cwe: 789);
679
680 bool warned;
681 if (m_arg)
682 switch (m_has_bounds)
683 {
684 default:
685 gcc_unreachable ();
686 case BOUNDS_NONE:
687 warned = ctxt.warn ("use of attacker-controlled value %qE as"
688 " allocation size without bounds checking",
689 m_arg);
690 break;
691 case BOUNDS_UPPER:
692 warned = ctxt.warn ("use of attacker-controlled value %qE as"
693 " allocation size without"
694 " lower-bounds checking",
695 m_arg);
696 break;
697 case BOUNDS_LOWER:
698 warned = ctxt.warn ("use of attacker-controlled value %qE as"
699 " allocation size without"
700 " upper-bounds checking",
701 m_arg);
702 break;
703 }
704 else
705 switch (m_has_bounds)
706 {
707 default:
708 gcc_unreachable ();
709 case BOUNDS_NONE:
710 warned = ctxt.warn ("use of attacker-controlled value as"
711 " allocation size without bounds"
712 " checking");
713 break;
714 case BOUNDS_UPPER:
715 warned = ctxt.warn ("use of attacker-controlled value as"
716 " allocation size without"
717 " lower-bounds checking");
718 break;
719 case BOUNDS_LOWER:
720 warned = ctxt.warn ("use of attacker-controlled value as"
721 " allocation size without"
722 " upper-bounds checking");
723 break;
724 }
725 if (warned)
726 {
727 const location_t loc = ctxt.get_location ();
728 switch (m_mem_space)
729 {
730 default:
731 break;
732 case MEMSPACE_STACK:
733 inform (loc, "stack-based allocation");
734 break;
735 case MEMSPACE_HEAP:
736 inform (loc, "heap-based allocation");
737 break;
738 }
739 }
740 return warned;
741 }
742
743 label_text describe_final_event (const evdesc::final_event &ev) final override
744 {
745 if (m_arg)
746 switch (m_has_bounds)
747 {
748 default:
749 gcc_unreachable ();
750 case BOUNDS_NONE:
751 return ev.formatted_print
752 (fmt: "use of attacker-controlled value %qE as allocation size"
753 " without bounds checking",
754 m_arg);
755 case BOUNDS_UPPER:
756 return ev.formatted_print
757 (fmt: "use of attacker-controlled value %qE as allocation size"
758 " without lower-bounds checking",
759 m_arg);
760 case BOUNDS_LOWER:
761 return ev.formatted_print
762 (fmt: "use of attacker-controlled value %qE as allocation size"
763 " without upper-bounds checking",
764 m_arg);
765 }
766 else
767 switch (m_has_bounds)
768 {
769 default:
770 gcc_unreachable ();
771 case BOUNDS_NONE:
772 return ev.formatted_print
773 (fmt: "use of attacker-controlled value as allocation size"
774 " without bounds checking");
775 case BOUNDS_UPPER:
776 return ev.formatted_print
777 (fmt: "use of attacker-controlled value as allocation size"
778 " without lower-bounds checking");
779 case BOUNDS_LOWER:
780 return ev.formatted_print
781 (fmt: "use of attacker-controlled value as allocation size"
782 " without upper-bounds checking");
783 }
784 }
785
786 void maybe_add_sarif_properties (sarif_object &result_obj)
787 const final override
788 {
789 taint_diagnostic::maybe_add_sarif_properties (result_obj);
790 sarif_property_bag &props = result_obj.get_or_create_properties ();
791#define PROPERTY_PREFIX "gcc/analyzer/tainted_allocation_size/"
792 props.set (PROPERTY_PREFIX "size_in_bytes", v: m_size_in_bytes->to_json ());
793#undef PROPERTY_PREFIX
794 }
795
796private:
797 const svalue *m_size_in_bytes;
798 enum memory_space m_mem_space;
799};
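
/* For example (placeholder code):

     fread (&len, sizeof (len), 1, f);
     char *buf = malloc (len);   // tainted_allocation_size (CWE-789)

   When the memory space is known, a follow-up note identifies the
   allocation as stack-based (e.g. alloca or a VLA) or heap-based.  */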
800
801/* Concrete taint_diagnostic subclass for reporting attacker-controlled
802 value being used as part of the condition of an assertion. */
803
804class tainted_assertion : public taint_diagnostic
805{
806public:
807 tainted_assertion (const taint_state_machine &sm, tree arg,
808 tree assert_failure_fndecl)
809 : taint_diagnostic (sm, arg, BOUNDS_NONE),
810 m_assert_failure_fndecl (assert_failure_fndecl)
811 {
812 gcc_assert (m_assert_failure_fndecl);
813 }
814
815 const char *get_kind () const final override
816 {
817 return "tainted_assertion";
818 }
819
820 bool subclass_equal_p (const pending_diagnostic &base_other) const override
821 {
822 if (!taint_diagnostic::subclass_equal_p (base_other))
823 return false;
824 const tainted_assertion &other
825 = (const tainted_assertion &)base_other;
826 return m_assert_failure_fndecl == other.m_assert_failure_fndecl;
827 }
828
829 int get_controlling_option () const final override
830 {
831 return OPT_Wanalyzer_tainted_assertion;
832 }
833
834 bool emit (diagnostic_emission_context &ctxt) final override
835 {
836 /* "CWE-617: Reachable Assertion". */
837 ctxt.add_cwe (cwe: 617);
838
839 return ctxt.warn ("use of attacked-controlled value in"
840 " condition for assertion");
841 }
842
843 location_t fixup_location (location_t loc,
844 bool primary) const final override
845 {
846 if (primary)
847 /* For the primary location we want to avoid being in e.g. the
848 <assert.h> system header, since this would suppress the
849 diagnostic. */
850 return expansion_point_location_if_in_system_header (loc);
851 else if (in_system_header_at (loc))
852 /* For events, we want to show the implementation of the assert
853 macro when we're describing them. */
854 return linemap_resolve_location (line_table, loc,
855 lrk: LRK_SPELLING_LOCATION,
856 NULL);
857 else
858 return pending_diagnostic::fixup_location (loc, primary);
859 }
860
861 label_text describe_state_change (const evdesc::state_change &change) override
862 {
863 if (change.m_new_state == m_sm.m_tainted_control_flow)
864 return change.formatted_print
865 (fmt: "use of attacker-controlled value for control flow");
866 return taint_diagnostic::describe_state_change (change);
867 }
868
869 label_text describe_final_event (const evdesc::final_event &ev) final override
870 {
871 if (mention_noreturn_attribute_p ())
872 return ev.formatted_print
873 (fmt: "treating %qE as an assertion failure handler"
874 " due to %<__attribute__((__noreturn__))%>",
875 m_assert_failure_fndecl);
876 else
877 return ev.formatted_print
878 (fmt: "treating %qE as an assertion failure handler",
879 m_assert_failure_fndecl);
880 }
881
882private:
883 bool mention_noreturn_attribute_p () const
884 {
885 if (fndecl_built_in_p (node: m_assert_failure_fndecl, name1: BUILT_IN_UNREACHABLE))
886 return false;
887 return true;
888 }
889
890 tree m_assert_failure_fndecl;
891};
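
/* A sketch of the pattern this covers (placeholder names):

     fread (&x, sizeof (x), 1, f);
     assert (x < 100);   // tainted_assertion: an attacker can make the
                         // assertion fail and abort the process (CWE-617)

   The assert macro's failure function (e.g. __assert_fail) is treated
   as an assertion-failure handler because it is declared noreturn.  */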
892
893/* taint_state_machine's ctor. */
894
895taint_state_machine::taint_state_machine (logger *logger)
896: state_machine ("taint", logger),
897 m_tainted (add_state (name: "tainted")),
898 m_has_lb (add_state (name: "has_lb")),
899 m_has_ub (add_state (name: "has_ub")),
900 m_stop (add_state (name: "stop")),
901 m_tainted_control_flow (add_state (name: "tainted-control-flow"))
902{
903}
904
905state_machine::state_t
906taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
907 const svalue *sval,
908 const extrinsic_state &ext_state)
909 const
910{
911 switch (sval->get_kind ())
912 {
913 default:
914 break;
915 case SK_UNARYOP:
916 {
917 const unaryop_svalue *unaryop_sval
918 = as_a <const unaryop_svalue *> (p: sval);
919 enum tree_code op = unaryop_sval->get_op ();
920 const svalue *arg = unaryop_sval->get_arg ();
921 switch (op)
922 {
923 case NOP_EXPR:
924 {
925 state_t arg_state = map.get_state (sval: arg, ext_state);
926 return arg_state;
927 }
928 default:
929 break;
930 }
931 }
932 break;
933 case SK_BINOP:
934 {
935 const binop_svalue *binop_sval = as_a <const binop_svalue *> (p: sval);
936 enum tree_code op = binop_sval->get_op ();
937 const svalue *arg0 = binop_sval->get_arg0 ();
938 const svalue *arg1 = binop_sval->get_arg1 ();
939 switch (op)
940 {
941 default:
942 break;
943
944 case EQ_EXPR:
945 case GE_EXPR:
946 case LE_EXPR:
947 case NE_EXPR:
948 case GT_EXPR:
949 case LT_EXPR:
950 case UNORDERED_EXPR:
951 case ORDERED_EXPR:
952 case PLUS_EXPR:
953 case MINUS_EXPR:
954 case MULT_EXPR:
955 case POINTER_PLUS_EXPR:
956 case TRUNC_DIV_EXPR:
957 {
958 state_t arg0_state = map.get_state (sval: arg0, ext_state);
959 state_t arg1_state = map.get_state (sval: arg1, ext_state);
960 return combine_states (s0: arg0_state, s1: arg1_state);
961 }
962 break;
963
964 case TRUNC_MOD_EXPR:
965 {
966 /* The left-hand side of X % Y can be sanitized by
967 the operation. */
968 return map.get_state (sval: arg1, ext_state);
969 }
970 break;
971
972 case BIT_AND_EXPR:
973 case RSHIFT_EXPR:
974 return NULL;
975 }
976 }
977 break;
978 }
979 return NULL;
980}
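
/* Informally: taint is inherited through casts and through most
   arithmetic, so with a tainted "x" (placeholder name):

     y = x + 1;      // y inherits x's taint
     y = x % 16;     // treated as sanitized: the result follows the
                     //   divisor's state, not x's
     y = x & 0xff;   // no inherited state (BIT_AND_EXPR)

   Note that "x % y" with a tainted "y" is still tainted.  */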
981
982/* Return true iff FNDECL should be considered to be an assertion failure
983 handler by -Wanalyzer-tainted-assertion. */
984
985static bool
986is_assertion_failure_handler_p (tree fndecl)
987{
988 // i.e. "noreturn"
989 if (TREE_THIS_VOLATILE (fndecl))
990 return true;
991
992 return false;
993}
994
995/* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
996
997bool
998taint_state_machine::on_stmt (sm_context *sm_ctxt,
999 const supernode *node,
1000 const gimple *stmt) const
1001{
1002 if (const gcall *call = dyn_cast <const gcall *> (p: stmt))
1003 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
1004 {
1005 if (is_named_call_p (fndecl: callee_fndecl, funcname: "fread", call, num_args: 4))
1006 {
1007 tree arg = gimple_call_arg (gs: call, index: 0);
1008
1009 sm_ctxt->on_transition (node, stmt, var: arg, from: m_start, to: m_tainted);
1010
1011 /* Dereference an ADDR_EXPR. */
1012 // TODO: should the engine do this?
1013 if (TREE_CODE (arg) == ADDR_EXPR)
1014 sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
1015 from: m_start, to: m_tainted);
1016 return true;
1017 }
1018
1019 /* External function with "access" attribute. */
1020 if (sm_ctxt->unknown_side_effects_p ())
1021 check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);
1022
1023 if (is_assertion_failure_handler_p (fndecl: callee_fndecl)
1024 && sm_ctxt->get_global_state () == m_tainted_control_flow)
1025 {
1026 sm_ctxt->warn (node, stmt: call, NULL_TREE,
1027 d: make_unique<tainted_assertion> (args: *this, NULL_TREE,
1028 args&: callee_fndecl));
1029 }
1030 }
1031 // TODO: ...etc; many other sources of untrusted data
1032
1033 if (const gassign *assign = dyn_cast <const gassign *> (p: stmt))
1034 {
1035 enum tree_code op = gimple_assign_rhs_code (gs: assign);
1036
1037 switch (op)
1038 {
1039 default:
1040 break;
1041 case TRUNC_DIV_EXPR:
1042 case CEIL_DIV_EXPR:
1043 case FLOOR_DIV_EXPR:
1044 case ROUND_DIV_EXPR:
1045 case TRUNC_MOD_EXPR:
1046 case CEIL_MOD_EXPR:
1047 case FLOOR_MOD_EXPR:
1048 case ROUND_MOD_EXPR:
1049 case RDIV_EXPR:
1050 case EXACT_DIV_EXPR:
1051 check_for_tainted_divisor (sm_ctxt, node, assign);
1052 break;
1053 }
1054 }
1055
1056 if (const gcond *cond = dyn_cast <const gcond *> (p: stmt))
1057 {
1058 /* Reset the state of "tainted-control-flow" before each
1059 control flow statement, so that only the last one before
1060 an assertion-failure-handler counts. */
1061 sm_ctxt->set_global_state (m_start);
1062 check_control_flow_arg_for_taint (sm_ctxt, stmt: cond, expr: gimple_cond_lhs (gs: cond));
1063 check_control_flow_arg_for_taint (sm_ctxt, stmt: cond, expr: gimple_cond_rhs (gs: cond));
1064 }
1065
1066 if (const gswitch *switch_ = dyn_cast <const gswitch *> (p: stmt))
1067 {
1068 /* Reset the state of "tainted-control-flow" before each
1069 control flow statement, so that only the last one before
1070 an assertion-failure-handler counts. */
1071 sm_ctxt->set_global_state (m_start);
1072 check_control_flow_arg_for_taint (sm_ctxt, stmt: switch_,
1073 expr: gimple_switch_index (gs: switch_));
1074 }
1075
1076 return false;
1077}
1078
1079/* If EXPR is tainted, mark this execution path with the
1080 "tainted-control-flow" global state, in case we're about
1081 to call an assertion-failure-handler. */
1082
1083void
1084taint_state_machine::check_control_flow_arg_for_taint (sm_context *sm_ctxt,
1085 const gimple *stmt,
1086 tree expr) const
1087{
1088 const region_model *old_model = sm_ctxt->get_old_region_model ();
1089 const svalue *sval = old_model->get_rvalue (expr, NULL);
1090 state_t state = sm_ctxt->get_state (stmt, sval);
1091 enum bounds b;
1092 if (get_taint (s: state, TREE_TYPE (expr), out: &b))
1093 sm_ctxt->set_global_state (m_tainted_control_flow);
1094}
1095
1096/* Implementation of state_machine::on_condition vfunc for taint_state_machine.
1097 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1098 and states 'has_ub' and 'has_lb' to 'stop'. */
1099
1100void
1101taint_state_machine::on_condition (sm_context *sm_ctxt,
1102 const supernode *node,
1103 const gimple *stmt,
1104 const svalue *lhs,
1105 enum tree_code op,
1106 const svalue *rhs) const
1107{
1108 if (stmt == NULL)
1109 return;
1110
1111 if (lhs->get_kind () == SK_UNKNOWN
1112 || rhs->get_kind () == SK_UNKNOWN)
1113 {
1114 /* If we have a comparison against UNKNOWN, then
1115 we've presumably hit the svalue complexity limit,
1116 and we don't know what is being sanitized.
1117 Give up on any taint already found on this execution path. */
1118 // TODO: warn about this
1119 if (get_logger ())
1120 get_logger ()->log (fmt: "comparison against UNKNOWN; removing all taint");
1121 sm_ctxt->clear_all_per_svalue_state ();
1122 return;
1123 }
1124
1125 /* Strip away casts before considering LHS and RHS, to increase the
1126 chance of detecting places where sanitization of a value may have
1127 happened. */
1128 if (const svalue *inner = lhs->maybe_undo_cast ())
1129 lhs = inner;
1130 if (const svalue *inner = rhs->maybe_undo_cast ())
1131 rhs = inner;
1132
1133 // TODO
1134 switch (op)
1135 {
1136 //case NE_EXPR:
1137 //case EQ_EXPR:
1138 case GE_EXPR:
1139 case GT_EXPR:
1140 {
1141 /* (LHS >= RHS) or (LHS > RHS)
1142 LHS gains a lower bound
1143 RHS gains an upper bound. */
1144 sm_ctxt->on_transition (node, stmt, var: lhs, from: m_tainted,
1145 to: m_has_lb);
1146 sm_ctxt->on_transition (node, stmt, var: lhs, from: m_has_ub,
1147 to: m_stop);
1148 sm_ctxt->on_transition (node, stmt, var: rhs, from: m_tainted,
1149 to: m_has_ub);
1150 sm_ctxt->on_transition (node, stmt, var: rhs, from: m_has_lb,
1151 to: m_stop);
1152 }
1153 break;
1154 case LE_EXPR:
1155 case LT_EXPR:
1156 {
1157 /* Detect where build_range_check has optimized
1158 (c>=low) && (c<=high)
1159 into
1160 (c-low>=0) && (c-low<=high-low)
1161 and thus into:
1162 (unsigned)(c - low) <= (unsigned)(high-low). */
1163 if (const binop_svalue *binop_sval
1164 = lhs->dyn_cast_binop_svalue ())
1165 {
1166 const svalue *inner_lhs = binop_sval->get_arg0 ();
1167 enum tree_code inner_op = binop_sval->get_op ();
1168 const svalue *inner_rhs = binop_sval->get_arg1 ();
1169 if (const svalue *before_cast = inner_lhs->maybe_undo_cast ())
1170 inner_lhs = before_cast;
1171 if (tree outer_rhs_cst = rhs->maybe_get_constant ())
1172 if (tree inner_rhs_cst = inner_rhs->maybe_get_constant ())
1173 if (inner_op == PLUS_EXPR
1174 && TREE_CODE (inner_rhs_cst) == INTEGER_CST
1175 && TREE_CODE (outer_rhs_cst) == INTEGER_CST
1176 && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst))
1177 && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst)))
1178 {
1179 /* We have
1180 (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
1181 and thus an optimized test of INNER_LHS (before any
1182 cast to unsigned) against a range.
1183 Transition any of the tainted states to the stop state.
1184 We have to special-case this here rather than in
1185 region_model::on_condition since we can't apply
1186 both conditions simultaneously (we'd have a transition
1187 from the old state to has_lb, then a transition from
1188 the old state *again* to has_ub). */
1189 state_t old_state
1190 = sm_ctxt->get_state (stmt, inner_lhs);
1191 if (old_state == m_tainted
1192 || old_state == m_has_lb
1193 || old_state == m_has_ub)
1194 sm_ctxt->set_next_state (stmt, var: inner_lhs, to: m_stop);
1195 return;
1196 }
1197 }
1198
1199 /* (LHS <= RHS) or (LHS < RHS)
1200 LHS gains an upper bound
1201 RHS gains a lower bound. */
1202 sm_ctxt->on_transition (node, stmt, var: lhs, from: m_tainted,
1203 to: m_has_ub);
1204 sm_ctxt->on_transition (node, stmt, var: lhs, from: m_has_lb,
1205 to: m_stop);
1206 sm_ctxt->on_transition (node, stmt, var: rhs, from: m_tainted,
1207 to: m_has_lb);
1208 sm_ctxt->on_transition (node, stmt, var: rhs, from: m_has_ub,
1209 to: m_stop);
1210 }
1211 break;
1212 default:
1213 break;
1214 }
1215}
1216
1217/* Implementation of state_machine::on_bounded_ranges vfunc for
1218 taint_state_machine, for handling switch statement cases.
1219 Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1220 and states 'has_ub' and 'has_lb' to 'stop'. */
1221
1222void
1223taint_state_machine::on_bounded_ranges (sm_context *sm_ctxt,
1224 const supernode *,
1225 const gimple *stmt,
1226 const svalue &sval,
1227 const bounded_ranges &ranges) const
1228{
1229 gcc_assert (!ranges.empty_p ());
1230 gcc_assert (ranges.get_count () > 0);
1231
1232 /* We have one or more ranges; this could be a "default:", or one or
1233 more single or range cases.
1234
1235 Look at the overall endpoints to see if the ranges impose any lower
1236 bounds or upper bounds beyond those of the underlying numeric type. */
1237
1238 tree lowest_bound = ranges.get_range (idx: 0).m_lower;
1239 tree highest_bound = ranges.get_range (idx: ranges.get_count () - 1).m_upper;
1240 gcc_assert (lowest_bound);
1241 gcc_assert (highest_bound);
1242
1243 bool ranges_have_lb
1244 = (lowest_bound != TYPE_MIN_VALUE (TREE_TYPE (lowest_bound)));
1245 bool ranges_have_ub
1246 = (highest_bound != TYPE_MAX_VALUE (TREE_TYPE (highest_bound)));
1247
1248 if (!ranges_have_lb && !ranges_have_ub)
1249 return;
1250
1251 /* We have new bounds from the ranges; combine them with any
1252 existing bounds on SVAL. */
1253 state_t old_state = sm_ctxt->get_state (stmt, &sval);
1254 if (old_state == m_tainted)
1255 {
1256 if (ranges_have_lb && ranges_have_ub)
1257 sm_ctxt->set_next_state (stmt, var: &sval, to: m_stop);
1258 else if (ranges_have_lb)
1259 sm_ctxt->set_next_state (stmt, var: &sval, to: m_has_lb);
1260 else if (ranges_have_ub)
1261 sm_ctxt->set_next_state (stmt, var: &sval, to: m_has_ub);
1262 }
1263 else if (old_state == m_has_ub && ranges_have_lb)
1264 sm_ctxt->set_next_state (stmt, var: &sval, to: m_stop);
1265 else if (old_state == m_has_lb && ranges_have_ub)
1266 sm_ctxt->set_next_state (stmt, var: &sval, to: m_stop);
1267}
1268
1269bool
1270taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
1271{
1272 if (s == m_has_lb || s == m_has_ub)
1273 return false;
1274
1275 return true;
1276}
1277
1278/* If STATE is a tainted state, write the bounds to *OUT and return true.
1279 Otherwise return false.
1280 Use the signedness of TYPE to determine if "has_ub" is tainted. */
1281
1282bool
1283taint_state_machine::get_taint (state_t state, tree type,
1284 enum bounds *out) const
1285{
1286 /* Unsigned types have an implicit lower bound. */
1287 bool is_unsigned = false;
1288 if (type)
1289 if (INTEGRAL_TYPE_P (type))
1290 is_unsigned = TYPE_UNSIGNED (type);
1291
1292 /* Can't use a switch as the states are non-const. */
1293 if (state == m_tainted)
1294 {
1295 *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
1296 return true;
1297 }
1298 else if (state == m_has_lb)
1299 {
1300 *out = BOUNDS_LOWER;
1301 return true;
1302 }
1303 else if (state == m_has_ub && !is_unsigned)
1304 {
1305 /* Missing lower bound. */
1306 *out = BOUNDS_UPPER;
1307 return true;
1308 }
1309 return false;
1310}
1311
1312/* Find the most tainted state of S0 and S1. */
1313
1314state_machine::state_t
1315taint_state_machine::combine_states (state_t s0, state_t s1) const
1316{
1317 gcc_assert (s0);
1318 gcc_assert (s1);
1319 if (s0 == s1)
1320 return s0;
1321 if (s0 == m_tainted || s1 == m_tainted)
1322 return m_tainted;
1323 if (s0 == m_start)
1324 return s1;
1325 if (s1 == m_start)
1326 return s0;
1327 if (s0 == m_stop)
1328 return s1;
1329 if (s1 == m_stop)
1330 return s0;
1331 /* The only remaining combinations are one of has_ub and has_lb
1332 (in either order). */
1333 gcc_assert ((s0 == m_has_lb && s1 == m_has_ub)
1334 || (s0 == m_has_ub && s1 == m_has_lb));
1335 return m_tainted;
1336}
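
/* E.g. combine_states (m_has_lb, m_has_ub) yields m_tainted rather than
   some "fully bounded" state: the two bounds come from two different
   operands, so e.g. the sum of "a" (only a lower bound) and "b" (only
   an upper bound) is unbounded in both directions.  */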
1337
1338/* Check for calls to external functions marked with
1339 __attribute__((access)) with a size-index: complain about
1340 tainted values passed as a size to such a function. */
1341
1342void
1343taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
1344 const supernode *node,
1345 const gcall *call,
1346 tree callee_fndecl) const
1347{
1348 tree fntype = TREE_TYPE (callee_fndecl);
1349 if (!fntype)
1350 return;
1351
1352 if (!TYPE_ATTRIBUTES (fntype))
1353 return;
1354
1355 /* Initialize a map of attribute access specifications for arguments
1356 to the function call. */
1357 rdwr_map rdwr_idx;
1358 init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
1359
1360 unsigned argno = 0;
1361
1362 for (tree iter = TYPE_ARG_TYPES (fntype); iter;
1363 iter = TREE_CHAIN (iter), ++argno)
1364 {
1365 const attr_access* access = rdwr_idx.get (k: argno);
1366 if (!access)
1367 continue;
1368
1369 /* Ignore any duplicate entry in the map for the size argument. */
1370 if (access->ptrarg != argno)
1371 continue;
1372
1373 if (access->sizarg == UINT_MAX)
1374 continue;
1375
1376 tree size_arg = gimple_call_arg (gs: call, index: access->sizarg);
1377
1378 state_t state = sm_ctxt->get_state (stmt: call, var: size_arg);
1379 enum bounds b;
1380 if (get_taint (state, TREE_TYPE (size_arg), out: &b))
1381 {
1382 const char* const access_str =
1383 TREE_STRING_POINTER (access->to_external_string ());
1384 tree diag_size = sm_ctxt->get_diagnostic_tree (expr: size_arg);
1385 sm_ctxt->warn (node, stmt: call, var: size_arg,
1386 d: make_unique<tainted_access_attrib_size>
1387 (args: *this, args&: diag_size, args&: b,
1388 args&: callee_fndecl,
1389 args: access->sizarg,
1390 args: access_str));
1391 }
1392 }
1393}
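
/* For example, a declaration along the lines of

     extern void fill_buf (char *buf, unsigned n)
       __attribute__ ((access (write_only, 1, 2)));

   records argument 2 as the size of the buffer in argument 1; a call
   passing a tainted, unchecked "n" would then be reported via
   tainted_access_attrib_size ("fill_buf" and "n" are placeholders).  */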
1394
1395/* Complain if ASSIGN (a division operation) has a tainted divisor
1396 that could be zero. */
1397
1398void
1399taint_state_machine::check_for_tainted_divisor (sm_context *sm_ctxt,
1400 const supernode *node,
1401 const gassign *assign) const
1402{
1403 const region_model *old_model = sm_ctxt->get_old_region_model ();
1404 if (!old_model)
1405 return;
1406
1407 tree divisor_expr = gimple_assign_rhs2 (gs: assign);
1408
1409 /* Until we track conditions on floating point values, we can't check to
1410 see if they've been checked against zero. */
1411 if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr)))
1412 return;
1413
1414 const svalue *divisor_sval = old_model->get_rvalue (expr: divisor_expr, NULL);
1415
1416 state_t state = sm_ctxt->get_state (stmt: assign, divisor_sval);
1417 enum bounds b;
1418 if (get_taint (state, TREE_TYPE (divisor_expr), out: &b))
1419 {
1420 const svalue *zero_sval
1421 = old_model->get_manager ()->get_or_create_int_cst
1422 (TREE_TYPE (divisor_expr), cst: 0);
1423 tristate ts
1424 = old_model->eval_condition (lhs: divisor_sval, op: NE_EXPR, rhs: zero_sval);
1425 if (ts.is_true ())
1426 /* The divisor is known to not equal 0: don't warn. */
1427 return;
1428
1429 tree diag_divisor = sm_ctxt->get_diagnostic_tree (expr: divisor_expr);
1430 sm_ctxt->warn (node, stmt: assign, var: divisor_expr,
1431 d: make_unique <tainted_divisor> (args: *this, args&: diag_divisor, args&: b));
1432 sm_ctxt->set_next_state (stmt: assign, var: divisor_sval, to: m_stop);
1433 }
1434}
1435
1436} // anonymous namespace
1437
1438/* Internal interface to this file. */
1439
1440state_machine *
1441make_taint_state_machine (logger *logger)
1442{
1443 return new taint_state_machine (logger);
1444}
1445
1446/* A closed concrete range. */
1447
1448class concrete_range
1449{
1450public:
1451 /* Return true iff THIS is fully within OTHER
1452 i.e.
1453 - m_min must be >= OTHER.m_min
1454 - m_max must be <= OTHER.m_max. */
1455 bool within_p (const concrete_range &other) const
1456 {
1457 if (compare_constants (lhs_const: m_min, op: GE_EXPR, rhs_const: other.m_min).is_true ())
1458 if (compare_constants (lhs_const: m_max, op: LE_EXPR, rhs_const: other.m_max).is_true ())
1459 return true;
1460 return false;
1461 }
1462
1463 tree m_min;
1464 tree m_max;
1465};
1466
1467/* Attempt to get a closed concrete range for SVAL based on types.
1468 If found, write to *OUT and return true.
1469 Otherwise return false. */
1470
1471static bool
1472get_possible_range (const svalue *sval, concrete_range *out)
1473{
1474 if (const svalue *inner = sval->maybe_undo_cast ())
1475 {
1476 concrete_range inner_range;
1477 if (!get_possible_range (sval: inner, out: &inner_range))
1478 return false;
1479
1480 if (sval->get_type ()
1481 && inner->get_type ()
1482 && INTEGRAL_TYPE_P (sval->get_type ())
1483 && INTEGRAL_TYPE_P (inner->get_type ())
1484 && TYPE_UNSIGNED (inner->get_type ())
1485 && (TYPE_PRECISION (sval->get_type ())
1486 > TYPE_PRECISION (inner->get_type ())))
1487 {
1488 /* We have a cast from an unsigned type to a wider integral type.
1489 Assuming this is zero-extension, we can inherit the range from
1490 the inner type. */
1491 enum tree_code op = ((const unaryop_svalue *)sval)->get_op ();
1492 out->m_min = fold_unary (op, sval->get_type (), inner_range.m_min);
1493 out->m_max = fold_unary (op, sval->get_type (), inner_range.m_max);
1494 return true;
1495 }
1496 }
1497
1498 if (sval->get_type ()
1499 && INTEGRAL_TYPE_P (sval->get_type ()))
1500 {
1501 out->m_min = TYPE_MIN_VALUE (sval->get_type ());
1502 out->m_max = TYPE_MAX_VALUE (sval->get_type ());
1503 return true;
1504 }
1505
1506 return false;
1507}
1508
1509/* Determine if it's possible for tainted array access ELEMENT_REG to
1510 actually be a problem.
1511
1512 Check here for the index having a narrow type, e.g. unsigned char
1513 when the array contains at least 256 elements.
1514
1515 Return true if out-of-bounds is possible, false if it's impossible
1516 (for suppressing false positives). */
1517
1518static bool
1519index_can_be_out_of_bounds_p (const element_region *element_reg)
1520{
1521 const svalue *index = element_reg->get_index ();
1522 const region *array_reg = element_reg->get_parent_region ();
1523
1524 if (array_reg->get_type ()
1525 && TREE_CODE (array_reg->get_type ()) == ARRAY_TYPE
1526 && TYPE_DOMAIN (array_reg->get_type ())
1527 && INTEGRAL_TYPE_P (TYPE_DOMAIN (array_reg->get_type ())))
1528 {
1529 concrete_range valid_index_range;
1530 valid_index_range.m_min
1531 = TYPE_MIN_VALUE (TYPE_DOMAIN (array_reg->get_type ()));
1532 valid_index_range.m_max
1533 = TYPE_MAX_VALUE (TYPE_DOMAIN (array_reg->get_type ()));
1534
1535 concrete_range possible_index_range;
1536 if (get_possible_range (sval: index, out: &possible_index_range))
1537 if (possible_index_range.within_p (other: valid_index_range))
1538 return false;
1539 }
1540
1541 return true;
1542}
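
/* For instance (placeholder code):

     int lut[256];
     unsigned char key = get_untrusted_byte ();
     return lut[key];

   "key" can only be 0..255, so even though it is tainted the access
   cannot be out of bounds, and tainted_array_index is not reported.  */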
1543
1544/* Complain to CTXT if accessing REG could lead to arbitrary
1545 memory access under an attacker's control (due to taint). */
1546
1547void
1548region_model::check_region_for_taint (const region *reg,
1549 enum access_direction,
1550 region_model_context *ctxt) const
1551{
1552 gcc_assert (reg);
1553 gcc_assert (ctxt);
1554
1555 LOG_SCOPE (ctxt->get_logger ());
1556
1557 sm_state_map *smap;
1558 const state_machine *sm;
1559 unsigned sm_idx;
1560 if (!ctxt->get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx))
1561 return;
1562
1563 gcc_assert (smap);
1564 gcc_assert (sm);
1565
1566 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1567
1568 const extrinsic_state *ext_state = ctxt->get_ext_state ();
1569 if (!ext_state)
1570 return;
1571
1572 const region *iter_region = reg;
1573 while (iter_region)
1574 {
1575 switch (iter_region->get_kind ())
1576 {
1577 default:
1578 break;
1579
1580 case RK_ELEMENT:
1581 {
1582 const element_region *element_reg
1583 = (const element_region *)iter_region;
1584 const svalue *index = element_reg->get_index ();
1585 const state_machine::state_t
1586 state = smap->get_state (sval: index, ext_state: *ext_state);
1587 gcc_assert (state);
1588 enum bounds b;
1589 if (taint_sm.get_taint (state, type: index->get_type (), out: &b))
1590 {
1591 if (index_can_be_out_of_bounds_p (element_reg))
1592 {
1593 tree arg = get_representative_tree (sval: index);
1594 ctxt->warn (d: make_unique<tainted_array_index> (args: taint_sm,
1595 args&: arg, args&: b));
1596 }
1597 else if (ctxt->get_logger ())
1598 ctxt->get_logger ()->log (fmt: "rejecting tainted_array_index as"
1599 " out of bounds is not possible");
1600 }
1601 }
1602 break;
1603
1604 case RK_OFFSET:
1605 {
1606 const offset_region *offset_reg
1607 = (const offset_region *)iter_region;
1608 const svalue *offset = offset_reg->get_byte_offset ();
1609 const state_machine::state_t
1610 state = smap->get_state (sval: offset, ext_state: *ext_state);
1611 gcc_assert (state);
1612 /* Handle implicit cast to sizetype. */
1613 tree effective_type = offset->get_type ();
1614 if (const svalue *cast = offset->maybe_undo_cast ())
1615 if (cast->get_type ())
1616 effective_type = cast->get_type ();
1617 enum bounds b;
1618 if (taint_sm.get_taint (state, type: effective_type, out: &b))
1619 {
1620 tree arg = get_representative_tree (sval: offset);
1621 ctxt->warn (d: make_unique<tainted_offset> (args: taint_sm, args&: arg, args&: b,
1622 args&: offset));
1623 }
1624 }
1625 break;
1626
1627 case RK_CAST:
1628 {
1629 const cast_region *cast_reg
1630 = as_a <const cast_region *> (p: iter_region);
1631 iter_region = cast_reg->get_original_region ();
1632 continue;
1633 }
1634
1635 case RK_SIZED:
1636 {
1637 const sized_region *sized_reg
1638 = (const sized_region *)iter_region;
1639 const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
1640 const state_machine::state_t
1641 state = smap->get_state (sval: size_sval, ext_state: *ext_state);
1642 gcc_assert (state);
1643 enum bounds b;
1644 if (taint_sm.get_taint (state, type: size_sval->get_type (), out: &b))
1645 {
1646 tree arg = get_representative_tree (sval: size_sval);
1647 ctxt->warn (d: make_unique<tainted_size> (args: taint_sm, args&: arg, args&: b));
1648 }
1649 }
1650 break;
1651 }
1652
1653 iter_region = iter_region->get_parent_region ();
1654 }
1655}
1656
1657/* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1658 under an attacker's control (due to taint), where the allocation
1659 is happening within MEM_SPACE. */
1660
1661void
1662region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
1663 const svalue *size_in_bytes,
1664 region_model_context *ctxt) const
1665{
1666 gcc_assert (size_in_bytes);
1667 gcc_assert (ctxt);
1668
1669 LOG_SCOPE (ctxt->get_logger ());
1670
1671 sm_state_map *smap;
1672 const state_machine *sm;
1673 unsigned sm_idx;
1674 if (!ctxt->get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx))
1675 return;
1676
1677 gcc_assert (smap);
1678 gcc_assert (sm);
1679
1680 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1681
1682 const extrinsic_state *ext_state = ctxt->get_ext_state ();
1683 if (!ext_state)
1684 return;
1685
1686 const state_machine::state_t
1687 state = smap->get_state (sval: size_in_bytes, ext_state: *ext_state);
1688 gcc_assert (state);
1689 enum bounds b;
1690 if (taint_sm.get_taint (state, type: size_in_bytes->get_type (), out: &b))
1691 {
1692 tree arg = get_representative_tree (sval: size_in_bytes);
1693 ctxt->warn (d: make_unique<tainted_allocation_size>
1694 (args: taint_sm, args&: arg, args&: size_in_bytes, args&: b, args&: mem_space));
1695 }
1696}
1697
1698/* Mark SVAL as TAINTED. CTXT must be non-NULL. */
1699
1700void
1701region_model::mark_as_tainted (const svalue *sval,
1702 region_model_context *ctxt)
1703{
1704 gcc_assert (sval);
1705 gcc_assert (ctxt);
1706
1707 sm_state_map *smap;
1708 const state_machine *sm;
1709 unsigned sm_idx;
1710 if (!ctxt->get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx))
1711 return;
1712
1713 gcc_assert (smap);
1714 gcc_assert (sm);
1715
1716 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1717
1718 const extrinsic_state *ext_state = ctxt->get_ext_state ();
1719 if (!ext_state)
1720 return;
1721
1722 smap->set_state (model: this, sval, state: taint_sm.m_tainted, NULL, ext_state: *ext_state);
1723}
1724
1725/* Return true if SVAL could possibly be attacker-controlled. */
1726
1727bool
1728region_model_context::possibly_tainted_p (const svalue *sval)
1729{
1730 sm_state_map *smap;
1731 const state_machine *sm;
1732 unsigned sm_idx;
1733 if (!get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx))
1734 return false;
1735
1736 const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1737
1738 const extrinsic_state *ext_state = get_ext_state ();
1739 if (!ext_state)
1740 return false;
1741
1742 const state_machine::state_t state = smap->get_state (sval, ext_state: *ext_state);
1743 gcc_assert (state);
1744
1745 return (state == taint_sm.m_tainted
1746 || state == taint_sm.m_has_lb
1747 || state == taint_sm.m_has_ub);
1748}
1749
1750} // namespace ana
1751
1752#endif /* #if ENABLE_ANALYZER */
1753
