1 | /* A state machine for tracking "taint": unsanitized uses |
2 | of data potentially under an attacker's control. |
3 | |
4 | Copyright (C) 2019-2024 Free Software Foundation, Inc. |
5 | Contributed by David Malcolm <dmalcolm@redhat.com>. |
6 | |
7 | This file is part of GCC. |
8 | |
9 | GCC is free software; you can redistribute it and/or modify it |
10 | under the terms of the GNU General Public License as published by |
11 | the Free Software Foundation; either version 3, or (at your option) |
12 | any later version. |
13 | |
14 | GCC is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with GCC; see the file COPYING3. If not see |
21 | <http://www.gnu.org/licenses/>. */ |
22 | |
23 | #include "config.h" |
24 | #define INCLUDE_MEMORY |
25 | #include "system.h" |
26 | #include "coretypes.h" |
27 | #include "make-unique.h" |
28 | #include "tree.h" |
29 | #include "function.h" |
30 | #include "basic-block.h" |
31 | #include "gimple.h" |
32 | #include "options.h" |
33 | #include "diagnostic-path.h" |
34 | #include "analyzer/analyzer.h" |
35 | #include "analyzer/analyzer-logging.h" |
36 | #include "gimple-iterator.h" |
37 | #include "ordered-hash-map.h" |
38 | #include "cgraph.h" |
39 | #include "cfg.h" |
40 | #include "digraph.h" |
41 | #include "stringpool.h" |
42 | #include "attribs.h" |
43 | #include "fold-const.h" |
44 | #include "analyzer/supergraph.h" |
45 | #include "analyzer/call-string.h" |
46 | #include "analyzer/program-point.h" |
47 | #include "analyzer/store.h" |
48 | #include "analyzer/region-model.h" |
49 | #include "analyzer/sm.h" |
50 | #include "analyzer/program-state.h" |
51 | #include "analyzer/pending-diagnostic.h" |
52 | #include "analyzer/constraint-manager.h" |
53 | #include "diagnostic-format-sarif.h" |
54 | |
55 | #if ENABLE_ANALYZER |
56 | |
57 | namespace ana { |
58 | |
59 | namespace { |
60 | |
61 | /* An enum for describing tainted values. */ |
62 | |
enum bounds
{
  /* This tainted value has no upper or lower bound.  */
  BOUNDS_NONE,

  /* This tainted value has an upper bound, but no lower bound.  */
  BOUNDS_UPPER,

  /* This tainted value has a lower bound, but no upper bound.  */
  BOUNDS_LOWER
};
74 | |
75 | static const char * |
76 | bounds_to_str (enum bounds b) |
77 | { |
78 | switch (b) |
79 | { |
80 | default: |
81 | gcc_unreachable (); |
82 | case BOUNDS_NONE: |
83 | return "BOUNDS_NONE" ; |
84 | case BOUNDS_UPPER: |
85 | return "BOUNDS_UPPER" ; |
86 | case BOUNDS_LOWER: |
87 | return "BOUNDS_LOWER" ; |
88 | } |
89 | } |
90 | |
91 | /* An experimental state machine, for tracking "taint": unsanitized uses |
92 | of data potentially under an attacker's control. */ |
93 | |
class taint_state_machine : public state_machine
{
public:
  taint_state_machine (logger *logger);

  /* Taint state is tracked per-svalue and is inherited by svalues
     derived from tainted ones (see alt_get_inherited_state).  */
  bool inherited_state_p () const final override { return true; }

  /* Compute the state of a compound svalue (cast/binary op) from the
     states of its operands.  */
  state_t alt_get_inherited_state (const sm_state_map &map,
				   const svalue *sval,
				   const extrinsic_state &ext_state)
    const final override;

  bool
  has_alt_get_inherited_state_p () const final override
  {
    return true;
  }

  bool on_stmt (sm_context *sm_ctxt,
		const supernode *node,
		const gimple *stmt) const final override;

  void on_condition (sm_context *sm_ctxt,
		     const supernode *node,
		     const gimple *stmt,
		     const svalue *lhs,
		     enum tree_code op,
		     const svalue *rhs) const final override;
  void on_bounded_ranges (sm_context *sm_ctxt,
			  const supernode *node,
			  const gimple *stmt,
			  const svalue &sval,
			  const bounded_ranges &ranges) const final override;

  bool can_purge_p (state_t s) const final override;

  /* Write S's bounds classification to *OUT.  Implementation is not in
     this chunk -- presumably returns true iff S is a taint-related
     state; TODO confirm against the rest of the file.  */
  bool get_taint (state_t s, tree type, enum bounds *out) const;

  /* Combine the taint states S0 and S1 (used for binary operations in
     alt_get_inherited_state); implementation elsewhere in this file.  */
  state_t combine_states (state_t s0, state_t s1) const;

private:
  void check_control_flow_arg_for_taint (sm_context *sm_ctxt,
					 const gimple *stmt,
					 tree expr) const;

  void check_for_tainted_size_arg (sm_context *sm_ctxt,
				   const supernode *node,
				   const gcall *call,
				   tree callee_fndecl) const;
  void check_for_tainted_divisor (sm_context *sm_ctxt,
				  const supernode *node,
				  const gassign *assign) const;

public:
  /* State for a "tainted" value: unsanitized data potentially under an
     attacker's control.  */
  state_t m_tainted;

  /* State for a "tainted" value that has a lower bound.  */
  state_t m_has_lb;

  /* State for a "tainted" value that has an upper bound.  */
  state_t m_has_ub;

  /* Stop state, for a value we don't want to track any more.  */
  state_t m_stop;

  /* Global state, for when the last condition had tainted arguments.  */
  state_t m_tainted_control_flow;
};
164 | |
165 | /* Class for diagnostics relating to taint_state_machine. */ |
166 | |
class taint_diagnostic : public pending_diagnostic
{
public:
  taint_diagnostic (const taint_state_machine &sm, tree arg,
		    enum bounds has_bounds)
  : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
  {}

  /* Deduplication: two taint diagnostics are considered equal if they
     refer to the same tree and have the same bounds classification.  */
  bool subclass_equal_p (const pending_diagnostic &base_other) const override
  {
    const taint_diagnostic &other = (const taint_diagnostic &)base_other;
    return (same_tree_p (t1: m_arg, t2: other.m_arg)
	    && m_has_bounds == other.m_has_bounds);
  }

  /* Describe state-change events along the diagnostic path: where the
     value became tainted, or had one of its bounds checked.  Returns an
     empty label_text for transitions we don't describe.  */
  label_text describe_state_change (const evdesc::state_change &change) override
  {
    if (change.m_new_state == m_sm.m_tainted)
      {
	if (change.m_origin)
	  return change.formatted_print (fmt: "%qE has an unchecked value here"
					 " (from %qE)",
					 change.m_expr, change.m_origin);
	else
	  return change.formatted_print (fmt: "%qE gets an unchecked value here",
					 change.m_expr);
      }
    else if (change.m_new_state == m_sm.m_has_lb)
      return change.formatted_print (fmt: "%qE has its lower bound checked here",
				     change.m_expr);
    else if (change.m_new_state == m_sm.m_has_ub)
      return change.formatted_print (fmt: "%qE has its upper bound checked here",
				     change.m_expr);
    return label_text ();
  }

  /* Mark the transition into the "tainted" state as "acquire taint" for
     diagnostic-event metadata purposes.  */
  diagnostic_event::meaning
  get_meaning_for_state_change (const evdesc::state_change &change)
    const final override
  {
    if (change.m_new_state == m_sm.m_tainted)
      return diagnostic_event::meaning (diagnostic_event::VERB_acquire,
					diagnostic_event::NOUN_taint);
    return diagnostic_event::meaning ();
  }

  /* Record the tainted arg and its bounds classification as SARIF
     properties on the result object.  */
  void maybe_add_sarif_properties (sarif_object &result_obj)
    const override
  {
    sarif_property_bag &props = result_obj.get_or_create_properties ();
#define PROPERTY_PREFIX "gcc/analyzer/taint_diagnostic/"
    props.set (PROPERTY_PREFIX "arg", v: tree_to_json (node: m_arg));
    props.set_string (PROPERTY_PREFIX "has_bounds",
		      utf8_value: bounds_to_str (b: m_has_bounds));
#undef PROPERTY_PREFIX
  }

protected:
  /* The state machine that created this diagnostic.  */
  const taint_state_machine &m_sm;

  /* The tainted expression, or NULL_TREE if not known (emit vfuncs
     check for this).  */
  tree m_arg;

  /* Which bounds (if any) have been checked on the tainted value.  */
  enum bounds m_has_bounds;
};
229 | |
230 | /* Concrete taint_diagnostic subclass for reporting attacker-controlled |
231 | array index. */ |
232 | |
class tainted_array_index : public taint_diagnostic
{
public:
  tainted_array_index (const taint_state_machine &sm, tree arg,
		       enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const final override { return "tainted_array_index"; }

  int get_controlling_option () const final override
  {
    return OPT_Wanalyzer_tainted_array_index;
  }

  /* Emit the warning; the wording varies based on whether the specific
     expression (m_arg) is known and on which bounds have already been
     checked.  Returns true iff a diagnostic was emitted.  */
  bool emit (diagnostic_emission_context &ctxt) final override
  {
    /* CWE-129: "Improper Validation of Array Index".  */
    ctxt.add_cwe (cwe: 129);
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ctxt.warn ("use of attacker-controlled value %qE"
			    " in array lookup without bounds checking",
			    m_arg);
	  break;
	case BOUNDS_UPPER:
	  return ctxt.warn ("use of attacker-controlled value %qE"
			    " in array lookup without checking for negative",
			    m_arg);
	  break;
	case BOUNDS_LOWER:
	  return ctxt.warn ("use of attacker-controlled value %qE"
			    " in array lookup without upper-bounds checking",
			    m_arg);
	  break;
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ctxt.warn ("use of attacker-controlled value"
			    " in array lookup without bounds checking");
	  break;
	case BOUNDS_UPPER:
	  return ctxt.warn ("use of attacker-controlled value"
			    " in array lookup without checking for"
			    " negative");
	  break;
	case BOUNDS_LOWER:
	  return ctxt.warn ("use of attacker-controlled value"
			    " in array lookup without upper-bounds"
			    " checking");
	  break;
	}
  }

  /* Describe the final event in the diagnostic path, mirroring the
     wording used in emit above.  */
  label_text describe_final_event (const evdesc::final_event &ev) final override
  {
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value %qE in array lookup"
	     " without bounds checking",
	     m_arg);
	case BOUNDS_UPPER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value %qE"
	     " in array lookup without checking for negative",
	     m_arg);
	case BOUNDS_LOWER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value %qE"
	     " in array lookup without upper-bounds checking",
	     m_arg);
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value in array lookup"
	     " without bounds checking");
	case BOUNDS_UPPER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value"
	     " in array lookup without checking for negative");
	case BOUNDS_LOWER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value"
	     " in array lookup without upper-bounds checking");
	}
  }
};
338 | |
339 | /* Concrete taint_diagnostic subclass for reporting attacker-controlled |
340 | pointer offset. */ |
341 | |
class tainted_offset : public taint_diagnostic
{
public:
  tainted_offset (const taint_state_machine &sm, tree arg,
		  enum bounds has_bounds,
		  const svalue *offset)
  : taint_diagnostic (sm, arg, has_bounds),
    m_offset (offset)
  {}

  const char *get_kind () const final override { return "tainted_offset"; }

  int get_controlling_option () const final override
  {
    return OPT_Wanalyzer_tainted_offset;
  }

  /* Emit the warning; the wording varies based on whether the specific
     expression (m_arg) is known and on which bounds have already been
     checked.  Returns true iff a diagnostic was emitted.  */
  bool emit (diagnostic_emission_context &ctxt) final override
  {
    /* CWE-823: "Use of Out-of-range Pointer Offset".  */
    ctxt.add_cwe (cwe: 823);
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ctxt.warn ("use of attacker-controlled value %qE as offset"
			    " without bounds checking",
			    m_arg);
	  break;
	case BOUNDS_UPPER:
	  return ctxt.warn ("use of attacker-controlled value %qE as offset"
			    " without lower-bounds checking",
			    m_arg);
	  break;
	case BOUNDS_LOWER:
	  return ctxt.warn ("use of attacker-controlled value %qE as offset"
			    " without upper-bounds checking",
			    m_arg);
	  break;
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ctxt.warn ("use of attacker-controlled value as offset"
			    " without bounds checking");
	  break;
	case BOUNDS_UPPER:
	  return ctxt.warn ("use of attacker-controlled value as offset"
			    " without lower-bounds checking");
	  break;
	case BOUNDS_LOWER:
	  return ctxt.warn ("use of attacker-controlled value as offset"
			    " without upper-bounds checking");
	  break;
	}
  }

  /* Describe the final event in the diagnostic path, mirroring the
     wording used in emit above.  */
  label_text describe_final_event (const evdesc::final_event &ev) final override
  {
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
				     " as offset without bounds checking",
				     m_arg);
	case BOUNDS_UPPER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
				     " as offset without lower-bounds checking",
				     m_arg);
	case BOUNDS_LOWER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
				     " as offset without upper-bounds checking",
				     m_arg);
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print (fmt: "use of attacker-controlled value"
				     " as offset without bounds checking");
	case BOUNDS_UPPER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value"
				     " as offset without lower-bounds"
				     " checking");
	case BOUNDS_LOWER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value"
				     " as offset without upper-bounds"
				     " checking");
	}
  }

  /* Also record the offset svalue as a SARIF property, for debugging.  */
  void maybe_add_sarif_properties (sarif_object &result_obj)
    const final override
  {
    taint_diagnostic::maybe_add_sarif_properties (result_obj);
    sarif_property_bag &props = result_obj.get_or_create_properties ();
#define PROPERTY_PREFIX "gcc/analyzer/tainted_offset/"
    props.set (PROPERTY_PREFIX "offset", v: m_offset->to_json ());
#undef PROPERTY_PREFIX
  }

private:
  /* The tainted offset value.  */
  const svalue *m_offset;
};
456 | |
457 | /* Concrete taint_diagnostic subclass for reporting attacker-controlled |
458 | size. */ |
459 | |
class tainted_size : public taint_diagnostic
{
public:
  tainted_size (const taint_state_machine &sm, tree arg,
		enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  /* Note: not "final"; overridden by tainted_access_attrib_size.  */
  const char *get_kind () const override { return "tainted_size"; }

  int get_controlling_option () const final override
  {
    return OPT_Wanalyzer_tainted_size;
  }

  /* Emit the warning; the wording varies based on whether the specific
     expression (m_arg) is known and on which bounds have already been
     checked.  Returns true iff a diagnostic was emitted.
     Not "final": tainted_access_attrib_size extends this with a note.  */
  bool emit (diagnostic_emission_context &ctxt) override
  {
    /* "CWE-129: Improper Validation of Array Index".  */
    ctxt.add_cwe (cwe: 129);
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ctxt.warn ("use of attacker-controlled value %qE as size"
			    " without bounds checking",
			    m_arg);
	  break;
	case BOUNDS_UPPER:
	  return ctxt.warn ("use of attacker-controlled value %qE as size"
			    " without lower-bounds checking",
			    m_arg);
	  break;
	case BOUNDS_LOWER:
	  return ctxt.warn ("use of attacker-controlled value %qE as size"
			    " without upper-bounds checking",
			    m_arg);
	  break;
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ctxt.warn ("use of attacker-controlled value as size"
			    " without bounds checking");
	  break;
	case BOUNDS_UPPER:
	  return ctxt.warn ("use of attacker-controlled value as size"
			    " without lower-bounds checking");
	  break;
	case BOUNDS_LOWER:
	  return ctxt.warn ("use of attacker-controlled value as size"
			    " without upper-bounds checking");
	  break;
	}
  }

  /* Describe the final event in the diagnostic path, mirroring the
     wording used in emit above.  */
  label_text describe_final_event (const evdesc::final_event &ev) final override
  {
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
				     " as size without bounds checking",
				     m_arg);
	case BOUNDS_UPPER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
				     " as size without lower-bounds checking",
				     m_arg);
	case BOUNDS_LOWER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value %qE"
				     " as size without upper-bounds checking",
				     m_arg);
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print (fmt: "use of attacker-controlled value"
				     " as size without bounds checking");
	case BOUNDS_UPPER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value"
				     " as size without lower-bounds checking");
	case BOUNDS_LOWER:
	  return ev.formatted_print (fmt: "use of attacker-controlled value"
				     " as size without upper-bounds checking");
	}
  }
};
557 | |
558 | /* Subclass of tainted_size for reporting on tainted size values |
559 | passed to an external function annotated with attribute "access". */ |
560 | |
class tainted_access_attrib_size : public tainted_size
{
public:
  tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
			      enum bounds has_bounds, tree callee_fndecl,
			      unsigned size_argno, const char *access_str)
  : tainted_size (sm, arg, has_bounds),
    m_callee_fndecl (callee_fndecl),
    m_size_argno (size_argno), m_access_str (access_str)
  {
  }

  const char *get_kind () const override
  {
    return "tainted_access_attrib_size";
  }

  /* Emit the base tainted_size warning; if one was emitted, add a note
     at the callee's declaration explaining why the argument is treated
     as a size.  */
  bool emit (diagnostic_emission_context &ctxt) final override
  {
    bool warned = tainted_size::emit (ctxt);
    if (warned)
      {
	inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
		"parameter %i of %qD marked as a size via attribute %qs",
		m_size_argno + 1, m_callee_fndecl, m_access_str);
      }
    return warned;
  }

private:
  /* The function whose "access" attribute marks the size parameter.  */
  tree m_callee_fndecl;

  /* Zero-based index of the size argument (printed 1-based above).  */
  unsigned m_size_argno;

  /* The attribute name string, for use in the note.  */
  const char *m_access_str;
};
595 | |
596 | /* Concrete taint_diagnostic subclass for reporting attacker-controlled |
597 | divisor (so that an attacker can trigger a divide by zero). */ |
598 | |
class tainted_divisor : public taint_diagnostic
{
public:
  tainted_divisor (const taint_state_machine &sm, tree arg,
		   enum bounds has_bounds)
  : taint_diagnostic (sm, arg, has_bounds)
  {}

  const char *get_kind () const final override { return "tainted_divisor"; }

  int get_controlling_option () const final override
  {
    return OPT_Wanalyzer_tainted_divisor;
  }

  /* Emit the warning; the wording varies based on whether the specific
     expression (m_arg) is known.  Returns true iff a diagnostic was
     emitted.  */
  bool emit (diagnostic_emission_context &ctxt) final override
  {
    /* CWE-369: "Divide By Zero".  */
    ctxt.add_cwe (cwe: 369);
    if (m_arg)
      return ctxt.warn ("use of attacker-controlled value %qE as divisor"
			" without checking for zero",
			m_arg);
    else
      return ctxt.warn ("use of attacker-controlled value as divisor"
			" without checking for zero");
  }

  /* Describe the final event in the diagnostic path, mirroring the
     wording used in emit above.  */
  label_text describe_final_event (const evdesc::final_event &ev) final override
  {
    if (m_arg)
      return ev.formatted_print
	(fmt: "use of attacker-controlled value %qE as divisor"
	 " without checking for zero",
	 m_arg);
    else
      return ev.formatted_print
	(fmt: "use of attacker-controlled value as divisor"
	 " without checking for zero");
  }
};
640 | |
641 | /* Concrete taint_diagnostic subclass for reporting attacker-controlled |
642 | size of a dynamic allocation. */ |
643 | |
class tainted_allocation_size : public taint_diagnostic
{
public:
  tainted_allocation_size (const taint_state_machine &sm, tree arg,
			   const svalue *size_in_bytes,
			   enum bounds has_bounds, enum memory_space mem_space)
  : taint_diagnostic (sm, arg, has_bounds),
    m_size_in_bytes (size_in_bytes),
    m_mem_space (mem_space)
  {
  }

  const char *get_kind () const final override
  {
    return "tainted_allocation_size";
  }

  /* Deduplication: additionally require the same memory space.  */
  bool subclass_equal_p (const pending_diagnostic &base_other) const override
  {
    if (!taint_diagnostic::subclass_equal_p (base_other))
      return false;
    const tainted_allocation_size &other
      = (const tainted_allocation_size &)base_other;
    return m_mem_space == other.m_mem_space;
  }

  int get_controlling_option () const final override
  {
    return OPT_Wanalyzer_tainted_allocation_size;
  }

  /* Emit the warning; the wording varies based on whether the specific
     expression (m_arg) is known and on which bounds have already been
     checked.  If a warning is emitted and the memory space is known,
     add a note saying whether the allocation is stack- or heap-based.
     Returns true iff a diagnostic was emitted.  */
  bool emit (diagnostic_emission_context &ctxt) final override
  {
    /* "CWE-789: Memory Allocation with Excessive Size Value".  */
    ctxt.add_cwe (cwe: 789);

    bool warned;
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  warned = ctxt.warn ("use of attacker-controlled value %qE as"
			      " allocation size without bounds checking",
			      m_arg);
	  break;
	case BOUNDS_UPPER:
	  warned = ctxt.warn ("use of attacker-controlled value %qE as"
			      " allocation size without"
			      " lower-bounds checking",
			      m_arg);
	  break;
	case BOUNDS_LOWER:
	  warned = ctxt.warn ("use of attacker-controlled value %qE as"
			      " allocation size without"
			      " upper-bounds checking",
			      m_arg);
	  break;
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  warned = ctxt.warn ("use of attacker-controlled value as"
			      " allocation size without bounds"
			      " checking");
	  break;
	case BOUNDS_UPPER:
	  warned = ctxt.warn ("use of attacker-controlled value as"
			      " allocation size without"
			      " lower-bounds checking");
	  break;
	case BOUNDS_LOWER:
	  warned = ctxt.warn ("use of attacker-controlled value as"
			      " allocation size without"
			      " upper-bounds checking");
	  break;
	}
    if (warned)
      {
	const location_t loc = ctxt.get_location ();
	switch (m_mem_space)
	  {
	  default:
	    break;
	  case MEMSPACE_STACK:
	    inform (loc, "stack-based allocation");
	    break;
	  case MEMSPACE_HEAP:
	    inform (loc, "heap-based allocation");
	    break;
	  }
      }
    return warned;
  }

  /* Describe the final event in the diagnostic path, mirroring the
     wording used in emit above.  */
  label_text describe_final_event (const evdesc::final_event &ev) final override
  {
    if (m_arg)
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value %qE as allocation size"
	     " without bounds checking",
	     m_arg);
	case BOUNDS_UPPER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value %qE as allocation size"
	     " without lower-bounds checking",
	     m_arg);
	case BOUNDS_LOWER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value %qE as allocation size"
	     " without upper-bounds checking",
	     m_arg);
	}
    else
      switch (m_has_bounds)
	{
	default:
	  gcc_unreachable ();
	case BOUNDS_NONE:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value as allocation size"
	     " without bounds checking");
	case BOUNDS_UPPER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value as allocation size"
	     " without lower-bounds checking");
	case BOUNDS_LOWER:
	  return ev.formatted_print
	    (fmt: "use of attacker-controlled value as allocation size"
	     " without upper-bounds checking");
	}
  }

  /* Also record the size svalue as a SARIF property, for debugging.  */
  void maybe_add_sarif_properties (sarif_object &result_obj)
    const final override
  {
    taint_diagnostic::maybe_add_sarif_properties (result_obj);
    sarif_property_bag &props = result_obj.get_or_create_properties ();
#define PROPERTY_PREFIX "gcc/analyzer/tainted_allocation_size/"
    props.set (PROPERTY_PREFIX "size_in_bytes", v: m_size_in_bytes->to_json ());
#undef PROPERTY_PREFIX
  }

private:
  /* The tainted allocation size, in bytes.  */
  const svalue *m_size_in_bytes;

  /* Which memory space the allocation is in (stack/heap/unknown).  */
  enum memory_space m_mem_space;
};
800 | |
801 | /* Concrete taint_diagnostic subclass for reporting attacker-controlled |
802 | value being used as part of the condition of an assertion. */ |
803 | |
804 | class tainted_assertion : public taint_diagnostic |
805 | { |
806 | public: |
807 | tainted_assertion (const taint_state_machine &sm, tree arg, |
808 | tree assert_failure_fndecl) |
809 | : taint_diagnostic (sm, arg, BOUNDS_NONE), |
810 | m_assert_failure_fndecl (assert_failure_fndecl) |
811 | { |
812 | gcc_assert (m_assert_failure_fndecl); |
813 | } |
814 | |
815 | const char *get_kind () const final override |
816 | { |
817 | return "tainted_assertion" ; |
818 | } |
819 | |
820 | bool subclass_equal_p (const pending_diagnostic &base_other) const override |
821 | { |
822 | if (!taint_diagnostic::subclass_equal_p (base_other)) |
823 | return false; |
824 | const tainted_assertion &other |
825 | = (const tainted_assertion &)base_other; |
826 | return m_assert_failure_fndecl == other.m_assert_failure_fndecl; |
827 | } |
828 | |
829 | int get_controlling_option () const final override |
830 | { |
831 | return OPT_Wanalyzer_tainted_assertion; |
832 | } |
833 | |
834 | bool emit (diagnostic_emission_context &ctxt) final override |
835 | { |
836 | /* "CWE-617: Reachable Assertion". */ |
837 | ctxt.add_cwe (cwe: 617); |
838 | |
839 | return ctxt.warn ("use of attacked-controlled value in" |
840 | " condition for assertion" ); |
841 | } |
842 | |
843 | location_t fixup_location (location_t loc, |
844 | bool primary) const final override |
845 | { |
846 | if (primary) |
847 | /* For the primary location we want to avoid being in e.g. the |
848 | <assert.h> system header, since this would suppress the |
849 | diagnostic. */ |
850 | return expansion_point_location_if_in_system_header (loc); |
851 | else if (in_system_header_at (loc)) |
852 | /* For events, we want to show the implemenation of the assert |
853 | macro when we're describing them. */ |
854 | return linemap_resolve_location (line_table, loc, |
855 | lrk: LRK_SPELLING_LOCATION, |
856 | NULL); |
857 | else |
858 | return pending_diagnostic::fixup_location (loc, primary); |
859 | } |
860 | |
861 | label_text describe_state_change (const evdesc::state_change &change) override |
862 | { |
863 | if (change.m_new_state == m_sm.m_tainted_control_flow) |
864 | return change.formatted_print |
865 | (fmt: "use of attacker-controlled value for control flow" ); |
866 | return taint_diagnostic::describe_state_change (change); |
867 | } |
868 | |
869 | label_text describe_final_event (const evdesc::final_event &ev) final override |
870 | { |
871 | if (mention_noreturn_attribute_p ()) |
872 | return ev.formatted_print |
873 | (fmt: "treating %qE as an assertion failure handler" |
874 | " due to %<__attribute__((__noreturn__))%>" , |
875 | m_assert_failure_fndecl); |
876 | else |
877 | return ev.formatted_print |
878 | (fmt: "treating %qE as an assertion failure handler" , |
879 | m_assert_failure_fndecl); |
880 | } |
881 | |
882 | private: |
883 | bool mention_noreturn_attribute_p () const |
884 | { |
885 | if (fndecl_built_in_p (node: m_assert_failure_fndecl, name1: BUILT_IN_UNREACHABLE)) |
886 | return false; |
887 | return true; |
888 | } |
889 | |
890 | tree m_assert_failure_fndecl; |
891 | }; |
892 | |
893 | /* taint_state_machine's ctor. */ |
894 | |
taint_state_machine::taint_state_machine (logger *logger)
: state_machine ("taint", logger),
  /* Register the per-value states (documented in the class decl).  */
  m_tainted (add_state (name: "tainted")),
  m_has_lb (add_state (name: "has_lb")),
  m_has_ub (add_state (name: "has_ub")),
  m_stop (add_state (name: "stop")),
  /* Global state: the last condition had tainted arguments.  */
  m_tainted_control_flow (add_state (name: "tainted-control-flow"))
{
}
904 | |
/* Implementation of the alt_get_inherited_state vfunc for
   taint_state_machine: compute the taint state of a compound svalue
   (cast or binary operation) from the states of its operand(s).
   Returns NULL when there is no inherited state.  */

state_machine::state_t
taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
					      const svalue *sval,
					      const extrinsic_state &ext_state)
  const
{
  switch (sval->get_kind ())
    {
    default:
      break;
    case SK_UNARYOP:
      {
	const unaryop_svalue *unaryop_sval
	  = as_a <const unaryop_svalue *> (p: sval);
	enum tree_code op = unaryop_sval->get_op ();
	const svalue *arg = unaryop_sval->get_arg ();
	switch (op)
	  {
	  case NOP_EXPR:
	    {
	      /* A cast inherits the taint state of its operand.  */
	      state_t arg_state = map.get_state (sval: arg, ext_state);
	      return arg_state;
	    }
	  default:
	    break;
	  }
      }
      break;
    case SK_BINOP:
      {
	const binop_svalue *binop_sval = as_a <const binop_svalue *> (p: sval);
	enum tree_code op = binop_sval->get_op ();
	const svalue *arg0 = binop_sval->get_arg0 ();
	const svalue *arg1 = binop_sval->get_arg1 ();
	switch (op)
	  {
	  default:
	    break;

	  case EQ_EXPR:
	  case GE_EXPR:
	  case LE_EXPR:
	  case NE_EXPR:
	  case GT_EXPR:
	  case LT_EXPR:
	  case UNORDERED_EXPR:
	  case ORDERED_EXPR:
	  case PLUS_EXPR:
	  case MINUS_EXPR:
	  case MULT_EXPR:
	  case POINTER_PLUS_EXPR:
	  case TRUNC_DIV_EXPR:
	    {
	      /* Comparisons and arithmetic combine the states of both
		 operands (via combine_states).  */
	      state_t arg0_state = map.get_state (sval: arg0, ext_state);
	      state_t arg1_state = map.get_state (sval: arg1, ext_state);
	      return combine_states (s0: arg0_state, s1: arg1_state);
	    }
	    break;

	  case TRUNC_MOD_EXPR:
	    {
	      /* The left-hand side of X % Y can be sanitized by
		 the operation.  */
	      return map.get_state (sval: arg1, ext_state);
	    }
	    break;

	  case BIT_AND_EXPR:
	  case RSHIFT_EXPR:
	    /* No inherited state for masking/shifting -- presumably
	       these are treated as sanitizing the result; TODO confirm
	       against the callers of this vfunc.  */
	    return NULL;
	  }
      }
      break;
    }
  /* No special handling: no inherited state.  */
  return NULL;
}
981 | |
982 | /* Return true iff FNDECL should be considered to be an assertion failure |
983 | handler by -Wanalyzer-tainted-assertion. */ |
984 | |
985 | static bool |
986 | is_assertion_failure_handler_p (tree fndecl) |
987 | { |
988 | // i.e. "noreturn" |
989 | if (TREE_THIS_VOLATILE (fndecl)) |
990 | return true; |
991 | |
992 | return false; |
993 | } |
994 | |
995 | /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */ |
996 | |
bool
taint_state_machine::on_stmt (sm_context *sm_ctxt,
			      const supernode *node,
			      const gimple *stmt) const
{
  if (const gcall *call = dyn_cast <const gcall *> (p: stmt))
    if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
      {
	if (is_named_call_p (fndecl: callee_fndecl, funcname: "fread" , call, num_args: 4))
	  {
	    /* "fread" fills the buffer pointed to by arg 0 with external
	       data: mark that buffer as tainted.  */
	    tree arg = gimple_call_arg (gs: call, index: 0);

	    sm_ctxt->on_transition (node, stmt, var: arg, from: m_start, to: m_tainted);

	    /* Dereference an ADDR_EXPR. */
	    // TODO: should the engine do this?
	    if (TREE_CODE (arg) == ADDR_EXPR)
	      sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
				      from: m_start, to: m_tainted);
	    /* Stmt fully handled: tell the engine not to apply
	       its default handling.  */
	    return true;
	  }

	/* External function with "access" attribute. */
	if (sm_ctxt->unknown_side_effects_p ())
	  check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);

	/* Warn if we reach a "noreturn" function while the
	   "tainted-control-flow" global state is set (by the gcond/gswitch
	   handling below).  */
	if (is_assertion_failure_handler_p (fndecl: callee_fndecl)
	    && sm_ctxt->get_global_state () == m_tainted_control_flow)
	  {
	    sm_ctxt->warn (node, stmt: call, NULL_TREE,
			   d: make_unique<tainted_assertion> (args: *this, NULL_TREE,
							    args&: callee_fndecl));
	  }
      }
  // TODO: ...etc; many other sources of untrusted data

  if (const gassign *assign = dyn_cast <const gassign *> (p: stmt))
    {
      enum tree_code op = gimple_assign_rhs_code (gs: assign);

      /* For all division/modulus flavors, check for an
	 attacker-controlled divisor that could be zero.  */
      switch (op)
	{
	default:
	  break;
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case TRUNC_MOD_EXPR:
	case CEIL_MOD_EXPR:
	case FLOOR_MOD_EXPR:
	case ROUND_MOD_EXPR:
	case RDIV_EXPR:
	case EXACT_DIV_EXPR:
	  check_for_tainted_divisor (sm_ctxt, node, assign);
	  break;
	}
    }

  if (const gcond *cond = dyn_cast <const gcond *> (p: stmt))
    {
      /* Reset the state of "tainted-control-flow" before each
	 control flow statement, so that only the last one before
	 an assertion-failure-handler counts.  */
      sm_ctxt->set_global_state (m_start);
      check_control_flow_arg_for_taint (sm_ctxt, stmt: cond, expr: gimple_cond_lhs (gs: cond));
      check_control_flow_arg_for_taint (sm_ctxt, stmt: cond, expr: gimple_cond_rhs (gs: cond));
    }

  if (const gswitch *switch_ = dyn_cast <const gswitch *> (p: stmt))
    {
      /* Reset the state of "tainted-control-flow" before each
	 control flow statement, so that only the last one before
	 an assertion-failure-handler counts.  */
      sm_ctxt->set_global_state (m_start);
      check_control_flow_arg_for_taint (sm_ctxt, stmt: switch_,
					expr: gimple_switch_index (gs: switch_));
    }

  /* Let the engine apply its default handling as well.  */
  return false;
}
1078 | |
1079 | /* If EXPR is tainted, mark this execution path with the |
1080 | "tainted-control-flow" global state, in case we're about |
1081 | to call an assertion-failure-handler. */ |
1082 | |
1083 | void |
1084 | taint_state_machine::check_control_flow_arg_for_taint (sm_context *sm_ctxt, |
1085 | const gimple *stmt, |
1086 | tree expr) const |
1087 | { |
1088 | const region_model *old_model = sm_ctxt->get_old_region_model (); |
1089 | const svalue *sval = old_model->get_rvalue (expr, NULL); |
1090 | state_t state = sm_ctxt->get_state (stmt, sval); |
1091 | enum bounds b; |
1092 | if (get_taint (s: state, TREE_TYPE (expr), out: &b)) |
1093 | sm_ctxt->set_global_state (m_tainted_control_flow); |
1094 | } |
1095 | |
1096 | /* Implementation of state_machine::on_condition vfunc for taint_state_machine. |
1097 | Potentially transition state 'tainted' to 'has_ub' or 'has_lb', |
1098 | and states 'has_ub' and 'has_lb' to 'stop'. */ |
1099 | |
void
taint_state_machine::on_condition (sm_context *sm_ctxt,
				   const supernode *node,
				   const gimple *stmt,
				   const svalue *lhs,
				   enum tree_code op,
				   const svalue *rhs) const
{
  if (stmt == NULL)
    return;

  if (lhs->get_kind () == SK_UNKNOWN
      || rhs->get_kind () == SK_UNKNOWN)
    {
      /* If we have a comparison against UNKNOWN, then
	 we've presumably hit the svalue complexity limit,
	 and we don't know what is being sanitized.
	 Give up on any taint already found on this execution path.  */
      // TODO: warn about this
      if (get_logger ())
	get_logger ()->log (fmt: "comparison against UNKNOWN; removing all taint" );
      sm_ctxt->clear_all_per_svalue_state ();
      return;
    }

  /* Strip away casts before considering LHS and RHS, to increase the
     chance of detecting places where sanitization of a value may have
     happened.  */
  if (const svalue *inner = lhs->maybe_undo_cast ())
    lhs = inner;
  if (const svalue *inner = rhs->maybe_undo_cast ())
    rhs = inner;

  // TODO
  switch (op)
    {
    //case NE_EXPR:
    //case EQ_EXPR:
    case GE_EXPR:
    case GT_EXPR:
      {
	/* (LHS >= RHS) or (LHS > RHS)
	   LHS gains a lower bound
	   RHS gains an upper bound.
	   A value that acquires the "other" bound becomes fully
	   sanitized (m_stop).  */
	sm_ctxt->on_transition (node, stmt, var: lhs, from: m_tainted,
				to: m_has_lb);
	sm_ctxt->on_transition (node, stmt, var: lhs, from: m_has_ub,
				to: m_stop);
	sm_ctxt->on_transition (node, stmt, var: rhs, from: m_tainted,
				to: m_has_ub);
	sm_ctxt->on_transition (node, stmt, var: rhs, from: m_has_lb,
				to: m_stop);
      }
      break;
    case LE_EXPR:
    case LT_EXPR:
      {
	/* Detect where build_range_check has optimized
	   (c>=low) && (c<=high)
	   into
	   (c-low>=0) && (c-low<=high-low)
	   and thus into:
	   (unsigned)(c - low) <= (unsigned)(high-low).  */
	if (const binop_svalue *binop_sval
	      = lhs->dyn_cast_binop_svalue ())
	  {
	    const svalue *inner_lhs = binop_sval->get_arg0 ();
	    enum tree_code inner_op = binop_sval->get_op ();
	    const svalue *inner_rhs = binop_sval->get_arg1 ();
	    if (const svalue *before_cast = inner_lhs->maybe_undo_cast ())
	      inner_lhs = before_cast;
	    if (tree outer_rhs_cst = rhs->maybe_get_constant ())
	      if (tree inner_rhs_cst = inner_rhs->maybe_get_constant ())
		if (inner_op == PLUS_EXPR
		    && TREE_CODE (inner_rhs_cst) == INTEGER_CST
		    && TREE_CODE (outer_rhs_cst) == INTEGER_CST
		    && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst))
		    && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst)))
		  {
		    /* We have
		       (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
		       and thus an optimized test of INNER_LHS (before any
		       cast to unsigned) against a range.
		       Transition any of the tainted states to the stop state.
		       We have to special-case this here rather than in
		       region_model::on_condition since we can't apply
		       both conditions simultaneously (we'd have a transition
		       from the old state to has_lb, then a transition from
		       the old state *again* to has_ub).  */
		    state_t old_state
		      = sm_ctxt->get_state (stmt, inner_lhs);
		    if (old_state == m_tainted
			|| old_state == m_has_lb
			|| old_state == m_has_ub)
		      sm_ctxt->set_next_state (stmt, var: inner_lhs, to: m_stop);
		    return;
		  }
	  }

	/* (LHS <= RHS) or (LHS < RHS)
	   LHS gains an upper bound
	   RHS gains a lower bound.  */
	sm_ctxt->on_transition (node, stmt, var: lhs, from: m_tainted,
				to: m_has_ub);
	sm_ctxt->on_transition (node, stmt, var: lhs, from: m_has_lb,
				to: m_stop);
	sm_ctxt->on_transition (node, stmt, var: rhs, from: m_tainted,
				to: m_has_lb);
	sm_ctxt->on_transition (node, stmt, var: rhs, from: m_has_ub,
				to: m_stop);
      }
      break;
    default:
      break;
    }
}
1216 | |
1217 | /* Implementation of state_machine::on_bounded_ranges vfunc for |
1218 | taint_state_machine, for handling switch statement cases. |
1219 | Potentially transition state 'tainted' to 'has_ub' or 'has_lb', |
1220 | and states 'has_ub' and 'has_lb' to 'stop'. */ |
1221 | |
1222 | void |
1223 | taint_state_machine::on_bounded_ranges (sm_context *sm_ctxt, |
1224 | const supernode *, |
1225 | const gimple *stmt, |
1226 | const svalue &sval, |
1227 | const bounded_ranges &ranges) const |
1228 | { |
1229 | gcc_assert (!ranges.empty_p ()); |
1230 | gcc_assert (ranges.get_count () > 0); |
1231 | |
1232 | /* We have one or more ranges; this could be a "default:", or one or |
1233 | more single or range cases. |
1234 | |
1235 | Look at the overall endpoints to see if the ranges impose any lower |
1236 | bounds or upper bounds beyond those of the underlying numeric type. */ |
1237 | |
1238 | tree lowest_bound = ranges.get_range (idx: 0).m_lower; |
1239 | tree highest_bound = ranges.get_range (idx: ranges.get_count () - 1).m_upper; |
1240 | gcc_assert (lowest_bound); |
1241 | gcc_assert (highest_bound); |
1242 | |
1243 | bool ranges_have_lb |
1244 | = (lowest_bound != TYPE_MIN_VALUE (TREE_TYPE (lowest_bound))); |
1245 | bool ranges_have_ub |
1246 | = (highest_bound != TYPE_MAX_VALUE (TREE_TYPE (highest_bound))); |
1247 | |
1248 | if (!ranges_have_lb && !ranges_have_ub) |
1249 | return; |
1250 | |
1251 | /* We have new bounds from the ranges; combine them with any |
1252 | existing bounds on SVAL. */ |
1253 | state_t old_state = sm_ctxt->get_state (stmt, &sval); |
1254 | if (old_state == m_tainted) |
1255 | { |
1256 | if (ranges_have_lb && ranges_have_ub) |
1257 | sm_ctxt->set_next_state (stmt, var: &sval, to: m_stop); |
1258 | else if (ranges_have_lb) |
1259 | sm_ctxt->set_next_state (stmt, var: &sval, to: m_has_lb); |
1260 | else if (ranges_have_ub) |
1261 | sm_ctxt->set_next_state (stmt, var: &sval, to: m_has_ub); |
1262 | } |
1263 | else if (old_state == m_has_ub && ranges_have_lb) |
1264 | sm_ctxt->set_next_state (stmt, var: &sval, to: m_stop); |
1265 | else if (old_state == m_has_lb && ranges_have_ub) |
1266 | sm_ctxt->set_next_state (stmt, var: &sval, to: m_stop); |
1267 | } |
1268 | |
1269 | bool |
1270 | taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const |
1271 | { |
1272 | if (s == m_has_lb || s == m_has_ub) |
1273 | return false; |
1274 | |
1275 | return true; |
1276 | } |
1277 | |
1278 | /* If STATE is a tainted state, write the bounds to *OUT and return true. |
1279 | Otherwise return false. |
1280 | Use the signedness of TYPE to determine if "has_ub" is tainted. */ |
1281 | |
1282 | bool |
1283 | taint_state_machine::get_taint (state_t state, tree type, |
1284 | enum bounds *out) const |
1285 | { |
1286 | /* Unsigned types have an implicit lower bound. */ |
1287 | bool is_unsigned = false; |
1288 | if (type) |
1289 | if (INTEGRAL_TYPE_P (type)) |
1290 | is_unsigned = TYPE_UNSIGNED (type); |
1291 | |
1292 | /* Can't use a switch as the states are non-const. */ |
1293 | if (state == m_tainted) |
1294 | { |
1295 | *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE; |
1296 | return true; |
1297 | } |
1298 | else if (state == m_has_lb) |
1299 | { |
1300 | *out = BOUNDS_LOWER; |
1301 | return true; |
1302 | } |
1303 | else if (state == m_has_ub && !is_unsigned) |
1304 | { |
1305 | /* Missing lower bound. */ |
1306 | *out = BOUNDS_UPPER; |
1307 | return true; |
1308 | } |
1309 | return false; |
1310 | } |
1311 | |
1312 | /* Find the most tainted state of S0 and S1. */ |
1313 | |
1314 | state_machine::state_t |
1315 | taint_state_machine::combine_states (state_t s0, state_t s1) const |
1316 | { |
1317 | gcc_assert (s0); |
1318 | gcc_assert (s1); |
1319 | if (s0 == s1) |
1320 | return s0; |
1321 | if (s0 == m_tainted || s1 == m_tainted) |
1322 | return m_tainted; |
1323 | if (s0 == m_start) |
1324 | return s1; |
1325 | if (s1 == m_start) |
1326 | return s0; |
1327 | if (s0 == m_stop) |
1328 | return s1; |
1329 | if (s1 == m_stop) |
1330 | return s0; |
1331 | /* The only remaining combinations are one of has_ub and has_lb |
1332 | (in either order). */ |
1333 | gcc_assert ((s0 == m_has_lb && s1 == m_has_ub) |
1334 | || (s0 == m_has_ub && s1 == m_has_lb)); |
1335 | return m_tainted; |
1336 | } |
1337 | |
1338 | /* Check for calls to external functions marked with |
1339 | __attribute__((access)) with a size-index: complain about |
1340 | tainted values passed as a size to such a function. */ |
1341 | |
void
taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
						 const supernode *node,
						 const gcall *call,
						 tree callee_fndecl) const
{
  tree fntype = TREE_TYPE (callee_fndecl);
  if (!fntype)
    return;

  if (!TYPE_ATTRIBUTES (fntype))
    return;

  /* Initialize a map of attribute access specifications for arguments
     to the function call.  */
  rdwr_map rdwr_idx;
  init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));

  unsigned argno = 0;

  /* Walk the declared parameter types, looking for access specs.  */
  for (tree iter = TYPE_ARG_TYPES (fntype); iter;
       iter = TREE_CHAIN (iter), ++argno)
    {
      const attr_access* access = rdwr_idx.get (k: argno);
      if (!access)
	continue;

      /* Ignore any duplicate entry in the map for the size argument.  */
      if (access->ptrarg != argno)
	continue;

      /* UINT_MAX here means the access spec has no size argument.  */
      if (access->sizarg == UINT_MAX)
	continue;

      tree size_arg = gimple_call_arg (gs: call, index: access->sizarg);

      state_t state = sm_ctxt->get_state (stmt: call, var: size_arg);
      enum bounds b;
      if (get_taint (state, TREE_TYPE (size_arg), out: &b))
	{
	  /* The size passed to this access is attacker-controlled:
	     warn about it.  */
	  const char* const access_str =
	    TREE_STRING_POINTER (access->to_external_string ());
	  tree diag_size = sm_ctxt->get_diagnostic_tree (expr: size_arg);
	  sm_ctxt->warn (node, stmt: call, var: size_arg,
			 d: make_unique<tainted_access_attrib_size>
			   (args: *this, args&: diag_size, args&: b,
			    args&: callee_fndecl,
			    args: access->sizarg,
			    args: access_str));
	}
    }
}
1394 | |
1395 | /* Complain if ASSIGN (a division operation) has a tainted divisor |
1396 | that could be zero. */ |
1397 | |
1398 | void |
1399 | taint_state_machine::check_for_tainted_divisor (sm_context *sm_ctxt, |
1400 | const supernode *node, |
1401 | const gassign *assign) const |
1402 | { |
1403 | const region_model *old_model = sm_ctxt->get_old_region_model (); |
1404 | if (!old_model) |
1405 | return; |
1406 | |
1407 | tree divisor_expr = gimple_assign_rhs2 (gs: assign);; |
1408 | |
1409 | /* Until we track conditions on floating point values, we can't check to |
1410 | see if they've been checked against zero. */ |
1411 | if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr))) |
1412 | return; |
1413 | |
1414 | const svalue *divisor_sval = old_model->get_rvalue (expr: divisor_expr, NULL); |
1415 | |
1416 | state_t state = sm_ctxt->get_state (stmt: assign, divisor_sval); |
1417 | enum bounds b; |
1418 | if (get_taint (state, TREE_TYPE (divisor_expr), out: &b)) |
1419 | { |
1420 | const svalue *zero_sval |
1421 | = old_model->get_manager ()->get_or_create_int_cst |
1422 | (TREE_TYPE (divisor_expr), cst: 0); |
1423 | tristate ts |
1424 | = old_model->eval_condition (lhs: divisor_sval, op: NE_EXPR, rhs: zero_sval); |
1425 | if (ts.is_true ()) |
1426 | /* The divisor is known to not equal 0: don't warn. */ |
1427 | return; |
1428 | |
1429 | tree diag_divisor = sm_ctxt->get_diagnostic_tree (expr: divisor_expr); |
1430 | sm_ctxt->warn (node, stmt: assign, var: divisor_expr, |
1431 | d: make_unique <tainted_divisor> (args: *this, args&: diag_divisor, args&: b)); |
1432 | sm_ctxt->set_next_state (stmt: assign, var: divisor_sval, to: m_stop); |
1433 | } |
1434 | } |
1435 | |
1436 | } // anonymous namespace |
1437 | |
1438 | /* Internal interface to this file. */ |
1439 | |
1440 | state_machine * |
1441 | make_taint_state_machine (logger *logger) |
1442 | { |
1443 | return new taint_state_machine (logger); |
1444 | } |
1445 | |
1446 | /* A closed concrete range. */ |
1447 | |
1448 | class concrete_range |
1449 | { |
1450 | public: |
1451 | /* Return true iff THIS is fully within OTHER |
1452 | i.e. |
1453 | - m_min must be >= OTHER.m_min |
1454 | - m_max must be <= OTHER.m_max. */ |
1455 | bool within_p (const concrete_range &other) const |
1456 | { |
1457 | if (compare_constants (lhs_const: m_min, op: GE_EXPR, rhs_const: other.m_min).is_true ()) |
1458 | if (compare_constants (lhs_const: m_max, op: LE_EXPR, rhs_const: other.m_max).is_true ()) |
1459 | return true; |
1460 | return false; |
1461 | } |
1462 | |
1463 | tree m_min; |
1464 | tree m_max; |
1465 | }; |
1466 | |
1467 | /* Attempt to get a closed concrete range for SVAL based on types. |
1468 | If found, write to *OUT and return true. |
1469 | Otherwise return false. */ |
1470 | |
static bool
get_possible_range (const svalue *sval, concrete_range *out)
{
  if (const svalue *inner = sval->maybe_undo_cast ())
    {
      /* SVAL is a cast; recurse to get the range of the uncast value.  */
      concrete_range inner_range;
      if (!get_possible_range (sval: inner, out: &inner_range))
	return false;

      if (sval->get_type ()
	  && inner->get_type ()
	  && INTEGRAL_TYPE_P (sval->get_type ())
	  && INTEGRAL_TYPE_P (inner->get_type ())
	  && TYPE_UNSIGNED (inner->get_type ())
	  && (TYPE_PRECISION (sval->get_type ())
	      > TYPE_PRECISION (inner->get_type ())))
	{
	  /* We have a cast from an unsigned type to a wider integral type.
	     Assuming this is zero-extension, we can inherit the range from
	     the inner type.  */
	  /* NOTE(review): the C-style cast assumes that a non-NULL result
	     from maybe_undo_cast implies SVAL is a unaryop_svalue --
	     confirm against svalue::maybe_undo_cast.  */
	  enum tree_code op = ((const unaryop_svalue *)sval)->get_op ();
	  out->m_min = fold_unary (op, sval->get_type (), inner_range.m_min);
	  out->m_max = fold_unary (op, sval->get_type (), inner_range.m_max);
	  return true;
	}
    }

  if (sval->get_type ()
      && INTEGRAL_TYPE_P (sval->get_type ()))
    {
      /* Fall back to the full range of SVAL's integral type.  */
      out->m_min = TYPE_MIN_VALUE (sval->get_type ());
      out->m_max = TYPE_MAX_VALUE (sval->get_type ());
      return true;
    }

  /* No closed concrete range is known.  */
  return false;
}
1508 | |
1509 | /* Determine if it's possible for tainted array access ELEMENT_REG to |
1510 | actually be a problem. |
1511 | |
1512 | Check here for index being from e.g. unsigned char when the array |
1513 | contains >= 255 elements. |
1514 | |
1515 | Return true if out-of-bounds is possible, false if it's impossible |
1516 | (for suppressing false positives). */ |
1517 | |
1518 | static bool |
1519 | index_can_be_out_of_bounds_p (const element_region *element_reg) |
1520 | { |
1521 | const svalue *index = element_reg->get_index (); |
1522 | const region *array_reg = element_reg->get_parent_region (); |
1523 | |
1524 | if (array_reg->get_type () |
1525 | && TREE_CODE (array_reg->get_type ()) == ARRAY_TYPE |
1526 | && TYPE_DOMAIN (array_reg->get_type ()) |
1527 | && INTEGRAL_TYPE_P (TYPE_DOMAIN (array_reg->get_type ()))) |
1528 | { |
1529 | concrete_range valid_index_range; |
1530 | valid_index_range.m_min |
1531 | = TYPE_MIN_VALUE (TYPE_DOMAIN (array_reg->get_type ())); |
1532 | valid_index_range.m_max |
1533 | = TYPE_MAX_VALUE (TYPE_DOMAIN (array_reg->get_type ())); |
1534 | |
1535 | concrete_range possible_index_range; |
1536 | if (get_possible_range (sval: index, out: &possible_index_range)) |
1537 | if (possible_index_range.within_p (other: valid_index_range)) |
1538 | return false; |
1539 | } |
1540 | |
1541 | return true; |
1542 | } |
1543 | |
1544 | /* Complain to CTXT if accessing REG leads could lead to arbitrary |
1545 | memory access under an attacker's control (due to taint). */ |
1546 | |
void
region_model::check_region_for_taint (const region *reg,
				      enum access_direction,
				      region_model_context *ctxt) const
{
  gcc_assert (reg);
  gcc_assert (ctxt);

  LOG_SCOPE (ctxt->get_logger ());

  /* Locate the taint state machine's state map; bail if taint
     tracking isn't active.  */
  sm_state_map *smap;
  const state_machine *sm;
  unsigned sm_idx;
  if (!ctxt->get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx))
    return;

  gcc_assert (smap);
  gcc_assert (sm);

  const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;

  const extrinsic_state *ext_state = ctxt->get_ext_state ();
  if (!ext_state)
    return;

  /* Walk up through REG's parent regions, checking each level for a
     tainted index, offset, or size.  */
  const region *iter_region = reg;
  while (iter_region)
    {
      switch (iter_region->get_kind ())
	{
	default:
	  break;

	case RK_ELEMENT:
	  {
	    /* ARRAY[INDEX]: check for a tainted INDEX.  */
	    const element_region *element_reg
	      = (const element_region *)iter_region;
	    const svalue *index = element_reg->get_index ();
	    const state_machine::state_t
	      state = smap->get_state (sval: index, ext_state: *ext_state);
	    gcc_assert (state);
	    enum bounds b;
	    if (taint_sm.get_taint (state, type: index->get_type (), out: &b))
	      {
		if (index_can_be_out_of_bounds_p (element_reg))
		  {
		    tree arg = get_representative_tree (sval: index);
		    ctxt->warn (d: make_unique<tainted_array_index> (args: taint_sm,
								  args&: arg, args&: b));
		  }
		else if (ctxt->get_logger ())
		  ctxt->get_logger ()->log (fmt: "rejecting tainted_array_index as"
					    " out of bounds is not possible" );
	      }
	  }
	  break;

	case RK_OFFSET:
	  {
	    /* *(PTR + OFFSET): check for a tainted byte OFFSET.  */
	    const offset_region *offset_reg
	      = (const offset_region *)iter_region;
	    const svalue *offset = offset_reg->get_byte_offset ();
	    const state_machine::state_t
	      state = smap->get_state (sval: offset, ext_state: *ext_state);
	    gcc_assert (state);
	    /* Handle implicit cast to sizetype.  */
	    tree effective_type = offset->get_type ();
	    if (const svalue *cast = offset->maybe_undo_cast ())
	      if (cast->get_type ())
		effective_type = cast->get_type ();
	    enum bounds b;
	    if (taint_sm.get_taint (state, type: effective_type, out: &b))
	      {
		tree arg = get_representative_tree (sval: offset);
		ctxt->warn (d: make_unique<tainted_offset> (args: taint_sm, args&: arg, args&: b,
							  args&: offset));
	      }
	  }
	  break;

	case RK_CAST:
	  {
	    /* Casts don't affect taint; look through them.  */
	    const cast_region *cast_reg
	      = as_a <const cast_region *> (p: iter_region);
	    iter_region = cast_reg->get_original_region ();
	    continue;
	  }

	case RK_SIZED:
	  {
	    /* A sized view of a region: check for a tainted size.  */
	    const sized_region *sized_reg
	      = (const sized_region *)iter_region;
	    const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
	    const state_machine::state_t
	      state = smap->get_state (sval: size_sval, ext_state: *ext_state);
	    gcc_assert (state);
	    enum bounds b;
	    if (taint_sm.get_taint (state, type: size_sval->get_type (), out: &b))
	      {
		tree arg = get_representative_tree (sval: size_sval);
		ctxt->warn (d: make_unique<tainted_size> (args: taint_sm, args&: arg, args&: b));
	      }
	  }
	  break;
	}

      iter_region = iter_region->get_parent_region ();
    }
}
1656 | |
1657 | /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is |
1658 | under an attacker's control (due to taint), where the allocation |
1659 | is happening within MEM_SPACE. */ |
1660 | |
1661 | void |
1662 | region_model::check_dynamic_size_for_taint (enum memory_space mem_space, |
1663 | const svalue *size_in_bytes, |
1664 | region_model_context *ctxt) const |
1665 | { |
1666 | gcc_assert (size_in_bytes); |
1667 | gcc_assert (ctxt); |
1668 | |
1669 | LOG_SCOPE (ctxt->get_logger ()); |
1670 | |
1671 | sm_state_map *smap; |
1672 | const state_machine *sm; |
1673 | unsigned sm_idx; |
1674 | if (!ctxt->get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx)) |
1675 | return; |
1676 | |
1677 | gcc_assert (smap); |
1678 | gcc_assert (sm); |
1679 | |
1680 | const taint_state_machine &taint_sm = (const taint_state_machine &)*sm; |
1681 | |
1682 | const extrinsic_state *ext_state = ctxt->get_ext_state (); |
1683 | if (!ext_state) |
1684 | return; |
1685 | |
1686 | const state_machine::state_t |
1687 | state = smap->get_state (sval: size_in_bytes, ext_state: *ext_state); |
1688 | gcc_assert (state); |
1689 | enum bounds b; |
1690 | if (taint_sm.get_taint (state, type: size_in_bytes->get_type (), out: &b)) |
1691 | { |
1692 | tree arg = get_representative_tree (sval: size_in_bytes); |
1693 | ctxt->warn (d: make_unique<tainted_allocation_size> |
1694 | (args: taint_sm, args&: arg, args&: size_in_bytes, args&: b, args&: mem_space)); |
1695 | } |
1696 | } |
1697 | |
1698 | /* Mark SVAL as TAINTED. CTXT must be non-NULL. */ |
1699 | |
1700 | void |
1701 | region_model::mark_as_tainted (const svalue *sval, |
1702 | region_model_context *ctxt) |
1703 | { |
1704 | gcc_assert (sval); |
1705 | gcc_assert (ctxt); |
1706 | |
1707 | sm_state_map *smap; |
1708 | const state_machine *sm; |
1709 | unsigned sm_idx; |
1710 | if (!ctxt->get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx)) |
1711 | return; |
1712 | |
1713 | gcc_assert (smap); |
1714 | gcc_assert (sm); |
1715 | |
1716 | const taint_state_machine &taint_sm = (const taint_state_machine &)*sm; |
1717 | |
1718 | const extrinsic_state *ext_state = ctxt->get_ext_state (); |
1719 | if (!ext_state) |
1720 | return; |
1721 | |
1722 | smap->set_state (model: this, sval, state: taint_sm.m_tainted, NULL, ext_state: *ext_state); |
1723 | } |
1724 | |
1725 | /* Return true if SVAL could possibly be attacker-controlled. */ |
1726 | |
1727 | bool |
1728 | region_model_context::possibly_tainted_p (const svalue *sval) |
1729 | { |
1730 | sm_state_map *smap; |
1731 | const state_machine *sm; |
1732 | unsigned sm_idx; |
1733 | if (!get_taint_map (out_smap: &smap, out_sm: &sm, out_sm_idx: &sm_idx)) |
1734 | return false; |
1735 | |
1736 | const taint_state_machine &taint_sm = (const taint_state_machine &)*sm; |
1737 | |
1738 | const extrinsic_state *ext_state = get_ext_state (); |
1739 | if (!ext_state) |
1740 | return false; |
1741 | |
1742 | const state_machine::state_t state = smap->get_state (sval, ext_state: *ext_state); |
1743 | gcc_assert (state); |
1744 | |
1745 | return (state == taint_sm.m_tainted |
1746 | || state == taint_sm.m_has_lb |
1747 | || state == taint_sm.m_has_ub); |
1748 | } |
1749 | |
1750 | } // namespace ana |
1751 | |
1752 | #endif /* #if ENABLE_ANALYZER */ |
1753 | |