1/* String length optimization
2 Copyright (C) 2011-2023 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "rtl.h"
26#include "tree.h"
27#include "gimple.h"
28#include "alloc-pool.h"
29#include "tree-pass.h"
30#include "ssa.h"
31#include "cgraph.h"
32#include "gimple-pretty-print.h"
33#include "gimple-ssa-warn-access.h"
34#include "gimple-ssa-warn-restrict.h"
35#include "fold-const.h"
36#include "stor-layout.h"
37#include "gimple-iterator.h"
38#include "gimple-fold.h"
39#include "tree-eh.h"
40#include "gimplify.h"
41#include "gimplify-me.h"
42#include "expr.h"
43#include "tree-cfg.h"
44#include "tree-dfa.h"
45#include "domwalk.h"
46#include "tree-ssa-alias.h"
47#include "tree-ssa-propagate.h"
48#include "tree-ssa-strlen.h"
49#include "tree-hash-traits.h"
50#include "builtins.h"
51#include "pointer-query.h"
52#include "target.h"
53#include "diagnostic-core.h"
54#include "diagnostic.h"
55#include "intl.h"
56#include "attribs.h"
57#include "calls.h"
58#include "cfgloop.h"
59#include "tree-ssa-loop.h"
60#include "tree-scalar-evolution.h"
61#include "vr-values.h"
62#include "gimple-range.h"
63#include "tree-ssa.h"
64
65/* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68static vec<int> ssa_ver_to_stridx;
69
70/* Number of currently active string indexes plus one. */
71static int max_stridx;
72
73/* Set to true to optimize, false when just checking. */
74static bool strlen_optimize;
75
76/* String information record. */
77struct strinfo
78{
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
81
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
89
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
93
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
142};
143
144/* Pool for allocating strinfo_struct entries. */
145static object_allocator<strinfo> strinfo_pool ("strinfo pool");
146
147/* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
154
155/* One OFFSET->IDX mapping. */
156struct stridxlist
157{
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
161};
162
163/* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164struct decl_stridxlist_map
165{
166 struct tree_map_base base;
167 struct stridxlist list;
168};
169
170/* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
174
175/* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179typedef std::pair<int, location_t> stridx_strlenloc;
180static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
181
182/* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183static struct obstack stridx_obstack;
184
185/* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188struct laststmt_struct
189{
190 gimple *stmt;
191 tree len;
192 int stridx;
193} laststmt;
194
195static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196static bool get_range_strlen_dynamic (tree, gimple *, c_strlen_data *,
197 bitmap, pointer_query *, unsigned *);
198
199/* Sets MINMAX to either the constant value or the range VAL is in
200 and returns either the constant value or VAL on success or null
201 when the range couldn't be determined. Uses RVALS or CFUN for
202 range info, whichever is nonnull. */
203
204tree
205get_range (tree val, gimple *stmt, wide_int minmax[2],
206 range_query *rvals /* = NULL */)
207{
208 if (!rvals)
209 {
210 if (!cfun)
211 /* When called from front ends for global initializers CFUN
212 may be null. */
213 return NULL_TREE;
214
215 rvals = get_range_query (cfun);
216 }
217
218 value_range vr;
219 if (!rvals->range_of_expr (r&: vr, expr: val, stmt))
220 return NULL_TREE;
221
222 tree vrmin, vrmax;
223 value_range_kind rng = get_legacy_range (vr, min&: vrmin, max&: vrmax);
224 if (rng == VR_RANGE)
225 {
226 /* Only handle straight ranges. */
227 minmax[0] = wi::to_wide (t: vrmin);
228 minmax[1] = wi::to_wide (t: vrmax);
229 return val;
230 }
231
232 return NULL_TREE;
233}
234
235class strlen_pass : public dom_walker
236{
237public:
238 strlen_pass (cdi_direction direction)
239 : dom_walker (direction),
240 ptr_qry (&m_ranger),
241 m_cleanup_cfg (false)
242 {
243 }
244
245 ~strlen_pass ();
246
247 edge before_dom_children (basic_block) final override;
248 void after_dom_children (basic_block) final override;
249
250 bool check_and_optimize_stmt (bool *cleanup_eh);
251 bool check_and_optimize_call (bool *zero_write);
252 bool handle_assign (tree lhs, bool *zero_write);
253 bool handle_store (bool *zero_write);
254 void handle_pointer_plus ();
255 void handle_builtin_strlen ();
256 void handle_builtin_strchr ();
257 void handle_builtin_strcpy (built_in_function);
258 void handle_integral_assign (bool *cleanup_eh);
259 void handle_builtin_stxncpy_strncat (bool append_p);
260 void handle_builtin_memcpy (built_in_function bcode);
261 void handle_builtin_strcat (built_in_function bcode);
262 void handle_builtin_strncat (built_in_function);
263 bool handle_builtin_memset (bool *zero_write);
264 bool handle_builtin_memcmp ();
265 bool handle_builtin_string_cmp ();
266 void handle_alloc_call (built_in_function);
267 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
268 strinfo *si = NULL, bool plus_one = false,
269 bool rawmem = false);
270 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
271 unsigned HOST_WIDE_INT len,
272 strinfo *si = NULL,
273 bool plus_one = false, bool rawmem = false);
274 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
275 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
276 tree arg2, int idx2,
277 unsigned HOST_WIDE_INT bound,
278 unsigned HOST_WIDE_INT len[2],
279 unsigned HOST_WIDE_INT *psize);
280 bool count_nonzero_bytes (tree expr_or_type,
281 gimple *stmt,
282 unsigned lenrange[3], bool *nulterm,
283 bool *allnul, bool *allnonnul);
284 bool count_nonzero_bytes (tree exp, tree vuse,
285 gimple *stmt,
286 unsigned HOST_WIDE_INT offset,
287 unsigned HOST_WIDE_INT nbytes,
288 unsigned lenrange[3], bool *nulterm,
289 bool *allnul, bool *allnonnul,
290 ssa_name_limit_t &snlim);
291 bool count_nonzero_bytes_addr (tree exp, tree vuse,
292 gimple *stmt,
293 unsigned HOST_WIDE_INT offset,
294 unsigned HOST_WIDE_INT nbytes,
295 unsigned lenrange[3], bool *nulterm,
296 bool *allnul, bool *allnonnul,
297 ssa_name_limit_t &snlim);
298 bool get_len_or_size (gimple *stmt, tree arg, int idx,
299 unsigned HOST_WIDE_INT lenrng[2],
300 unsigned HOST_WIDE_INT *size, bool *nulterm);
301
302 gimple_ranger m_ranger;
303
304 /* A pointer_query object to store information about pointers and
305 their targets in. */
306 pointer_query ptr_qry;
307
308 gimple_stmt_iterator m_gsi;
309
310 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
311 execute function. */
312 bool m_cleanup_cfg;
313};
314
315/* Return:
316
317 * +1 if SI is known to start with more than OFF nonzero characters.
318
319 * 0 if SI is known to start with exactly OFF nonzero characters.
320
321 * -1 if SI either does not start with OFF nonzero characters
322 or the relationship between the number of leading nonzero
323 characters in SI and OFF is unknown. */
324
325static int
326compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
327{
328 if (si->nonzero_chars
329 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
330 return compare_tree_int (si->nonzero_chars, off);
331 else
332 return -1;
333}
334
335/* Same as above but suitable also for strings with non-constant lengths.
336 Uses RVALS to determine length range. */
337
338static int
339compare_nonzero_chars (strinfo *si, gimple *stmt,
340 unsigned HOST_WIDE_INT off,
341 range_query *rvals)
342{
343 if (!si->nonzero_chars)
344 return -1;
345
346 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
347 return compare_tree_int (si->nonzero_chars, off);
348
349 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
350 return -1;
351
352 value_range vr;
353 if (!rvals->range_of_expr (r&: vr, expr: si->nonzero_chars, stmt)
354 || vr.varying_p ()
355 || vr.undefined_p ())
356 return -1;
357
358 /* If the offset is less than the minimum length or if the bounds
359 of the length range are equal return the result of the comparison
360 same as in the constant case. Otherwise return a conservative
361 result. */
362 signop sign = TYPE_SIGN (vr.type ());
363 unsigned prec = TYPE_PRECISION (vr.type ());
364 int cmpmin = wi::cmp (x: vr.lower_bound (), y: wi::uhwi (val: off, precision: prec), sgn: sign);
365 if (cmpmin > 0 || vr.singleton_p ())
366 return cmpmin;
367
368 return -1;
369}
370
371/* Return true if SI is known to be a zero-length string. */
372
373static inline bool
374zero_length_string_p (strinfo *si)
375{
376 return si->full_string_p && integer_zerop (si->nonzero_chars);
377}
378
379/* Return strinfo vector entry IDX. */
380
381static inline strinfo *
382get_strinfo (int idx)
383{
384 if (vec_safe_length (v: stridx_to_strinfo) <= (unsigned int) idx)
385 return NULL;
386 return (*stridx_to_strinfo)[idx];
387}
388
389/* Get the next strinfo in the chain after SI, or null if none. */
390
391static inline strinfo *
392get_next_strinfo (strinfo *si)
393{
394 if (si->next == 0)
395 return NULL;
396 strinfo *nextsi = get_strinfo (idx: si->next);
397 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
398 return NULL;
399 return nextsi;
400}
401
402/* Helper function for get_stridx. Return the strinfo index of the address
403 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
404 OK to return the index for some X <= &EXP and store &EXP - X in
405 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
406 information. */
407
408static int
409get_addr_stridx (tree exp, gimple *stmt,
410 tree ptr, unsigned HOST_WIDE_INT *offset_out,
411 range_query *rvals = NULL)
412{
413 HOST_WIDE_INT off;
414 struct stridxlist *list, *last = NULL;
415 tree base;
416
417 if (!decl_to_stridxlist_htab)
418 return 0;
419
420 poly_int64 poff;
421 base = get_addr_base_and_unit_offset (exp, &poff);
422 if (base == NULL || !DECL_P (base) || !poff.is_constant (const_value: &off))
423 return 0;
424
425 list = decl_to_stridxlist_htab->get (k: base);
426 if (list == NULL)
427 return 0;
428
429 do
430 {
431 if (list->offset == off)
432 {
433 if (offset_out)
434 *offset_out = 0;
435 return list->idx;
436 }
437 if (list->offset > off)
438 return 0;
439 last = list;
440 list = list->next;
441 }
442 while (list);
443
444 if ((offset_out || ptr) && last && last->idx > 0)
445 {
446 unsigned HOST_WIDE_INT rel_off
447 = (unsigned HOST_WIDE_INT) off - last->offset;
448 strinfo *si = get_strinfo (idx: last->idx);
449 if (si && compare_nonzero_chars (si, stmt, off: rel_off, rvals) >= 0)
450 {
451 if (offset_out)
452 {
453 *offset_out = rel_off;
454 return last->idx;
455 }
456 else
457 return get_stridx_plus_constant (si, rel_off, ptr);
458 }
459 }
460 return 0;
461}
462
463/* Returns string index for EXP. When EXP is an SSA_NAME that refers
464 to a known strinfo with an offset and OFFRNG is non-null, sets
465 both elements of the OFFRNG array to the range of the offset and
466 returns the index of the known strinfo. In this case the result
467 must not be used in for functions that modify the string.
468 When nonnull, uses RVALS to determine range information. */
469
470static int
471get_stridx (tree exp, gimple *stmt,
472 wide_int offrng[2] = NULL, range_query *rvals = NULL)
473{
474 if (offrng)
475 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
476
477 if (TREE_CODE (exp) == SSA_NAME)
478 {
479 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
480 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
481
482 tree e = exp;
483 int last_idx = 0;
484 HOST_WIDE_INT offset = 0;
485 /* Follow a chain of at most 5 assignments. */
486 for (int i = 0; i < 5; i++)
487 {
488 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
489 if (!is_gimple_assign (gs: def_stmt))
490 return last_idx;
491
492 tree_code rhs_code = gimple_assign_rhs_code (gs: def_stmt);
493 tree ptr, off;
494
495 if (rhs_code == ADDR_EXPR)
496 {
497 /* Handle indices/offsets into VLAs which are implemented
498 as pointers to arrays. */
499 ptr = gimple_assign_rhs1 (gs: def_stmt);
500 ptr = TREE_OPERAND (ptr, 0);
501
502 /* Handle also VLAs of types larger than char. */
503 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
504 {
505 if (TREE_CODE (ptr) == ARRAY_REF)
506 {
507 off = TREE_OPERAND (ptr, 1);
508 ptr = TREE_OPERAND (ptr, 0);
509 if (!integer_onep (eltsize))
510 {
511 /* Scale the array index by the size of the element
512 type in the rare case that it's greater than
513 the typical 1 for char, making sure both operands
514 have the same type. */
515 eltsize = fold_convert (ssizetype, eltsize);
516 off = fold_convert (ssizetype, off);
517 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
518 }
519 }
520 else
521 off = integer_zero_node;
522 }
523 else
524 return 0;
525
526 if (TREE_CODE (ptr) != MEM_REF)
527 return 0;
528
529 /* Add the MEM_REF byte offset. */
530 tree mem_off = TREE_OPERAND (ptr, 1);
531 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
532 ptr = TREE_OPERAND (ptr, 0);
533 }
534 else if (rhs_code == POINTER_PLUS_EXPR)
535 {
536 ptr = gimple_assign_rhs1 (gs: def_stmt);
537 off = gimple_assign_rhs2 (gs: def_stmt);
538 }
539 else
540 return 0;
541
542 if (TREE_CODE (ptr) != SSA_NAME)
543 return 0;
544
545 if (!tree_fits_shwi_p (off))
546 {
547 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
548 if (offrng)
549 {
550 /* Only when requested by setting OFFRNG to non-null,
551 return the index corresponding to the SSA_NAME.
552 Do this irrespective of the whether the offset
553 is known. */
554 if (get_range (val: off, stmt: def_stmt, minmax: offrng, rvals))
555 {
556 /* When the offset range is known, increment it
557 it by the constant offset computed in prior
558 iterations and store it in the OFFRNG array. */
559 offrng[0] += offset;
560 offrng[1] += offset;
561 }
562 else
563 {
564 /* When the offset range cannot be determined
565 store [0, SIZE_MAX] and let the caller decide
566 if the offset matters. */
567 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
568 offrng[0] = wi::zero (precision: offrng[1].get_precision ());
569 }
570 return idx;
571 }
572 return 0;
573 }
574
575 HOST_WIDE_INT this_off = tree_to_shwi (off);
576 if (offrng)
577 {
578 offrng[0] += wi::shwi (val: this_off, precision: offrng->get_precision ());
579 offrng[1] += offrng[0];
580 }
581
582 if (this_off < 0)
583 return last_idx;
584
585 offset = (unsigned HOST_WIDE_INT) offset + this_off;
586 if (offset < 0)
587 return last_idx;
588
589 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
590 {
591 strinfo *si = get_strinfo (idx);
592 if (si)
593 {
594 if (compare_nonzero_chars (si, off: offset) >= 0)
595 return get_stridx_plus_constant (si, offset, exp);
596
597 if (offrng)
598 last_idx = idx;
599 }
600 }
601 e = ptr;
602 }
603
604 return last_idx;
605 }
606
607 if (TREE_CODE (exp) == ADDR_EXPR)
608 {
609 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, ptr: exp, NULL);
610 if (idx != 0)
611 return idx;
612 }
613
614 const char *p = c_getstr (exp);
615 if (p)
616 return ~(int) strlen (s: p);
617
618 return 0;
619}
620
621/* Return true if strinfo vector is shared with the immediate dominator. */
622
623static inline bool
624strinfo_shared (void)
625{
626 return vec_safe_length (v: stridx_to_strinfo)
627 && (*stridx_to_strinfo)[0] != NULL;
628}
629
630/* Unshare strinfo vector that is shared with the immediate dominator. */
631
632static void
633unshare_strinfo_vec (void)
634{
635 strinfo *si;
636 unsigned int i = 0;
637
638 gcc_assert (strinfo_shared ());
639 stridx_to_strinfo = vec_safe_copy (src: stridx_to_strinfo);
640 for (i = 1; vec_safe_iterate (v: stridx_to_strinfo, ix: i, ptr: &si); ++i)
641 if (si != NULL)
642 si->refcount++;
643 (*stridx_to_strinfo)[0] = NULL;
644}
645
646/* Attempt to create a string index for exp, ADDR_EXPR's operand.
647 Return a pointer to the location where the string index can
648 be stored (if 0) or is stored, or NULL if this can't be tracked. */
649
650static int *
651addr_stridxptr (tree exp)
652{
653 HOST_WIDE_INT off;
654
655 poly_int64 poff;
656 tree base = get_addr_base_and_unit_offset (exp, &poff);
657 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (const_value: &off))
658 return NULL;
659
660 if (!decl_to_stridxlist_htab)
661 {
662 decl_to_stridxlist_htab
663 = new hash_map<tree_decl_hash, stridxlist> (64);
664 gcc_obstack_init (&stridx_obstack);
665 }
666
667 bool existed;
668 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (k: base, existed: &existed);
669 if (existed)
670 {
671 int i;
672 stridxlist *before = NULL;
673 for (i = 0; i < 32; i++)
674 {
675 if (list->offset == off)
676 return &list->idx;
677 if (list->offset > off && before == NULL)
678 before = list;
679 if (list->next == NULL)
680 break;
681 list = list->next;
682 }
683 if (i == 32)
684 return NULL;
685 if (before)
686 {
687 list = before;
688 before = XOBNEW (&stridx_obstack, struct stridxlist);
689 *before = *list;
690 list->next = before;
691 list->offset = off;
692 list->idx = 0;
693 return &list->idx;
694 }
695 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
696 list = list->next;
697 }
698
699 list->next = NULL;
700 list->offset = off;
701 list->idx = 0;
702 return &list->idx;
703}
704
705/* Create a new string index, or return 0 if reached limit. */
706
707static int
708new_stridx (tree exp)
709{
710 int idx;
711 if (max_stridx >= param_max_tracked_strlens)
712 return 0;
713 if (TREE_CODE (exp) == SSA_NAME)
714 {
715 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
716 return 0;
717 idx = max_stridx++;
718 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
719 return idx;
720 }
721 if (TREE_CODE (exp) == ADDR_EXPR)
722 {
723 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
724 if (pidx != NULL)
725 {
726 gcc_assert (*pidx == 0);
727 *pidx = max_stridx++;
728 return *pidx;
729 }
730 }
731 return 0;
732}
733
734/* Like new_stridx, but for ADDR_EXPR's operand instead. */
735
736static int
737new_addr_stridx (tree exp)
738{
739 int *pidx;
740 if (max_stridx >= param_max_tracked_strlens)
741 return 0;
742 pidx = addr_stridxptr (exp);
743 if (pidx != NULL)
744 {
745 gcc_assert (*pidx == 0);
746 *pidx = max_stridx++;
747 return *pidx;
748 }
749 return 0;
750}
751
752/* Create a new strinfo. */
753
754static strinfo *
755new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
756{
757 strinfo *si = strinfo_pool.allocate ();
758 si->nonzero_chars = nonzero_chars;
759 STRIP_USELESS_TYPE_CONVERSION (ptr);
760 si->ptr = ptr;
761 si->stmt = NULL;
762 si->alloc = NULL;
763 si->endptr = NULL_TREE;
764 si->refcount = 1;
765 si->idx = idx;
766 si->first = 0;
767 si->prev = 0;
768 si->next = 0;
769 si->writable = false;
770 si->dont_invalidate = false;
771 si->full_string_p = full_string_p;
772 return si;
773}
774
775/* Decrease strinfo refcount and free it if not referenced anymore. */
776
777static inline void
778free_strinfo (strinfo *si)
779{
780 if (si && --si->refcount == 0)
781 strinfo_pool.remove (object: si);
782}
783
784/* Set strinfo in the vector entry IDX to SI. */
785
786static inline void
787set_strinfo (int idx, strinfo *si)
788{
789 if (vec_safe_length (v: stridx_to_strinfo) && (*stridx_to_strinfo)[0])
790 unshare_strinfo_vec ();
791 if (vec_safe_length (v: stridx_to_strinfo) <= (unsigned int) idx)
792 vec_safe_grow_cleared (v&: stridx_to_strinfo, len: idx + 1, exact: true);
793 (*stridx_to_strinfo)[idx] = si;
794}
795
796/* Return the first strinfo in the related strinfo chain
797 if all strinfos in between belong to the chain, otherwise NULL. */
798
799static strinfo *
800verify_related_strinfos (strinfo *origsi)
801{
802 strinfo *si = origsi, *psi;
803
804 if (origsi->first == 0)
805 return NULL;
806 for (; si->prev; si = psi)
807 {
808 if (si->first != origsi->first)
809 return NULL;
810 psi = get_strinfo (idx: si->prev);
811 if (psi == NULL)
812 return NULL;
813 if (psi->next != si->idx)
814 return NULL;
815 }
816 if (si->idx != si->first)
817 return NULL;
818 return si;
819}
820
821/* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
822 Use LOC for folding. */
823
824static void
825set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
826{
827 si->endptr = endptr;
828 si->stmt = NULL;
829 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
830 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
831 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
832 end_as_size, start_as_size);
833 si->full_string_p = true;
834}
835
836/* Return the string length, or NULL if it can't be computed.
837 The length may but need not be constant. Instead, it might be
838 the result of a strlen() call. */
839
840static tree
841get_string_length (strinfo *si)
842{
843 /* If the length has already been computed return it if it's exact
844 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
845 null if it isn't. */
846 if (si->nonzero_chars)
847 return si->full_string_p ? si->nonzero_chars : NULL;
848
849 /* If the string is the result of one of the built-in calls below
850 attempt to compute the length from the call statement. */
851 if (si->stmt)
852 {
853 gimple *stmt = si->stmt, *lenstmt;
854 tree callee, lhs, fn, tem;
855 location_t loc;
856 gimple_stmt_iterator gsi;
857
858 gcc_assert (is_gimple_call (stmt));
859 callee = gimple_call_fndecl (gs: stmt);
860 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
861 lhs = gimple_call_lhs (gs: stmt);
862 /* unshare_strinfo is intentionally not called here. The (delayed)
863 transformation of strcpy or strcat into stpcpy is done at the place
864 of the former strcpy/strcat call and so can affect all the strinfos
865 with the same stmt. If they were unshared before and transformation
866 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
867 just compute the right length. */
868 switch (DECL_FUNCTION_CODE (decl: callee))
869 {
870 case BUILT_IN_STRCAT:
871 case BUILT_IN_STRCAT_CHK:
872 gsi = gsi_for_stmt (stmt);
873 fn = builtin_decl_implicit (fncode: BUILT_IN_STRLEN);
874 gcc_assert (lhs == NULL_TREE);
875 tem = unshare_expr (gimple_call_arg (gs: stmt, index: 0));
876 lenstmt = gimple_build_call (fn, 1, tem);
877 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt: lenstmt);
878 gimple_call_set_lhs (gs: lenstmt, lhs);
879 gimple_set_vuse (g: lenstmt, vuse: gimple_vuse (g: stmt));
880 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
881 tem = gimple_call_arg (gs: stmt, index: 0);
882 if (!ptrofftype_p (TREE_TYPE (lhs)))
883 {
884 lhs = convert_to_ptrofftype (lhs);
885 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
886 true, GSI_SAME_STMT);
887 }
888 lenstmt = gimple_build_assign
889 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
890 POINTER_PLUS_EXPR,tem, lhs);
891 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
892 gimple_call_set_arg (gs: stmt, index: 0, arg: gimple_assign_lhs (gs: lenstmt));
893 lhs = NULL_TREE;
894 /* FALLTHRU */
895 case BUILT_IN_STRCPY:
896 case BUILT_IN_STRCPY_CHK:
897 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
898 if (gimple_call_num_args (gs: stmt) == 2)
899 fn = builtin_decl_implicit (fncode: BUILT_IN_STPCPY);
900 else
901 fn = builtin_decl_explicit (fncode: BUILT_IN_STPCPY_CHK);
902 gcc_assert (lhs == NULL_TREE);
903 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
904 {
905 fprintf (stream: dump_file, format: "Optimizing: ");
906 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
907 }
908 gimple_call_set_fndecl (gs: stmt, decl: fn);
909 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
910 gimple_call_set_lhs (gs: stmt, lhs);
911 update_stmt (s: stmt);
912 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
913 {
914 fprintf (stream: dump_file, format: "into: ");
915 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
916 }
917 /* FALLTHRU */
918 case BUILT_IN_STPCPY:
919 case BUILT_IN_STPCPY_CHK:
920 gcc_assert (lhs != NULL_TREE);
921 loc = gimple_location (g: stmt);
922 set_endptr_and_length (loc, si, endptr: lhs);
923 for (strinfo *chainsi = verify_related_strinfos (origsi: si);
924 chainsi != NULL;
925 chainsi = get_next_strinfo (si: chainsi))
926 if (chainsi->nonzero_chars == NULL)
927 set_endptr_and_length (loc, si: chainsi, endptr: lhs);
928 break;
929 case BUILT_IN_ALLOCA:
930 case BUILT_IN_ALLOCA_WITH_ALIGN:
931 case BUILT_IN_MALLOC:
932 break;
933 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
934 default:
935 gcc_unreachable ();
936 break;
937 }
938 }
939
940 return si->nonzero_chars;
941}
942
943/* Dump strlen data to FP for statement STMT. When non-null, RVALS
944 points to the valuation engine used to calculate ranges, and is
945 used to dump strlen range for non-constant results. */
946
947DEBUG_FUNCTION void
948dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
949{
950 if (stmt)
951 {
952 fprintf (stream: fp, format: "\nDumping strlen pass data after ");
953 print_gimple_expr (fp, stmt, TDF_LINENO);
954 fputc (c: '\n', stream: fp);
955 }
956 else
957 fprintf (stream: fp, format: "\nDumping strlen pass data\n");
958
959 fprintf (stream: fp, format: "max_stridx = %i\n", max_stridx);
960 fprintf (stream: fp, format: "ssa_ver_to_stridx has %u elements\n",
961 ssa_ver_to_stridx.length ());
962 fprintf (stream: fp, format: "stridx_to_strinfo");
963 if (stridx_to_strinfo)
964 {
965 fprintf (stream: fp, format: " has %u elements\n", stridx_to_strinfo->length ());
966 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
967 {
968 if (strinfo *si = (*stridx_to_strinfo)[i])
969 {
970 if (!si->idx)
971 continue;
972 fprintf (stream: fp, format: " idx = %i", si->idx);
973 if (si->ptr)
974 {
975 fprintf (stream: fp, format: ", ptr = ");
976 print_generic_expr (fp, si->ptr);
977 }
978
979 if (si->nonzero_chars)
980 {
981 fprintf (stream: fp, format: ", nonzero_chars = ");
982 print_generic_expr (fp, si->nonzero_chars);
983 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
984 {
985 value_range vr;
986 if (rvals)
987 rvals->range_of_expr (r&: vr, expr: si->nonzero_chars,
988 si->stmt);
989 else
990 get_range_query (cfun)->range_of_expr (r&: vr,
991 expr: si->nonzero_chars);
992 vr.dump (fp);
993 }
994 }
995
996 fprintf (stream: fp, format: ", refcount = %i", si->refcount);
997 if (si->stmt)
998 {
999 fprintf (stream: fp, format: ", stmt = ");
1000 print_gimple_expr (fp, si->stmt, 0);
1001 }
1002 if (si->alloc)
1003 {
1004 fprintf (stream: fp, format: ", alloc = ");
1005 print_gimple_expr (fp, si->alloc, 0);
1006 }
1007 if (si->writable)
1008 fprintf (stream: fp, format: ", writable");
1009 if (si->dont_invalidate)
1010 fprintf (stream: fp, format: ", dont_invalidate");
1011 if (si->full_string_p)
1012 fprintf (stream: fp, format: ", full_string_p");
1013 if (strinfo *next = get_next_strinfo (si))
1014 {
1015 fprintf (stream: fp, format: ", {");
1016 do
1017 fprintf (stream: fp, format: "%i%s", next->idx, next->first ? ", " : "");
1018 while ((next = get_next_strinfo (si: next)));
1019 fprintf (stream: fp, format: "}");
1020 }
1021 fputs (s: "\n", stream: fp);
1022 }
1023 }
1024 }
1025 else
1026 fprintf (stream: fp, format: " = null\n");
1027
1028 fprintf (stream: fp, format: "decl_to_stridxlist_htab");
1029 if (decl_to_stridxlist_htab)
1030 {
1031 fputs (s: "\n", stream: fp);
1032 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1033 for (iter_t it = decl_to_stridxlist_htab->begin ();
1034 it != decl_to_stridxlist_htab->end (); ++it)
1035 {
1036 tree decl = (*it).first;
1037 stridxlist *list = &(*it).second;
1038 fprintf (stream: fp, format: " decl = ");
1039 print_generic_expr (fp, decl);
1040 if (list)
1041 {
1042 fprintf (stream: fp, format: ", offsets = {");
1043 for (; list; list = list->next)
1044 fprintf (stream: fp, format: "%lli%s", (long long) list->offset,
1045 list->next ? ", " : "");
1046 fputs (s: "}", stream: fp);
1047 }
1048 fputs (s: "\n", stream: fp);
1049 }
1050 }
1051 else
1052 fprintf (stream: fp, format: " = null\n");
1053
1054 if (laststmt.stmt)
1055 {
1056 fprintf (stream: fp, format: "laststmt = ");
1057 print_gimple_expr (fp, laststmt.stmt, 0);
1058 fprintf (stream: fp, format: ", len = ");
1059 print_generic_expr (fp, laststmt.len);
1060 fprintf (stream: fp, format: ", stridx = %i\n", laststmt.stridx);
1061 }
1062}
1063
1064/* Helper of get_range_strlen_dynamic(). See below. */
1065
1066static bool
1067get_range_strlen_phi (tree src, gphi *phi,
1068 c_strlen_data *pdata, bitmap visited,
1069 pointer_query *ptr_qry, unsigned *pssa_def_max)
1070{
1071 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (src)))
1072 return true;
1073
1074 if (*pssa_def_max == 0)
1075 return false;
1076
1077 --*pssa_def_max;
1078
1079 /* Iterate over the PHI arguments and determine the minimum and maximum
1080 length/size of each and incorporate them into the overall result. */
1081 for (unsigned i = 0; i != gimple_phi_num_args (gs: phi); ++i)
1082 {
1083 tree arg = gimple_phi_arg_def (gs: phi, index: i);
1084 if (arg == gimple_phi_result (gs: phi))
1085 continue;
1086
1087 c_strlen_data argdata = { };
1088 if (!get_range_strlen_dynamic (arg, phi, &argdata, visited, ptr_qry,
1089 pssa_def_max))
1090 {
1091 pdata->maxlen = build_all_ones_cst (size_type_node);
1092 continue;
1093 }
1094
1095 /* Set the DECL of an unterminated array this argument refers to
1096 if one hasn't been found yet. */
1097 if (!pdata->decl && argdata.decl)
1098 pdata->decl = argdata.decl;
1099
1100 if (!argdata.minlen
1101 || (integer_zerop (argdata.minlen)
1102 && (!argdata.maxbound
1103 || integer_all_onesp (argdata.maxbound))
1104 && integer_all_onesp (argdata.maxlen)))
1105 {
1106 /* Set the upper bound of the length to unbounded. */
1107 pdata->maxlen = build_all_ones_cst (size_type_node);
1108 continue;
1109 }
1110
1111 /* Adjust the minimum and maximum length determined so far and
1112 the upper bound on the array size. */
1113 if (TREE_CODE (argdata.minlen) == INTEGER_CST
1114 && (!pdata->minlen
1115 || tree_int_cst_lt (t1: argdata.minlen, t2: pdata->minlen)))
1116 pdata->minlen = argdata.minlen;
1117
1118 if (TREE_CODE (argdata.maxlen) == INTEGER_CST
1119 && (!pdata->maxlen
1120 || (argdata.maxlen
1121 && tree_int_cst_lt (t1: pdata->maxlen, t2: argdata.maxlen))))
1122 pdata->maxlen = argdata.maxlen;
1123
1124 if (!pdata->maxbound
1125 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1126 || (argdata.maxbound
1127 && tree_int_cst_lt (t1: pdata->maxbound, t2: argdata.maxbound)
1128 && !integer_all_onesp (argdata.maxbound)))
1129 pdata->maxbound = argdata.maxbound;
1130 }
1131
1132 return true;
1133}
1134
1135/* Return the maximum possible length of the string PTR that's less
1136 than MAXLEN given the size of the object of subobject it points
1137 to at the given STMT. MAXLEN is the maximum length of the string
1138 determined so far. Return null when no such maximum can be
1139 determined. */
1140
1141static tree
1142get_maxbound (tree ptr, gimple *stmt, offset_int maxlen,
1143 pointer_query *ptr_qry)
1144{
1145 access_ref aref;
1146 if (!ptr_qry->get_ref (ptr, stmt, &aref))
1147 return NULL_TREE;
1148
1149 offset_int sizrem = aref.size_remaining ();
1150 if (sizrem <= 0)
1151 return NULL_TREE;
1152
1153 if (sizrem < maxlen)
1154 maxlen = sizrem - 1;
1155
1156 /* Try to determine the maximum from the subobject at the offset.
1157 This handles MEM [&some-struct, member-offset] that's often
1158 the result of folding COMPONENT_REF [some-struct, member]. */
1159 tree reftype = TREE_TYPE (aref.ref);
1160 if (!RECORD_OR_UNION_TYPE_P (reftype)
1161 || aref.offrng[0] != aref.offrng[1]
1162 || !wi::fits_shwi_p (x: aref.offrng[0]))
1163 return wide_int_to_tree (size_type_node, cst: maxlen);
1164
1165 HOST_WIDE_INT off = aref.offrng[0].to_shwi ();
1166 tree fld = field_at_offset (reftype, NULL_TREE, off);
1167 if (!fld || !DECL_SIZE_UNIT (fld))
1168 return wide_int_to_tree (size_type_node, cst: maxlen);
1169
1170 offset_int size = wi::to_offset (DECL_SIZE_UNIT (fld));
1171 if (maxlen < size)
1172 return wide_int_to_tree (size_type_node, cst: maxlen);
1173
1174 return wide_int_to_tree (size_type_node, cst: size - 1);
1175}
1176
1177/* Attempt to determine the length of the string SRC. On success, store
1178 the length in *PDATA and return true. Otherwise, return false.
1179 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1180 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1181 assignment limit used to prevent runaway recursion. */
1182
1183static bool
1184get_range_strlen_dynamic (tree src, gimple *stmt,
1185 c_strlen_data *pdata, bitmap visited,
1186 pointer_query *ptr_qry, unsigned *pssa_def_max)
1187{
1188 int idx = get_stridx (exp: src, stmt);
1189 if (!idx)
1190 {
1191 if (TREE_CODE (src) == SSA_NAME)
1192 {
1193 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1194 if (gphi *phi = dyn_cast<gphi *>(p: def_stmt))
1195 return get_range_strlen_phi (src, phi, pdata, visited, ptr_qry,
1196 pssa_def_max);
1197 }
1198
1199 /* Return success regardless of the result and handle *PDATA
1200 in the caller. */
1201 get_range_strlen (src, pdata, eltsize: 1);
1202 return true;
1203 }
1204
1205 if (idx < 0)
1206 {
1207 /* SRC is a string of constant length. */
1208 pdata->minlen = build_int_cst (size_type_node, ~idx);
1209 pdata->maxlen = pdata->minlen;
1210 pdata->maxbound = pdata->maxlen;
1211 return true;
1212 }
1213
1214 if (strinfo *si = get_strinfo (idx))
1215 {
1216 pdata->minlen = get_string_length (si);
1217 if (!pdata->minlen && si->nonzero_chars)
1218 {
1219 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1220 pdata->minlen = si->nonzero_chars;
1221 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1222 {
1223 value_range vr;
1224 ptr_qry->rvals->range_of_expr (r&: vr, expr: si->nonzero_chars, si->stmt);
1225 if (vr.undefined_p () || vr.varying_p ())
1226 pdata->minlen = build_zero_cst (size_type_node);
1227 else
1228 {
1229 tree type = vr.type ();
1230 pdata->minlen = wide_int_to_tree (type, cst: vr.lower_bound ());
1231 pdata->maxlen = wide_int_to_tree (type, cst: vr.upper_bound ());
1232 }
1233 }
1234 else
1235 pdata->minlen = build_zero_cst (size_type_node);
1236
1237 tree base = si->ptr;
1238 if (TREE_CODE (base) == ADDR_EXPR)
1239 base = TREE_OPERAND (base, 0);
1240
1241 HOST_WIDE_INT off;
1242 poly_int64 poff;
1243 base = get_addr_base_and_unit_offset (base, &poff);
1244 if (base
1245 && DECL_P (base)
1246 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1247 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1248 && poff.is_constant (const_value: &off))
1249 {
1250 tree basetype = TREE_TYPE (base);
1251 tree size = TYPE_SIZE_UNIT (basetype);
1252 if (TREE_CODE (size) == INTEGER_CST)
1253 {
1254 ++off; /* Increment for the terminating nul. */
1255 tree toffset = build_int_cst (size_type_node, off);
1256 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1257 toffset);
1258 pdata->maxbound = pdata->maxlen;
1259 }
1260 else
1261 pdata->maxlen = build_all_ones_cst (size_type_node);
1262 }
1263 else
1264 pdata->maxlen = build_all_ones_cst (size_type_node);
1265 }
1266 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1267 {
1268 value_range vr;
1269 ptr_qry->rvals->range_of_expr (r&: vr, expr: si->nonzero_chars, stmt);
1270 if (vr.varying_p () || vr.undefined_p ())
1271 {
1272 pdata->minlen = build_zero_cst (size_type_node);
1273 pdata->maxlen = build_all_ones_cst (size_type_node);
1274 }
1275 else
1276 {
1277 tree type = vr.type ();
1278 pdata->minlen = wide_int_to_tree (type, cst: vr.lower_bound ());
1279 pdata->maxlen = wide_int_to_tree (type, cst: vr.upper_bound ());
1280 offset_int max = offset_int::from (x: vr.upper_bound (pair: 0), sgn: SIGNED);
1281 if (tree maxbound = get_maxbound (ptr: si->ptr, stmt, maxlen: max, ptr_qry))
1282 pdata->maxbound = maxbound;
1283 else
1284 pdata->maxbound = pdata->maxlen;
1285 }
1286 }
1287 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1288 {
1289 pdata->maxlen = pdata->minlen;
1290 pdata->maxbound = pdata->minlen;
1291 }
1292 else
1293 {
1294 /* For PDATA->MINLEN that's a non-constant expression such
1295 as PLUS_EXPR whose value range is unknown, set the bounds
1296 to zero and SIZE_MAX. */
1297 pdata->minlen = build_zero_cst (size_type_node);
1298 pdata->maxlen = build_all_ones_cst (size_type_node);
1299 }
1300
1301 return true;
1302 }
1303
1304 return false;
1305}
1306
1307/* Analogous to get_range_strlen but for dynamically created strings,
1308 i.e., those created by calls to strcpy as opposed to just string
1309 constants.
1310 Try to obtain the range of the lengths of the string(s) referenced
1311 by SRC, or the size of the largest array SRC refers to if the range
1312 of lengths cannot be determined, and store all in *PDATA. RVALS
1313 points to the valuation engine used to calculate ranges. */
1314
1315void
1316get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1317 pointer_query &ptr_qry)
1318{
1319 auto_bitmap visited;
1320 tree maxbound = pdata->maxbound;
1321
1322 unsigned limit = param_ssa_name_def_chain_limit;
1323 if (!get_range_strlen_dynamic (src, stmt, pdata, visited, ptr_qry: &ptr_qry, pssa_def_max: &limit))
1324 {
1325 /* On failure extend the length range to an impossible maximum
1326 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1327 members can stay unchanged regardless. */
1328 pdata->minlen = ssize_int (0);
1329 pdata->maxlen = build_all_ones_cst (size_type_node);
1330 }
1331 else if (!pdata->minlen)
1332 pdata->minlen = ssize_int (0);
1333
1334 /* If it's unchanged from it initial non-null value, set the conservative
1335 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1336 if (maxbound && pdata->maxbound == maxbound)
1337 pdata->maxbound = build_all_ones_cst (size_type_node);
1338}
1339
1340/* Invalidate string length information for strings whose length might
1341 change due to stores in STMT, except those marked DONT_INVALIDATE.
1342 For string-modifying statements, ZERO_WRITE is set when the statement
1343 wrote only zeros.
1344 Returns true if any STRIDX_TO_STRINFO entries were considered
1345 for invalidation. */
1346
1347static bool
1348maybe_invalidate (gimple *stmt, bool zero_write = false)
1349{
1350 if (dump_file && (dump_flags & TDF_DETAILS))
1351 {
1352 fprintf (stream: dump_file, format: "%s called for ", __func__);
1353 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1354 }
1355
1356 strinfo *si;
1357 bool nonempty = false;
1358
1359 for (unsigned i = 1; vec_safe_iterate (v: stridx_to_strinfo, ix: i, ptr: &si); ++i)
1360 {
1361 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1362 continue;
1363
1364 nonempty = true;
1365
1366 /* Unconditionally reset DONT_INVALIDATE. */
1367 bool dont_invalidate = si->dont_invalidate;
1368 si->dont_invalidate = false;
1369
1370 if (dont_invalidate)
1371 continue;
1372
1373 ao_ref r;
1374 tree size = si->nonzero_chars;
1375 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1376 /* Include the terminating nul in the size of the string
1377 to consider when determining possible clobber. But do not
1378 add it to 'size' since we don't know whether it would
1379 actually fit the allocated area. */
1380 if (known_size_p (a: r.size))
1381 {
1382 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1383 r.max_size += BITS_PER_UNIT;
1384 else
1385 r.max_size = -1;
1386 }
1387 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1388 {
1389 if (dump_file && (dump_flags & TDF_DETAILS))
1390 {
1391 fputs (s: " statement may clobber object ", stream: dump_file);
1392 print_generic_expr (dump_file, si->ptr);
1393 if (size && tree_fits_uhwi_p (size))
1394 fprintf (stream: dump_file, format: " " HOST_WIDE_INT_PRINT_UNSIGNED
1395 " bytes in size", tree_to_uhwi (size));
1396 fputc (c: '\n', stream: dump_file);
1397 }
1398
1399 set_strinfo (idx: i, NULL);
1400 free_strinfo (si);
1401 continue;
1402 }
1403
1404 if (size
1405 && !zero_write
1406 && si->stmt
1407 && is_gimple_call (gs: si->stmt)
1408 && (DECL_FUNCTION_CODE (decl: gimple_call_fndecl (gs: si->stmt))
1409 == BUILT_IN_CALLOC))
1410 {
1411 /* If the clobber test above considered the length of
1412 the string (including the nul), then for (potentially)
1413 non-zero writes that might modify storage allocated by
1414 calloc consider the whole object and if it might be
1415 clobbered by the statement reset the statement. */
1416 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1417 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1418 si->stmt = NULL;
1419 }
1420 }
1421
1422 if (dump_file && (dump_flags & TDF_DETAILS))
1423 fprintf (stream: dump_file, format: "%s returns %i\n", __func__, nonempty);
1424
1425 return nonempty;
1426}
1427
1428/* Unshare strinfo record SI, if it has refcount > 1 or
1429 if stridx_to_strinfo vector is shared with some other
1430 bbs. */
1431
1432static strinfo *
1433unshare_strinfo (strinfo *si)
1434{
1435 strinfo *nsi;
1436
1437 if (si->refcount == 1 && !strinfo_shared ())
1438 return si;
1439
1440 nsi = new_strinfo (ptr: si->ptr, idx: si->idx, nonzero_chars: si->nonzero_chars, full_string_p: si->full_string_p);
1441 nsi->stmt = si->stmt;
1442 nsi->alloc = si->alloc;
1443 nsi->endptr = si->endptr;
1444 nsi->first = si->first;
1445 nsi->prev = si->prev;
1446 nsi->next = si->next;
1447 nsi->writable = si->writable;
1448 set_strinfo (idx: si->idx, si: nsi);
1449 free_strinfo (si);
1450 return nsi;
1451}
1452
1453/* Attempt to create a new strinfo for BASESI + OFF, or find existing
1454 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1455 been created. */
1456
1457static int
1458get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1459 tree ptr)
1460{
1461 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1462 return 0;
1463
1464 if (compare_nonzero_chars (si: basesi, off) < 0
1465 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1466 return 0;
1467
1468 unsigned HOST_WIDE_INT nonzero_chars
1469 = tree_to_uhwi (basesi->nonzero_chars) - off;
1470 strinfo *si = basesi, *chainsi;
1471 if (si->first || si->prev || si->next)
1472 si = verify_related_strinfos (origsi: basesi);
1473 if (si == NULL
1474 || si->nonzero_chars == NULL_TREE
1475 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1476 return 0;
1477
1478 if (TREE_CODE (ptr) == SSA_NAME
1479 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1480 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, exact: true);
1481
1482 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1483 for (chainsi = si; chainsi->next; chainsi = si)
1484 {
1485 si = get_next_strinfo (si: chainsi);
1486 if (si == NULL
1487 || si->nonzero_chars == NULL_TREE
1488 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1489 break;
1490 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1491 if (r != 1)
1492 {
1493 if (r == 0)
1494 {
1495 if (TREE_CODE (ptr) == SSA_NAME)
1496 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1497 else
1498 {
1499 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1500 if (pidx != NULL && *pidx == 0)
1501 *pidx = si->idx;
1502 }
1503 return si->idx;
1504 }
1505 break;
1506 }
1507 }
1508
1509 int idx = new_stridx (exp: ptr);
1510 if (idx == 0)
1511 return 0;
1512 si = new_strinfo (ptr, idx, nonzero_chars: build_int_cst (size_type_node, nonzero_chars),
1513 full_string_p: basesi->full_string_p);
1514 set_strinfo (idx, si);
1515 if (strinfo *nextsi = get_strinfo (idx: chainsi->next))
1516 {
1517 nextsi = unshare_strinfo (si: nextsi);
1518 si->next = nextsi->idx;
1519 nextsi->prev = idx;
1520 }
1521 chainsi = unshare_strinfo (si: chainsi);
1522 if (chainsi->first == 0)
1523 chainsi->first = chainsi->idx;
1524 chainsi->next = idx;
1525 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1526 chainsi->endptr = ptr;
1527 si->endptr = chainsi->endptr;
1528 si->prev = chainsi->idx;
1529 si->first = chainsi->first;
1530 si->writable = chainsi->writable;
1531 return si->idx;
1532}
1533
1534/* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1535 to a zero-length string and if possible chain it to a related strinfo
1536 chain whose part is or might be CHAINSI. */
1537
1538static strinfo *
1539zero_length_string (tree ptr, strinfo *chainsi)
1540{
1541 strinfo *si;
1542 int idx;
1543 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1544 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, exact: true);
1545 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1546 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1547
1548 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1549 return NULL;
1550 if (chainsi != NULL)
1551 {
1552 si = verify_related_strinfos (origsi: chainsi);
1553 if (si)
1554 {
1555 do
1556 {
1557 /* We shouldn't mix delayed and non-delayed lengths. */
1558 gcc_assert (si->full_string_p);
1559 if (si->endptr == NULL_TREE)
1560 {
1561 si = unshare_strinfo (si);
1562 si->endptr = ptr;
1563 }
1564 chainsi = si;
1565 si = get_next_strinfo (si);
1566 }
1567 while (si != NULL);
1568 if (zero_length_string_p (si: chainsi))
1569 {
1570 if (chainsi->next)
1571 {
1572 chainsi = unshare_strinfo (si: chainsi);
1573 chainsi->next = 0;
1574 }
1575 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1576 return chainsi;
1577 }
1578 }
1579 else
1580 {
1581 /* We shouldn't mix delayed and non-delayed lengths. */
1582 gcc_assert (chainsi->full_string_p);
1583 if (chainsi->first || chainsi->prev || chainsi->next)
1584 {
1585 chainsi = unshare_strinfo (si: chainsi);
1586 chainsi->first = 0;
1587 chainsi->prev = 0;
1588 chainsi->next = 0;
1589 }
1590 }
1591 }
1592 idx = new_stridx (exp: ptr);
1593 if (idx == 0)
1594 return NULL;
1595 si = new_strinfo (ptr, idx, nonzero_chars: build_int_cst (size_type_node, 0), full_string_p: true);
1596 set_strinfo (idx, si);
1597 si->endptr = ptr;
1598 if (chainsi != NULL)
1599 {
1600 chainsi = unshare_strinfo (si: chainsi);
1601 if (chainsi->first == 0)
1602 chainsi->first = chainsi->idx;
1603 chainsi->next = idx;
1604 if (chainsi->endptr == NULL_TREE)
1605 chainsi->endptr = ptr;
1606 si->prev = chainsi->idx;
1607 si->first = chainsi->first;
1608 si->writable = chainsi->writable;
1609 }
1610 return si;
1611}
1612
1613/* For strinfo ORIGSI whose length has been just updated, adjust other
1614 related strinfos so that they match the new ORIGSI. This involves:
1615
1616 - adding ADJ to the nonzero_chars fields
1617 - copying full_string_p from the new ORIGSI. */
1618
1619static void
1620adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1621{
1622 strinfo *si = verify_related_strinfos (origsi);
1623
1624 if (si == NULL)
1625 return;
1626
1627 while (1)
1628 {
1629 strinfo *nsi;
1630
1631 if (si != origsi)
1632 {
1633 tree tem;
1634
1635 si = unshare_strinfo (si);
1636 /* We shouldn't see delayed lengths here; the caller must
1637 have calculated the old length in order to calculate
1638 the adjustment. */
1639 gcc_assert (si->nonzero_chars);
1640 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1641 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1642 TREE_TYPE (si->nonzero_chars),
1643 si->nonzero_chars, tem);
1644 si->full_string_p = origsi->full_string_p;
1645
1646 si->endptr = NULL_TREE;
1647 si->dont_invalidate = true;
1648 }
1649 nsi = get_next_strinfo (si);
1650 if (nsi == NULL)
1651 return;
1652 si = nsi;
1653 }
1654}
1655
1656/* Find if there are other SSA_NAME pointers equal to PTR
1657 for which we don't track their string lengths yet. If so, use
1658 IDX for them. */
1659
1660static void
1661find_equal_ptrs (tree ptr, int idx)
1662{
1663 if (TREE_CODE (ptr) != SSA_NAME)
1664 return;
1665 while (1)
1666 {
1667 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1668 if (!is_gimple_assign (gs: stmt))
1669 return;
1670 ptr = gimple_assign_rhs1 (gs: stmt);
1671 switch (gimple_assign_rhs_code (gs: stmt))
1672 {
1673 case SSA_NAME:
1674 break;
1675 CASE_CONVERT:
1676 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1677 return;
1678 if (TREE_CODE (ptr) == SSA_NAME)
1679 break;
1680 if (TREE_CODE (ptr) != ADDR_EXPR)
1681 return;
1682 /* FALLTHRU */
1683 case ADDR_EXPR:
1684 {
1685 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1686 if (pidx != NULL && *pidx == 0)
1687 *pidx = idx;
1688 return;
1689 }
1690 default:
1691 return;
1692 }
1693
1694 /* We might find an endptr created in this pass. Grow the
1695 vector in that case. */
1696 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1697 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, exact: true);
1698
1699 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1700 return;
1701 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1702 }
1703}
1704
1705/* Return true if STMT is a call to a builtin function with the right
1706 arguments and attributes that should be considered for optimization
1707 by this pass. */
1708
1709static bool
1710valid_builtin_call (gimple *stmt)
1711{
1712 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1713 return false;
1714
1715 tree callee = gimple_call_fndecl (gs: stmt);
1716 switch (DECL_FUNCTION_CODE (decl: callee))
1717 {
1718 case BUILT_IN_MEMCMP:
1719 case BUILT_IN_MEMCMP_EQ:
1720 case BUILT_IN_STRCMP:
1721 case BUILT_IN_STRNCMP:
1722 case BUILT_IN_STRCHR:
1723 case BUILT_IN_STRLEN:
1724 case BUILT_IN_STRNLEN:
1725 /* The above functions should be pure. Punt if they aren't. */
1726 if (gimple_vdef (g: stmt) || gimple_vuse (g: stmt) == NULL_TREE)
1727 return false;
1728 break;
1729
1730 case BUILT_IN_ALLOCA:
1731 case BUILT_IN_ALLOCA_WITH_ALIGN:
1732 case BUILT_IN_CALLOC:
1733 case BUILT_IN_MALLOC:
1734 case BUILT_IN_MEMCPY:
1735 case BUILT_IN_MEMCPY_CHK:
1736 case BUILT_IN_MEMPCPY:
1737 case BUILT_IN_MEMPCPY_CHK:
1738 case BUILT_IN_MEMSET:
1739 case BUILT_IN_STPCPY:
1740 case BUILT_IN_STPCPY_CHK:
1741 case BUILT_IN_STPNCPY:
1742 case BUILT_IN_STPNCPY_CHK:
1743 case BUILT_IN_STRCAT:
1744 case BUILT_IN_STRCAT_CHK:
1745 case BUILT_IN_STRCPY:
1746 case BUILT_IN_STRCPY_CHK:
1747 case BUILT_IN_STRNCAT:
1748 case BUILT_IN_STRNCAT_CHK:
1749 case BUILT_IN_STRNCPY:
1750 case BUILT_IN_STRNCPY_CHK:
1751 /* The above functions should be neither const nor pure. Punt if they
1752 aren't. */
1753 if (gimple_vdef (g: stmt) == NULL_TREE || gimple_vuse (g: stmt) == NULL_TREE)
1754 return false;
1755 break;
1756
1757 default:
1758 break;
1759 }
1760
1761 return true;
1762}
1763
1764/* If the last .MEM setter statement before STMT is
1765 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1766 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1767 just memcpy (x, y, strlen (y)). SI must be the zero length
1768 strinfo. */
1769
1770void
1771strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1772{
1773 tree vuse, callee, len;
1774 struct laststmt_struct last = laststmt;
1775 strinfo *lastsi, *firstsi;
1776 unsigned len_arg_no = 2;
1777
1778 laststmt.stmt = NULL;
1779 laststmt.len = NULL_TREE;
1780 laststmt.stridx = 0;
1781
1782 if (last.stmt == NULL)
1783 return;
1784
1785 vuse = gimple_vuse (g: stmt);
1786 if (vuse == NULL_TREE
1787 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1788 || !has_single_use (var: vuse))
1789 return;
1790
1791 gcc_assert (last.stridx > 0);
1792 lastsi = get_strinfo (idx: last.stridx);
1793 if (lastsi == NULL)
1794 return;
1795
1796 if (lastsi != si)
1797 {
1798 if (lastsi->first == 0 || lastsi->first != si->first)
1799 return;
1800
1801 firstsi = verify_related_strinfos (origsi: si);
1802 if (firstsi == NULL)
1803 return;
1804 while (firstsi != lastsi)
1805 {
1806 firstsi = get_next_strinfo (si: firstsi);
1807 if (firstsi == NULL)
1808 return;
1809 }
1810 }
1811
1812 if (!is_strcat && !zero_length_string_p (si))
1813 return;
1814
1815 if (is_gimple_assign (gs: last.stmt))
1816 {
1817 gimple_stmt_iterator gsi;
1818
1819 if (!integer_zerop (gimple_assign_rhs1 (gs: last.stmt)))
1820 return;
1821 if (stmt_could_throw_p (cfun, last.stmt))
1822 return;
1823 gsi = gsi_for_stmt (last.stmt);
1824 unlink_stmt_vdef (last.stmt);
1825 release_defs (last.stmt);
1826 gsi_remove (&gsi, true);
1827 return;
1828 }
1829
1830 if (!valid_builtin_call (stmt: last.stmt))
1831 return;
1832
1833 callee = gimple_call_fndecl (gs: last.stmt);
1834 switch (DECL_FUNCTION_CODE (decl: callee))
1835 {
1836 case BUILT_IN_MEMCPY:
1837 case BUILT_IN_MEMCPY_CHK:
1838 break;
1839 default:
1840 return;
1841 }
1842
1843 len = gimple_call_arg (gs: last.stmt, index: len_arg_no);
1844 if (tree_fits_uhwi_p (len))
1845 {
1846 if (!tree_fits_uhwi_p (last.len)
1847 || integer_zerop (len)
1848 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1849 return;
1850 /* Don't adjust the length if it is divisible by 4, it is more efficient
1851 to store the extra '\0' in that case. */
1852 if ((tree_to_uhwi (len) & 3) == 0)
1853 return;
1854
1855 /* Don't fold away an out of bounds access, as this defeats proper
1856 warnings. */
1857 tree dst = gimple_call_arg (gs: last.stmt, index: 0);
1858
1859 access_ref aref;
1860 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1861 if (size && tree_int_cst_lt (t1: size, t2: len))
1862 return;
1863 }
1864 else if (TREE_CODE (len) == SSA_NAME)
1865 {
1866 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1867 if (!is_gimple_assign (gs: def_stmt)
1868 || gimple_assign_rhs_code (gs: def_stmt) != PLUS_EXPR
1869 || gimple_assign_rhs1 (gs: def_stmt) != last.len
1870 || !integer_onep (gimple_assign_rhs2 (gs: def_stmt)))
1871 return;
1872 }
1873 else
1874 return;
1875
1876 gimple_call_set_arg (gs: last.stmt, index: len_arg_no, arg: last.len);
1877 update_stmt (s: last.stmt);
1878}
1879
1880/* For an LHS that is an SSA_NAME that is the result of a strlen()
1881 call, or when BOUND is non-null, of a strnlen() call, set LHS
1882 range info to [0, min (MAX, BOUND)] when the range includes more
1883 than one value and return LHS. Otherwise, when the range
1884 [MIN, MAX] is such that MIN == MAX, return the tree representation
1885 of (MIN). The latter allows callers to fold suitable strnlen() calls
1886 to constants. */
1887
1888tree
1889set_strlen_range (tree lhs, wide_int min, wide_int max,
1890 tree bound /* = NULL_TREE */)
1891{
1892 if (TREE_CODE (lhs) != SSA_NAME
1893 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1894 return NULL_TREE;
1895
1896 if (bound)
1897 {
1898 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1899 is less than the size of the array set MAX to it. It it's
1900 greater than MAX and MAX is non-zero bump MAX down to account
1901 for the necessary terminating nul. Otherwise leave it alone. */
1902 if (TREE_CODE (bound) == INTEGER_CST)
1903 {
1904 wide_int wibnd = wi::to_wide (t: bound);
1905 int cmp = wi::cmpu (x: wibnd, y: max);
1906 if (cmp < 0)
1907 max = wibnd;
1908 else if (cmp && wi::ne_p (x: max, y: min))
1909 --max;
1910 }
1911 else if (TREE_CODE (bound) == SSA_NAME)
1912 {
1913 value_range r;
1914 get_range_query (cfun)->range_of_expr (r, expr: bound);
1915 if (!r.undefined_p ())
1916 {
1917 /* For a bound in a known range, adjust the range determined
1918 above as necessary. For a bound in some anti-range or
1919 in an unknown range, use the range determined by callers. */
1920 if (wi::ltu_p (x: r.lower_bound (), y: min))
1921 min = r.lower_bound ();
1922 if (wi::ltu_p (x: r.upper_bound (), y: max))
1923 max = r.upper_bound ();
1924 }
1925 }
1926 }
1927
1928 if (min == max)
1929 return wide_int_to_tree (size_type_node, cst: min);
1930
1931 value_range vr (TREE_TYPE (lhs), min, max);
1932 set_range_info (lhs, vr);
1933 return lhs;
1934}
1935
1936/* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1937 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1938 a character array A[N] with unknown length bounded by N, and for
1939 strnlen(), by min (N, BOUND). */
1940
1941static tree
1942maybe_set_strlen_range (tree lhs, tree src, tree bound)
1943{
1944 if (TREE_CODE (lhs) != SSA_NAME
1945 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1946 return NULL_TREE;
1947
1948 if (TREE_CODE (src) == SSA_NAME)
1949 {
1950 gimple *def = SSA_NAME_DEF_STMT (src);
1951 if (is_gimple_assign (gs: def)
1952 && gimple_assign_rhs_code (gs: def) == ADDR_EXPR)
1953 src = gimple_assign_rhs1 (gs: def);
1954 }
1955
1956 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1957 NUL so that the difference between a pointer to just past it and
1958 one to its beginning is positive. */
1959 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1960
1961 if (TREE_CODE (src) == ADDR_EXPR)
1962 {
1963 /* The last array member of a struct can be bigger than its size
1964 suggests if it's treated as a poor-man's flexible array member. */
1965 src = TREE_OPERAND (src, 0);
1966 if (TREE_CODE (src) != MEM_REF
1967 && !array_ref_flexible_size_p (src))
1968 {
1969 tree type = TREE_TYPE (src);
1970 tree size = TYPE_SIZE_UNIT (type);
1971 if (size
1972 && TREE_CODE (size) == INTEGER_CST
1973 && !integer_zerop (size))
1974 {
1975 /* Even though such uses of strlen would be undefined,
1976 avoid relying on arrays of arrays in case some genius
1977 decides to call strlen on an unterminated array element
1978 that's followed by a terminated one. Likewise, avoid
1979 assuming that a struct array member is necessarily
1980 nul-terminated (the nul may be in the member that
1981 follows). In those cases, assume that the length
1982 of the string stored in such an array is bounded
1983 by the size of the enclosing object if one can be
1984 determined. */
1985 tree base = get_base_address (t: src);
1986 if (VAR_P (base))
1987 {
1988 if (tree size = DECL_SIZE_UNIT (base))
1989 if (size
1990 && TREE_CODE (size) == INTEGER_CST
1991 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1992 max = wi::to_wide (t: size);
1993 }
1994 }
1995
1996 /* For strlen() the upper bound above is equal to
1997 the longest string that can be stored in the array
1998 (i.e., it accounts for the terminating nul. For
1999 strnlen() bump up the maximum by one since the array
2000 need not be nul-terminated. */
2001 if (!bound && max != 0)
2002 --max;
2003 }
2004 }
2005
2006 wide_int min = wi::zero (precision: max.get_precision ());
2007 return set_strlen_range (lhs, min, max, bound);
2008}
2009
2010/* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
2011 either into a region allocated for the object SI when non-null,
2012 or into an object designated by the LHS of STMT otherwise.
2013 For a call STMT, when CALL_LHS is set use its left hand side
2014 as the destination, otherwise use argument zero.
2015 When nonnull uses RVALS to determine range information.
2016 RAWMEM may be set by memcpy and other raw memory functions
2017 to allow accesses across subobject boundaries. */
2018
2019void
2020strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
2021 strinfo *si, bool plus_one, bool rawmem)
2022{
2023 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2024 return;
2025
2026 /* The DECL of the function performing the write if it is done
2027 by one. */
2028 tree writefn = NULL_TREE;
2029 /* The destination expression involved in the store or call STMT. */
2030 tree dest = NULL_TREE;
2031
2032 if (is_gimple_assign (gs: stmt))
2033 dest = gimple_assign_lhs (gs: stmt);
2034 else if (is_gimple_call (gs: stmt))
2035 {
2036 if (call_lhs)
2037 dest = gimple_call_lhs (gs: stmt);
2038 else
2039 {
2040 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2041 dest = gimple_call_arg (gs: stmt, index: 0);
2042 }
2043
2044 if (!dest)
2045 return;
2046 writefn = gimple_call_fndecl (gs: stmt);
2047 }
2048 else
2049 return;
2050
2051 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2052 return;
2053
2054 const int ostype = rawmem ? 0 : 1;
2055
2056 /* Use maximum precision to avoid overflow in the addition below.
2057 Make sure all operands have the same precision to keep wide_int
2058 from ICE'ing. */
2059
2060 access_ref aref;
2061 /* The size of the destination region (which is smaller than
2062 the destination object for stores at a non-zero offset). */
2063 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2064
2065 if (!destsize)
2066 {
2067 aref.sizrng[0] = 0;
2068 aref.sizrng[1] = wi::to_offset (t: max_object_size ());
2069 }
2070
2071 /* Return early if the DESTSIZE size expression is the same as LEN
2072 and the offset into the destination is zero. This might happen
2073 in the case of a pair of malloc and memset calls to allocate
2074 an object and clear it as if by calloc. */
2075 if (destsize == len && !plus_one
2076 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2077 return;
2078
2079 wide_int rng[2];
2080 if (!get_range (val: len, stmt, minmax: rng, rvals: ptr_qry.rvals))
2081 return;
2082
2083 widest_int lenrng[2] =
2084 { widest_int::from (x: rng[0], sgn: SIGNED), widest_int::from (x: rng[1], sgn: SIGNED) };
2085
2086 if (plus_one)
2087 {
2088 lenrng[0] += 1;
2089 lenrng[1] += 1;
2090 }
2091
2092 /* The size of the remaining space in the destination computed
2093 as the size of the latter minus the offset into it. */
2094 widest_int spcrng[2];
2095 {
2096 offset_int remrng[2];
2097 remrng[1] = aref.size_remaining (remrng);
2098 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (x: remrng[0], sgn: UNSIGNED);
2099 spcrng[1] = widest_int::from (x: remrng[1], sgn: UNSIGNED);
2100 }
2101
2102 if (wi::leu_p (x: lenrng[0], y: spcrng[0])
2103 && wi::leu_p (x: lenrng[1], y: spcrng[1]))
2104 return;
2105
2106 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2107 bool warned = false;
2108 if (wi::leu_p (x: lenrng[0], y: spcrng[1]))
2109 {
2110 if (len != destsize
2111 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2112 return;
2113
2114 warned = (writefn
2115 ? warning_at (loc, OPT_Wstringop_overflow_,
2116 "%qD writing one too many bytes into a region "
2117 "of a size that depends on %<strlen%>",
2118 writefn)
2119 : warning_at (loc, OPT_Wstringop_overflow_,
2120 "writing one too many bytes into a region "
2121 "of a size that depends on %<strlen%>"));
2122 }
2123 else if (lenrng[0] == lenrng[1])
2124 {
2125 if (spcrng[0] == spcrng[1])
2126 warned = (writefn
2127 ? warning_n (loc, OPT_Wstringop_overflow_,
2128 lenrng[0].to_uhwi (),
2129 "%qD writing %wu byte into a region "
2130 "of size %wu",
2131 "%qD writing %wu bytes into a region "
2132 "of size %wu",
2133 writefn, lenrng[0].to_uhwi (),
2134 spcrng[0].to_uhwi ())
2135 : warning_n (loc, OPT_Wstringop_overflow_,
2136 lenrng[0].to_uhwi (),
2137 "writing %wu byte into a region "
2138 "of size %wu",
2139 "writing %wu bytes into a region "
2140 "of size %wu",
2141 lenrng[0].to_uhwi (),
2142 spcrng[0].to_uhwi ()));
2143 else
2144 warned = (writefn
2145 ? warning_n (loc, OPT_Wstringop_overflow_,
2146 lenrng[0].to_uhwi (),
2147 "%qD writing %wu byte into a region "
2148 "of size between %wu and %wu",
2149 "%qD writing %wu bytes into a region "
2150 "of size between %wu and %wu",
2151 writefn, lenrng[0].to_uhwi (),
2152 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2153 : warning_n (loc, OPT_Wstringop_overflow_,
2154 lenrng[0].to_uhwi (),
2155 "writing %wu byte into a region "
2156 "of size between %wu and %wu",
2157 "writing %wu bytes into a region "
2158 "of size between %wu and %wu",
2159 lenrng[0].to_uhwi (),
2160 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2161 }
2162 else if (spcrng[0] == spcrng[1])
2163 warned = (writefn
2164 ? warning_at (loc, OPT_Wstringop_overflow_,
2165 "%qD writing between %wu and %wu bytes "
2166 "into a region of size %wu",
2167 writefn, lenrng[0].to_uhwi (),
2168 lenrng[1].to_uhwi (),
2169 spcrng[0].to_uhwi ())
2170 : warning_at (loc, OPT_Wstringop_overflow_,
2171 "writing between %wu and %wu bytes "
2172 "into a region of size %wu",
2173 lenrng[0].to_uhwi (),
2174 lenrng[1].to_uhwi (),
2175 spcrng[0].to_uhwi ()));
2176 else
2177 warned = (writefn
2178 ? warning_at (loc, OPT_Wstringop_overflow_,
2179 "%qD writing between %wu and %wu bytes "
2180 "into a region of size between %wu and %wu",
2181 writefn, lenrng[0].to_uhwi (),
2182 lenrng[1].to_uhwi (),
2183 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2184 : warning_at (loc, OPT_Wstringop_overflow_,
2185 "writing between %wu and %wu bytes "
2186 "into a region of size between %wu and %wu",
2187 lenrng[0].to_uhwi (),
2188 lenrng[1].to_uhwi (),
2189 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2190
2191 if (!warned)
2192 return;
2193
2194 suppress_warning (stmt, OPT_Wstringop_overflow_);
2195
2196 aref.inform_access (access_write_only);
2197}
2198
2199/* Convenience wrapper for the above. */
2200
2201void
2202strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2203 unsigned HOST_WIDE_INT len,
2204 strinfo *si, bool plus_one, bool rawmem)
2205{
2206 tree tlen = build_int_cst (size_type_node, len);
2207 maybe_warn_overflow (stmt, call_lhs, len: tlen, si, plus_one, rawmem);
2208}
2209
2210/* Handle a strlen call. If strlen of the argument is known, replace
2211 the strlen call with the known value, otherwise remember that strlen
2212 of the argument is stored in the lhs SSA_NAME. */
2213
2214void
2215strlen_pass::handle_builtin_strlen ()
2216{
2217 gimple *stmt = gsi_stmt (i: m_gsi);
2218 tree lhs = gimple_call_lhs (gs: stmt);
2219
2220 if (lhs == NULL_TREE)
2221 return;
2222
2223 location_t loc = gimple_location (g: stmt);
2224 tree callee = gimple_call_fndecl (gs: stmt);
2225 tree src = gimple_call_arg (gs: stmt, index: 0);
2226 tree bound = (DECL_FUNCTION_CODE (decl: callee) == BUILT_IN_STRNLEN
2227 ? gimple_call_arg (gs: stmt, index: 1) : NULL_TREE);
2228 int idx = get_stridx (exp: src, stmt);
2229 if (idx || (bound && integer_zerop (bound)))
2230 {
2231 strinfo *si = NULL;
2232 tree rhs;
2233
2234 if (idx < 0)
2235 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2236 else if (idx == 0)
2237 rhs = bound;
2238 else
2239 {
2240 rhs = NULL_TREE;
2241 si = get_strinfo (idx);
2242 if (si != NULL)
2243 {
2244 rhs = get_string_length (si);
2245 /* For strnlen, if bound is constant, even if si is not known
2246 to be zero terminated, if we know at least bound bytes are
2247 not zero, the return value will be bound. */
2248 if (rhs == NULL_TREE
2249 && bound != NULL_TREE
2250 && TREE_CODE (bound) == INTEGER_CST
2251 && si->nonzero_chars != NULL_TREE
2252 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2253 && tree_int_cst_le (t1: bound, t2: si->nonzero_chars))
2254 rhs = bound;
2255 }
2256 }
2257 if (rhs != NULL_TREE)
2258 {
2259 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2260 {
2261 fprintf (stream: dump_file, format: "Optimizing: ");
2262 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2263 }
2264 rhs = unshare_expr (rhs);
2265 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2266 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2267
2268 if (bound)
2269 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2270
2271 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2272 stmt = gsi_stmt (i: m_gsi);
2273 update_stmt (s: stmt);
2274 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2275 {
2276 fprintf (stream: dump_file, format: "into: ");
2277 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2278 }
2279
2280 if (si != NULL
2281 /* Don't update anything for strnlen. */
2282 && bound == NULL_TREE
2283 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2284 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2285 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2286 {
2287 si = unshare_strinfo (si);
2288 si->nonzero_chars = lhs;
2289 gcc_assert (si->full_string_p);
2290 }
2291
2292 if (strlen_to_stridx
2293 && (bound == NULL_TREE
2294 /* For strnlen record this only if the call is proven
2295 to return the same value as strlen would. */
2296 || (TREE_CODE (bound) == INTEGER_CST
2297 && TREE_CODE (rhs) == INTEGER_CST
2298 && tree_int_cst_lt (t1: rhs, t2: bound))))
2299 strlen_to_stridx->put (k: lhs, v: stridx_strlenloc (idx, loc));
2300
2301 return;
2302 }
2303 }
2304 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2305 return;
2306
2307 if (idx == 0)
2308 idx = new_stridx (exp: src);
2309 else
2310 {
2311 strinfo *si = get_strinfo (idx);
2312 if (si != NULL)
2313 {
2314 if (!si->full_string_p && !si->stmt)
2315 {
2316 /* Until now we only had a lower bound on the string length.
2317 Install LHS as the actual length. */
2318 si = unshare_strinfo (si);
2319 tree old = si->nonzero_chars;
2320 si->nonzero_chars = lhs;
2321 si->full_string_p = true;
2322 if (old && TREE_CODE (old) == INTEGER_CST)
2323 {
2324 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2325 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2326 TREE_TYPE (lhs), lhs, old);
2327 adjust_related_strinfos (loc, origsi: si, adj);
2328 /* Use the constant minimum length as the lower bound
2329 of the non-constant length. */
2330 wide_int min = wi::to_wide (t: old);
2331 wide_int max
2332 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2333 set_strlen_range (lhs, min, max);
2334 }
2335 else
2336 {
2337 si->first = 0;
2338 si->prev = 0;
2339 si->next = 0;
2340 }
2341 }
2342 return;
2343 }
2344 }
2345 if (idx)
2346 {
2347 if (!bound)
2348 {
2349 /* Only store the new length information for calls to strlen(),
2350 not for those to strnlen(). */
2351 strinfo *si = new_strinfo (ptr: src, idx, nonzero_chars: lhs, full_string_p: true);
2352 set_strinfo (idx, si);
2353 find_equal_ptrs (ptr: src, idx);
2354 }
2355
2356 /* For SRC that is an array of N elements, set LHS's range
2357 to [0, min (N, BOUND)]. A constant return value means
2358 the range would have consisted of a single value. In
2359 that case, fold the result into the returned constant. */
2360 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2361 if (TREE_CODE (ret) == INTEGER_CST)
2362 {
2363 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2364 {
2365 fprintf (stream: dump_file, format: "Optimizing: ");
2366 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2367 }
2368 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2369 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2370 gimplify_and_update_call_from_tree (&m_gsi, ret);
2371 stmt = gsi_stmt (i: m_gsi);
2372 update_stmt (s: stmt);
2373 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2374 {
2375 fprintf (stream: dump_file, format: "into: ");
2376 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2377 }
2378 }
2379
2380 if (strlen_to_stridx && !bound)
2381 strlen_to_stridx->put (k: lhs, v: stridx_strlenloc (idx, loc));
2382 }
2383}
2384
2385/* Handle a strchr call. If strlen of the first argument is known, replace
2386 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2387 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2388
2389void
2390strlen_pass::handle_builtin_strchr ()
2391{
2392 gimple *stmt = gsi_stmt (i: m_gsi);
2393 tree lhs = gimple_call_lhs (gs: stmt);
2394
2395 if (lhs == NULL_TREE)
2396 return;
2397
2398 if (!integer_zerop (gimple_call_arg (gs: stmt, index: 1)))
2399 return;
2400
2401 tree src = gimple_call_arg (gs: stmt, index: 0);
2402
2403 /* Avoid folding if the first argument is not a nul-terminated array.
2404 Defer warning until later. */
2405 if (!check_nul_terminated_array (NULL_TREE, src))
2406 return;
2407
2408 int idx = get_stridx (exp: src, stmt);
2409 if (idx)
2410 {
2411 strinfo *si = NULL;
2412 tree rhs;
2413
2414 if (idx < 0)
2415 rhs = build_int_cst (size_type_node, ~idx);
2416 else
2417 {
2418 rhs = NULL_TREE;
2419 si = get_strinfo (idx);
2420 if (si != NULL)
2421 rhs = get_string_length (si);
2422 }
2423 if (rhs != NULL_TREE)
2424 {
2425 location_t loc = gimple_location (g: stmt);
2426
2427 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2428 {
2429 fprintf (stream: dump_file, format: "Optimizing: ");
2430 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2431 }
2432 if (si != NULL && si->endptr != NULL_TREE)
2433 {
2434 rhs = unshare_expr (si->endptr);
2435 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2436 TREE_TYPE (rhs)))
2437 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2438 }
2439 else
2440 {
2441 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2442 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2443 TREE_TYPE (src), src, rhs);
2444 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2445 TREE_TYPE (rhs)))
2446 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2447 }
2448 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2449 stmt = gsi_stmt (i: m_gsi);
2450 update_stmt (s: stmt);
2451 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2452 {
2453 fprintf (stream: dump_file, format: "into: ");
2454 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2455 }
2456 if (si != NULL
2457 && si->endptr == NULL_TREE
2458 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2459 {
2460 si = unshare_strinfo (si);
2461 si->endptr = lhs;
2462 }
2463 zero_length_string (ptr: lhs, chainsi: si);
2464 return;
2465 }
2466 }
2467 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2468 return;
2469 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2470 {
2471 if (idx == 0)
2472 idx = new_stridx (exp: src);
2473 else if (get_strinfo (idx) != NULL)
2474 {
2475 zero_length_string (ptr: lhs, NULL);
2476 return;
2477 }
2478 if (idx)
2479 {
2480 location_t loc = gimple_location (g: stmt);
2481 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2482 tree srcu = fold_convert_loc (loc, size_type_node, src);
2483 tree length = fold_build2_loc (loc, MINUS_EXPR,
2484 size_type_node, lhsu, srcu);
2485 strinfo *si = new_strinfo (ptr: src, idx, nonzero_chars: length, full_string_p: true);
2486 si->endptr = lhs;
2487 set_strinfo (idx, si);
2488 find_equal_ptrs (ptr: src, idx);
2489 zero_length_string (ptr: lhs, chainsi: si);
2490 }
2491 }
2492 else
2493 zero_length_string (ptr: lhs, NULL);
2494}
2495
2496/* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2497 If strlen of the second argument is known, strlen of the first argument
2498 is the same after this call. Furthermore, attempt to convert it to
2499 memcpy. Uses RVALS to determine range information. */
2500
2501void
2502strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2503{
2504 int idx, didx;
2505 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2506 bool success;
2507 gimple *stmt = gsi_stmt (i: m_gsi);
2508 strinfo *si, *dsi, *olddsi, *zsi;
2509 location_t loc;
2510
2511 src = gimple_call_arg (gs: stmt, index: 1);
2512 dst = gimple_call_arg (gs: stmt, index: 0);
2513 lhs = gimple_call_lhs (gs: stmt);
2514 idx = get_stridx (exp: src, stmt);
2515 si = NULL;
2516 if (idx > 0)
2517 si = get_strinfo (idx);
2518
2519 didx = get_stridx (exp: dst, stmt);
2520 olddsi = NULL;
2521 oldlen = NULL_TREE;
2522 if (didx > 0)
2523 olddsi = get_strinfo (idx: didx);
2524 else if (didx < 0)
2525 return;
2526
2527 if (olddsi != NULL)
2528 adjust_last_stmt (si: olddsi, stmt, is_strcat: false);
2529
2530 srclen = NULL_TREE;
2531 if (si != NULL)
2532 srclen = get_string_length (si);
2533 else if (idx < 0)
2534 srclen = build_int_cst (size_type_node, ~idx);
2535
2536 maybe_warn_overflow (stmt, call_lhs: false, len: srclen, si: olddsi, plus_one: true);
2537
2538 if (olddsi != NULL)
2539 adjust_last_stmt (si: olddsi, stmt, is_strcat: false);
2540
2541 loc = gimple_location (g: stmt);
2542 if (srclen == NULL_TREE)
2543 switch (bcode)
2544 {
2545 case BUILT_IN_STRCPY:
2546 case BUILT_IN_STRCPY_CHK:
2547 if (lhs != NULL_TREE || !builtin_decl_implicit_p (fncode: BUILT_IN_STPCPY))
2548 return;
2549 break;
2550 case BUILT_IN_STPCPY:
2551 case BUILT_IN_STPCPY_CHK:
2552 if (lhs == NULL_TREE)
2553 return;
2554 else
2555 {
2556 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2557 srclen = fold_convert_loc (loc, size_type_node, dst);
2558 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2559 lhsuint, srclen);
2560 }
2561 break;
2562 default:
2563 gcc_unreachable ();
2564 }
2565
2566 if (didx == 0)
2567 {
2568 didx = new_stridx (exp: dst);
2569 if (didx == 0)
2570 return;
2571 }
2572 if (olddsi != NULL)
2573 {
2574 oldlen = olddsi->nonzero_chars;
2575 dsi = unshare_strinfo (si: olddsi);
2576 dsi->nonzero_chars = srclen;
2577 dsi->full_string_p = (srclen != NULL_TREE);
2578 /* Break the chain, so adjust_related_strinfo on later pointers in
2579 the chain won't adjust this one anymore. */
2580 dsi->next = 0;
2581 dsi->stmt = NULL;
2582 dsi->endptr = NULL_TREE;
2583 }
2584 else
2585 {
2586 dsi = new_strinfo (ptr: dst, idx: didx, nonzero_chars: srclen, full_string_p: srclen != NULL_TREE);
2587 set_strinfo (idx: didx, si: dsi);
2588 find_equal_ptrs (ptr: dst, idx: didx);
2589 }
2590 dsi->writable = true;
2591 dsi->dont_invalidate = true;
2592
2593 if (dsi->nonzero_chars == NULL_TREE)
2594 {
2595 strinfo *chainsi;
2596
2597 /* If string length of src is unknown, use delayed length
2598 computation. If string length of dst will be needed, it
2599 can be computed by transforming this strcpy call into
2600 stpcpy and subtracting dst from the return value. */
2601
2602 /* Look for earlier strings whose length could be determined if
2603 this strcpy is turned into an stpcpy. */
2604
2605 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (origsi: dsi)) != NULL)
2606 {
2607 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (idx: chainsi->next))
2608 {
2609 /* When setting a stmt for delayed length computation
2610 prevent all strinfos through dsi from being
2611 invalidated. */
2612 chainsi = unshare_strinfo (si: chainsi);
2613 chainsi->stmt = stmt;
2614 chainsi->nonzero_chars = NULL_TREE;
2615 chainsi->full_string_p = false;
2616 chainsi->endptr = NULL_TREE;
2617 chainsi->dont_invalidate = true;
2618 }
2619 }
2620 dsi->stmt = stmt;
2621
2622 /* Try to detect overlap before returning. This catches cases
2623 like strcpy (d, d + n) where n is non-constant whose range
2624 is such that (n <= strlen (d) holds).
2625
2626 OLDDSI->NONZERO_chars may have been reset by this point with
2627 oldlen holding it original value. */
2628 if (olddsi && oldlen)
2629 {
2630 /* Add 1 for the terminating NUL. */
2631 tree type = TREE_TYPE (oldlen);
2632 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2633 build_int_cst (type, 1));
2634 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2635 }
2636
2637 return;
2638 }
2639
2640 if (olddsi != NULL)
2641 {
2642 tree adj = NULL_TREE;
2643 if (oldlen == NULL_TREE)
2644 ;
2645 else if (integer_zerop (oldlen))
2646 adj = srclen;
2647 else if (TREE_CODE (oldlen) == INTEGER_CST
2648 || TREE_CODE (srclen) == INTEGER_CST)
2649 adj = fold_build2_loc (loc, MINUS_EXPR,
2650 TREE_TYPE (srclen), srclen,
2651 fold_convert_loc (loc, TREE_TYPE (srclen),
2652 oldlen));
2653 if (adj != NULL_TREE)
2654 adjust_related_strinfos (loc, origsi: dsi, adj);
2655 else
2656 dsi->prev = 0;
2657 }
2658 /* strcpy src may not overlap dst, so src doesn't need to be
2659 invalidated either. */
2660 if (si != NULL)
2661 si->dont_invalidate = true;
2662
2663 fn = NULL_TREE;
2664 zsi = NULL;
2665 switch (bcode)
2666 {
2667 case BUILT_IN_STRCPY:
2668 fn = builtin_decl_implicit (fncode: BUILT_IN_MEMCPY);
2669 if (lhs)
2670 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2671 break;
2672 case BUILT_IN_STRCPY_CHK:
2673 fn = builtin_decl_explicit (fncode: BUILT_IN_MEMCPY_CHK);
2674 if (lhs)
2675 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2676 break;
2677 case BUILT_IN_STPCPY:
2678 /* This would need adjustment of the lhs (subtract one),
2679 or detection that the trailing '\0' doesn't need to be
2680 written, if it will be immediately overwritten.
2681 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2682 if (lhs)
2683 {
2684 dsi->endptr = lhs;
2685 zsi = zero_length_string (ptr: lhs, chainsi: dsi);
2686 }
2687 break;
2688 case BUILT_IN_STPCPY_CHK:
2689 /* This would need adjustment of the lhs (subtract one),
2690 or detection that the trailing '\0' doesn't need to be
2691 written, if it will be immediately overwritten.
2692 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2693 if (lhs)
2694 {
2695 dsi->endptr = lhs;
2696 zsi = zero_length_string (ptr: lhs, chainsi: dsi);
2697 }
2698 break;
2699 default:
2700 gcc_unreachable ();
2701 }
2702 if (zsi != NULL)
2703 zsi->dont_invalidate = true;
2704
2705 if (fn)
2706 {
2707 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2708 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2709 }
2710 else
2711 type = size_type_node;
2712
2713 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2714 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2715
2716 /* Disable warning for the transformed statement? */
2717 opt_code no_warning_opt = no_warning;
2718
2719 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2720 {
2721 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2722 NULL_TREE, len);
2723 if (no_warning_opt)
2724 suppress_warning (stmt, no_warning_opt);
2725 }
2726
2727 if (fn == NULL_TREE)
2728 return;
2729
2730 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2731 GSI_SAME_STMT);
2732 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2733 {
2734 fprintf (stream: dump_file, format: "Optimizing: ");
2735 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2736 }
2737 if (gimple_call_num_args (gs: stmt) == 2)
2738 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2739 else
2740 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2741 gimple_call_arg (gs: stmt, index: 2));
2742 if (success)
2743 {
2744 stmt = gsi_stmt (i: m_gsi);
2745 update_stmt (s: stmt);
2746 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2747 {
2748 fprintf (stream: dump_file, format: "into: ");
2749 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2750 }
2751 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2752 laststmt.stmt = stmt;
2753 laststmt.len = srclen;
2754 laststmt.stridx = dsi->idx;
2755 }
2756 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2757 fprintf (stream: dump_file, format: "not possible.\n");
2758
2759 if (no_warning_opt)
2760 suppress_warning (stmt, no_warning_opt);
2761}
2762
2763/* Check the size argument to the built-in forms of stpncpy and strncpy
2764 for out-of-bounds offsets or overlapping access, and to see if the
2765 size argument is derived from a call to strlen() on the source argument,
2766 and if so, issue an appropriate warning. */
2767
2768void
2769strlen_pass::handle_builtin_strncat (built_in_function)
2770{
2771 /* Same as stxncpy(). */
2772 handle_builtin_stxncpy_strncat (append_p: true);
2773}
2774
2775/* Return true if LEN depends on a call to strlen(SRC) in an interesting
2776 way. LEN can either be an integer expression, or a pointer (to char).
2777 When it is the latter (such as in recursive calls to self) it is
2778 assumed to be the argument in some call to strlen() whose relationship
2779 to SRC is being ascertained. */
2780
2781bool
2782is_strlen_related_p (tree src, tree len)
2783{
2784 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2785 && operand_equal_p (src, len, flags: 0))
2786 return true;
2787
2788 if (TREE_CODE (len) != SSA_NAME)
2789 return false;
2790
2791 if (TREE_CODE (src) == SSA_NAME)
2792 {
2793 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2794 if (is_gimple_assign (gs: srcdef))
2795 {
2796 /* Handle bitwise AND used in conversions from wider size_t
2797 to narrower unsigned types. */
2798 tree_code code = gimple_assign_rhs_code (gs: srcdef);
2799 if (code == BIT_AND_EXPR
2800 || code == NOP_EXPR)
2801 return is_strlen_related_p (src: gimple_assign_rhs1 (gs: srcdef), len);
2802
2803 return false;
2804 }
2805
2806 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2807 {
2808 /* If SRC is the result of a call to an allocation function
2809 or strlen, use the function's argument instead. */
2810 tree func = gimple_call_fndecl (gs: srcdef);
2811 built_in_function code = DECL_FUNCTION_CODE (decl: func);
2812 if (code == BUILT_IN_ALLOCA
2813 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2814 || code == BUILT_IN_MALLOC
2815 || code == BUILT_IN_STRLEN)
2816 return is_strlen_related_p (src: gimple_call_arg (gs: srcdef, index: 0), len);
2817
2818 /* FIXME: Handle other functions with attribute alloc_size. */
2819 return false;
2820 }
2821 }
2822
2823 gimple *lendef = SSA_NAME_DEF_STMT (len);
2824 if (!lendef)
2825 return false;
2826
2827 if (is_gimple_call (gs: lendef))
2828 {
2829 tree func = gimple_call_fndecl (gs: lendef);
2830 if (!valid_builtin_call (stmt: lendef)
2831 || DECL_FUNCTION_CODE (decl: func) != BUILT_IN_STRLEN)
2832 return false;
2833
2834 tree arg = gimple_call_arg (gs: lendef, index: 0);
2835 return is_strlen_related_p (src, len: arg);
2836 }
2837
2838 if (!is_gimple_assign (gs: lendef))
2839 return false;
2840
2841 tree_code code = gimple_assign_rhs_code (gs: lendef);
2842 tree rhs1 = gimple_assign_rhs1 (gs: lendef);
2843 tree rhstype = TREE_TYPE (rhs1);
2844
2845 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2846 || (INTEGRAL_TYPE_P (rhstype)
2847 && (code == BIT_AND_EXPR
2848 || code == NOP_EXPR)))
2849 {
2850 /* Pointer plus (an integer), and truncation are considered among
2851 the (potentially) related expressions to strlen. */
2852 return is_strlen_related_p (src, len: rhs1);
2853 }
2854
2855 if (tree rhs2 = gimple_assign_rhs2 (gs: lendef))
2856 {
2857 /* Integer subtraction is considered strlen-related when both
2858 arguments are integers and second one is strlen-related. */
2859 rhstype = TREE_TYPE (rhs2);
2860 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2861 return is_strlen_related_p (src, len: rhs2);
2862 }
2863
2864 return false;
2865}
2866
2867/* Called by handle_builtin_stxncpy_strncat and by
2868 gimple_fold_builtin_strncpy in gimple-fold.cc.
2869 Check to see if the specified bound is a) equal to the size of
2870 the destination DST and if so, b) if it's immediately followed by
2871 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2872 do nothing. Return true if diagnostic has been issued.
2873
2874 The purpose is to diagnose calls to strncpy and stpncpy that do
2875 not nul-terminate the copy while allowing for the idiom where
2876 such a call is immediately followed by setting the last element
2877 to nul, as in:
2878 char a[32];
2879 strncpy (a, s, sizeof a);
2880 a[sizeof a - 1] = '\0';
2881*/
2882
2883bool
2884maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2885 pointer_query *ptr_qry /* = NULL */)
2886{
2887 gimple *stmt = gsi_stmt (i: gsi);
2888 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2889 return false;
2890
2891 wide_int cntrange[2];
2892 value_range r;
2893 if (!get_range_query (cfun)->range_of_expr (r, expr: cnt)
2894 || r.varying_p ()
2895 || r.undefined_p ())
2896 return false;
2897
2898 tree min, max;
2899 value_range_kind kind = get_legacy_range (r, min, max);
2900 cntrange[0] = wi::to_wide (t: min);
2901 cntrange[1] = wi::to_wide (t: max);
2902 if (kind == VR_ANTI_RANGE)
2903 {
2904 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2905
2906 if (wi::ltu_p (x: cntrange[1], y: maxobjsize))
2907 {
2908 cntrange[0] = cntrange[1] + 1;
2909 cntrange[1] = maxobjsize;
2910 }
2911 else
2912 {
2913 cntrange[1] = cntrange[0] - 1;
2914 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2915 }
2916 }
2917
2918 /* Negative value is the constant string length. If it's less than
2919 the lower bound there is no truncation. Avoid calling get_stridx()
2920 when ssa_ver_to_stridx is empty. That implies the caller isn't
2921 running under the control of this pass and ssa_ver_to_stridx hasn't
2922 been created yet. */
2923 int sidx = ssa_ver_to_stridx.length () ? get_stridx (exp: src, stmt) : 0;
2924 if (sidx < 0 && wi::gtu_p (x: cntrange[0], y: ~sidx))
2925 return false;
2926
2927 tree dst = gimple_call_arg (gs: stmt, index: 0);
2928 tree dstdecl = dst;
2929 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2930 dstdecl = TREE_OPERAND (dstdecl, 0);
2931
2932 tree ref = NULL_TREE;
2933
2934 if (!sidx)
2935 {
2936 /* If the source is a non-string return early to avoid warning
2937 for possible truncation (if the truncation is certain SIDX
2938 is non-zero). */
2939 tree srcdecl = gimple_call_arg (gs: stmt, index: 1);
2940 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2941 srcdecl = TREE_OPERAND (srcdecl, 0);
2942 if (get_attr_nonstring_decl (srcdecl, &ref))
2943 return false;
2944 }
2945
2946 /* Likewise, if the destination refers to an array/pointer declared
2947 nonstring return early. */
2948 if (get_attr_nonstring_decl (dstdecl, &ref))
2949 return false;
2950
2951 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2952 avoid the truncation warning. */
2953 gsi_next_nondebug (i: &gsi);
2954 gimple *next_stmt = gsi_stmt (i: gsi);
2955 if (!next_stmt)
2956 {
2957 /* When there is no statement in the same basic block check
2958 the immediate successor block. */
2959 if (basic_block bb = gimple_bb (g: stmt))
2960 {
2961 if (single_succ_p (bb))
2962 {
2963 /* For simplicity, ignore blocks with multiple outgoing
2964 edges for now and only consider successor blocks along
2965 normal edges. */
2966 edge e = EDGE_SUCC (bb, 0);
2967 if (!(e->flags & EDGE_ABNORMAL))
2968 {
2969 gsi = gsi_start_bb (bb: e->dest);
2970 next_stmt = gsi_stmt (i: gsi);
2971 if (next_stmt && is_gimple_debug (gs: next_stmt))
2972 {
2973 gsi_next_nondebug (i: &gsi);
2974 next_stmt = gsi_stmt (i: gsi);
2975 }
2976 }
2977 }
2978 }
2979 }
2980
2981 if (next_stmt && is_gimple_assign (gs: next_stmt))
2982 {
2983 tree lhs = gimple_assign_lhs (gs: next_stmt);
2984 tree_code code = TREE_CODE (lhs);
2985 if (code == ARRAY_REF || code == MEM_REF)
2986 lhs = TREE_OPERAND (lhs, 0);
2987
2988 tree func = gimple_call_fndecl (gs: stmt);
2989 if (DECL_FUNCTION_CODE (decl: func) == BUILT_IN_STPNCPY)
2990 {
2991 tree ret = gimple_call_lhs (gs: stmt);
2992 if (ret && operand_equal_p (ret, lhs, flags: 0))
2993 return false;
2994 }
2995
2996 /* Determine the base address and offset of the reference,
2997 ignoring the innermost array index. */
2998 if (TREE_CODE (ref) == ARRAY_REF)
2999 ref = TREE_OPERAND (ref, 0);
3000
3001 poly_int64 dstoff;
3002 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3003
3004 poly_int64 lhsoff;
3005 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3006 if (lhsbase
3007 && dstbase
3008 && known_eq (dstoff, lhsoff)
3009 && operand_equal_p (dstbase, lhsbase, flags: 0))
3010 return false;
3011 }
3012
3013 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3014 wide_int lenrange[2];
3015 if (strinfo *sisrc = sidx > 0 ? get_strinfo (idx: sidx) : NULL)
3016 {
3017 lenrange[0] = (sisrc->nonzero_chars
3018 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3019 ? wi::to_wide (t: sisrc->nonzero_chars)
3020 : wi::zero (precision: prec));
3021 lenrange[1] = lenrange[0];
3022 }
3023 else if (sidx < 0)
3024 lenrange[0] = lenrange[1] = wi::shwi (val: ~sidx, precision: prec);
3025 else
3026 {
3027 c_strlen_data lendata = { };
3028 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3029 to have it set to the length of the longest string in a PHI. */
3030 lendata.maxbound = src;
3031 get_range_strlen (src, &lendata, /* eltsize = */1);
3032 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3033 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3034 {
3035 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3036 which stores the length of the shortest known string. */
3037 if (integer_all_onesp (lendata.maxlen))
3038 lenrange[0] = wi::shwi (val: 0, precision: prec);
3039 else
3040 lenrange[0] = wi::to_wide (t: lendata.minlen, prec);
3041 lenrange[1] = wi::to_wide (t: lendata.maxbound, prec);
3042 }
3043 else
3044 {
3045 lenrange[0] = wi::shwi (val: 0, precision: prec);
3046 lenrange[1] = wi::shwi (val: -1, precision: prec);
3047 }
3048 }
3049
3050 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3051 tree func = gimple_call_fndecl (gs: stmt);
3052
3053 if (lenrange[0] != 0 || !wi::neg_p (x: lenrange[1]))
3054 {
3055 /* If the longest source string is shorter than the lower bound
3056 of the specified count the copy is definitely nul-terminated. */
3057 if (wi::ltu_p (x: lenrange[1], y: cntrange[0]))
3058 return false;
3059
3060 if (wi::neg_p (x: lenrange[1]))
3061 {
3062 /* The length of one of the strings is unknown but at least
3063 one has non-zero length and that length is stored in
3064 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3065 warning below. */
3066 lenrange[1] = lenrange[0];
3067 lenrange[0] = wi::shwi (val: 0, precision: prec);
3068 }
3069
3070 /* Set to true for strncat whose bound is derived from the length
3071 of the destination (the expected usage pattern). */
3072 bool cat_dstlen_bounded = false;
3073 if (DECL_FUNCTION_CODE (decl: func) == BUILT_IN_STRNCAT)
3074 cat_dstlen_bounded = is_strlen_related_p (src: dst, len: cnt);
3075
3076 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3077 return warning_n (callloc, OPT_Wstringop_truncation,
3078 cntrange[0].to_uhwi (),
3079 "%qD output truncated before terminating "
3080 "nul copying %E byte from a string of the "
3081 "same length",
3082 "%qD output truncated before terminating nul "
3083 "copying %E bytes from a string of the same "
3084 "length",
3085 func, cnt);
3086 else if (!cat_dstlen_bounded)
3087 {
3088 if (wi::geu_p (x: lenrange[0], y: cntrange[1]))
3089 {
3090 /* The shortest string is longer than the upper bound of
3091 the count so the truncation is certain. */
3092 if (cntrange[0] == cntrange[1])
3093 return warning_n (callloc, OPT_Wstringop_truncation,
3094 cntrange[0].to_uhwi (),
3095 "%qD output truncated copying %E byte "
3096 "from a string of length %wu",
3097 "%qD output truncated copying %E bytes "
3098 "from a string of length %wu",
3099 func, cnt, lenrange[0].to_uhwi ());
3100
3101 return warning_at (callloc, OPT_Wstringop_truncation,
3102 "%qD output truncated copying between %wu "
3103 "and %wu bytes from a string of length %wu",
3104 func, cntrange[0].to_uhwi (),
3105 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3106 }
3107 else if (wi::geu_p (x: lenrange[1], y: cntrange[1]))
3108 {
3109 /* The longest string is longer than the upper bound of
3110 the count so the truncation is possible. */
3111 if (cntrange[0] == cntrange[1])
3112 return warning_n (callloc, OPT_Wstringop_truncation,
3113 cntrange[0].to_uhwi (),
3114 "%qD output may be truncated copying %E "
3115 "byte from a string of length %wu",
3116 "%qD output may be truncated copying %E "
3117 "bytes from a string of length %wu",
3118 func, cnt, lenrange[1].to_uhwi ());
3119
3120 return warning_at (callloc, OPT_Wstringop_truncation,
3121 "%qD output may be truncated copying between "
3122 "%wu and %wu bytes from a string of length %wu",
3123 func, cntrange[0].to_uhwi (),
3124 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3125 }
3126 }
3127
3128 if (!cat_dstlen_bounded
3129 && cntrange[0] != cntrange[1]
3130 && wi::leu_p (x: cntrange[0], y: lenrange[0])
3131 && wi::leu_p (x: cntrange[1], y: lenrange[0] + 1))
3132 {
3133 /* If the source (including the terminating nul) is longer than
3134 the lower bound of the specified count but shorter than the
3135 upper bound the copy may (but need not) be truncated. */
3136 return warning_at (callloc, OPT_Wstringop_truncation,
3137 "%qD output may be truncated copying between "
3138 "%wu and %wu bytes from a string of length %wu",
3139 func, cntrange[0].to_uhwi (),
3140 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3141 }
3142 }
3143
3144 access_ref aref;
3145 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3146 {
3147 /* The source length is unknown. Try to determine the destination
3148 size and see if it matches the specified bound. If not, bail.
3149 Otherwise go on to see if it should be diagnosed for possible
3150 truncation. */
3151 if (!dstsize)
3152 return false;
3153
3154 if (wi::to_wide (t: dstsize) != cntrange[1])
3155 return false;
3156
3157 /* Avoid warning for strncpy(a, b, N) calls where the following
3158 equalities hold:
3159 N == sizeof a && N == sizeof b */
3160 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3161 if (wi::to_wide (t: srcsize) == cntrange[1])
3162 return false;
3163
3164 if (cntrange[0] == cntrange[1])
3165 return warning_at (callloc, OPT_Wstringop_truncation,
3166 "%qD specified bound %E equals destination size",
3167 func, cnt);
3168 }
3169
3170 return false;
3171}
3172
3173/* Check the arguments to the built-in forms of stpncpy, strncpy, and
3174 strncat, for out-of-bounds offsets or overlapping access, and to see
3175 if the size is derived from calling strlen() on the source argument,
3176 and if so, issue the appropriate warning.
3177 APPEND_P is true for strncat. */
3178
3179void
3180strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3181{
3182 if (!strlen_to_stridx)
3183 return;
3184
3185 gimple *stmt = gsi_stmt (i: m_gsi);
3186
3187 tree dst = gimple_call_arg (gs: stmt, index: 0);
3188 tree src = gimple_call_arg (gs: stmt, index: 1);
3189 tree len = gimple_call_arg (gs: stmt, index: 2);
3190 /* An upper bound of the size of the destination. */
3191 tree dstsize = NULL_TREE;
3192 /* The length of the destination and source strings (plus 1 for those
3193 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3194 a lower bound). */
3195 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3196
3197 int didx = get_stridx (exp: dst, stmt);
3198 if (strinfo *sidst = didx > 0 ? get_strinfo (idx: didx) : NULL)
3199 {
3200 /* Compute the size of the destination string including the nul
3201 if it is known to be nul-terminated. */
3202 if (sidst->nonzero_chars)
3203 {
3204 if (sidst->full_string_p)
3205 {
3206 /* String is known to be nul-terminated. */
3207 tree type = TREE_TYPE (sidst->nonzero_chars);
3208 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3209 build_int_cst (type, 1));
3210 }
3211 else
3212 dstlenp1 = sidst->nonzero_chars;
3213 }
3214 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3215 {
3216 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3217 dstsize = gimple_call_alloc_size (def_stmt);
3218 }
3219
3220 dst = sidst->ptr;
3221 }
3222
3223 int sidx = get_stridx (exp: src, stmt);
3224 strinfo *sisrc = sidx > 0 ? get_strinfo (idx: sidx) : NULL;
3225 if (sisrc)
3226 {
3227 /* strncat() and strncpy() can modify the source string by writing
3228 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3229 clear. */
3230
3231 /* Compute the size of the source string including the terminating
3232 nul if its known to be nul-terminated. */
3233 if (sisrc->nonzero_chars)
3234 {
3235 if (sisrc->full_string_p)
3236 {
3237 tree type = TREE_TYPE (sisrc->nonzero_chars);
3238 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3239 build_int_cst (type, 1));
3240 }
3241 else
3242 srclenp1 = sisrc->nonzero_chars;
3243 }
3244
3245 src = sisrc->ptr;
3246 }
3247 else
3248 srclenp1 = NULL_TREE;
3249
3250 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3251 if (opt != no_warning)
3252 {
3253 suppress_warning (stmt, opt);
3254 return;
3255 }
3256
3257 /* If the length argument was computed from strlen(S) for some string
3258 S retrieve the strinfo index for the string (PSS->FIRST) along with
3259 the location of the strlen() call (PSS->SECOND). */
3260 stridx_strlenloc *pss = strlen_to_stridx->get (k: len);
3261 if (!pss || pss->first <= 0)
3262 {
3263 if (maybe_diag_stxncpy_trunc (gsi: m_gsi, src, cnt: len))
3264 suppress_warning (stmt, OPT_Wstringop_truncation);
3265
3266 return;
3267 }
3268
3269 /* Retrieve the strinfo data for the string S that LEN was computed
3270 from as some function F of strlen (S) (i.e., LEN need not be equal
3271 to strlen(S)). */
3272 strinfo *silen = get_strinfo (idx: pss->first);
3273
3274 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3275
3276 tree func = gimple_call_fndecl (gs: stmt);
3277
3278 bool warned = false;
3279
3280 /* When -Wstringop-truncation is set, try to determine truncation
3281 before diagnosing possible overflow. Truncation is implied by
3282 the LEN argument being equal to strlen(SRC), regardless of
3283 whether its value is known. Otherwise, when appending, or
3284 when copying into a destination of known size, issue the more
3285 generic -Wstringop-overflow which triggers for LEN arguments
3286 that in any meaningful way depend on strlen(SRC). */
3287 if (!append_p
3288 && sisrc == silen
3289 && is_strlen_related_p (src, len)
3290 && warning_at (callloc, OPT_Wstringop_truncation,
3291 "%qD output truncated before terminating nul "
3292 "copying as many bytes from a string as its length",
3293 func))
3294 warned = true;
3295 else if ((append_p || !dstsize || len == dstlenp1)
3296 && silen && is_strlen_related_p (src, len: silen->ptr))
3297 {
3298 /* Issue -Wstringop-overflow when appending or when writing into
3299 a destination of a known size. Otherwise, when copying into
3300 a destination of an unknown size, it's truncation. */
3301 opt_code opt = (append_p || dstsize
3302 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3303 warned = warning_at (callloc, opt,
3304 "%qD specified bound depends on the length "
3305 "of the source argument",
3306 func);
3307 }
3308 if (warned)
3309 {
3310 location_t strlenloc = pss->second;
3311 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3312 inform (strlenloc, "length computed here");
3313 }
3314}
3315
3316/* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3317 If strlen of the second argument is known and length of the third argument
3318 is that plus one, strlen of the first argument is the same after this
3319 call. Uses RVALS to determine range information. */
3320
3321void
3322strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3323{
3324 tree lhs, oldlen, newlen;
3325 gimple *stmt = gsi_stmt (i: m_gsi);
3326 strinfo *si, *dsi;
3327
3328 tree len = gimple_call_arg (gs: stmt, index: 2);
3329 tree src = gimple_call_arg (gs: stmt, index: 1);
3330 tree dst = gimple_call_arg (gs: stmt, index: 0);
3331
3332 int didx = get_stridx (exp: dst, stmt);
3333 strinfo *olddsi = NULL;
3334 if (didx > 0)
3335 olddsi = get_strinfo (idx: didx);
3336 else if (didx < 0)
3337 return;
3338
3339 if (olddsi != NULL
3340 && !integer_zerop (len))
3341 {
3342 maybe_warn_overflow (stmt, call_lhs: false, len, si: olddsi, plus_one: false, rawmem: true);
3343 if (tree_fits_uhwi_p (len))
3344 adjust_last_stmt (si: olddsi, stmt, is_strcat: false);
3345 }
3346
3347 int idx = get_stridx (exp: src, stmt);
3348 if (idx == 0)
3349 return;
3350
3351 bool full_string_p;
3352 if (idx > 0)
3353 {
3354 gimple *def_stmt;
3355
3356 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3357 is known. */
3358 si = get_strinfo (idx);
3359 if (si == NULL || si->nonzero_chars == NULL_TREE)
3360 return;
3361 if (TREE_CODE (len) == INTEGER_CST
3362 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3363 {
3364 if (tree_int_cst_le (t1: len, t2: si->nonzero_chars))
3365 {
3366 /* Copying LEN nonzero characters, where LEN is constant. */
3367 newlen = len;
3368 full_string_p = false;
3369 }
3370 else
3371 {
3372 /* Copying the whole of the analyzed part of SI. */
3373 newlen = si->nonzero_chars;
3374 full_string_p = si->full_string_p;
3375 }
3376 }
3377 else
3378 {
3379 if (!si->full_string_p)
3380 return;
3381 if (TREE_CODE (len) != SSA_NAME)
3382 return;
3383 def_stmt = SSA_NAME_DEF_STMT (len);
3384 if (!is_gimple_assign (gs: def_stmt)
3385 || gimple_assign_rhs_code (gs: def_stmt) != PLUS_EXPR
3386 || gimple_assign_rhs1 (gs: def_stmt) != si->nonzero_chars
3387 || !integer_onep (gimple_assign_rhs2 (gs: def_stmt)))
3388 return;
3389 /* Copying variable-length string SI (and no more). */
3390 newlen = si->nonzero_chars;
3391 full_string_p = true;
3392 }
3393 }
3394 else
3395 {
3396 si = NULL;
3397 /* Handle memcpy (x, "abcd", 5) or
3398 memcpy (x, "abc\0uvw", 7). */
3399 if (!tree_fits_uhwi_p (len))
3400 return;
3401
3402 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3403 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3404 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3405 full_string_p = clen > nonzero_chars;
3406 }
3407
3408 if (!full_string_p
3409 && olddsi
3410 && olddsi->nonzero_chars
3411 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3412 && tree_int_cst_le (t1: newlen, t2: olddsi->nonzero_chars))
3413 {
3414 /* The SRC substring being written strictly overlaps
3415 a subsequence of the existing string OLDDSI. */
3416 newlen = olddsi->nonzero_chars;
3417 full_string_p = olddsi->full_string_p;
3418 }
3419
3420 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3421 adjust_last_stmt (si: olddsi, stmt, is_strcat: false);
3422
3423 if (didx == 0)
3424 {
3425 didx = new_stridx (exp: dst);
3426 if (didx == 0)
3427 return;
3428 }
3429 oldlen = NULL_TREE;
3430 if (olddsi != NULL)
3431 {
3432 dsi = unshare_strinfo (si: olddsi);
3433 oldlen = olddsi->nonzero_chars;
3434 dsi->nonzero_chars = newlen;
3435 dsi->full_string_p = full_string_p;
3436 /* Break the chain, so adjust_related_strinfo on later pointers in
3437 the chain won't adjust this one anymore. */
3438 dsi->next = 0;
3439 dsi->stmt = NULL;
3440 dsi->endptr = NULL_TREE;
3441 }
3442 else
3443 {
3444 dsi = new_strinfo (ptr: dst, idx: didx, nonzero_chars: newlen, full_string_p);
3445 set_strinfo (idx: didx, si: dsi);
3446 find_equal_ptrs (ptr: dst, idx: didx);
3447 }
3448 dsi->writable = true;
3449 dsi->dont_invalidate = true;
3450 if (olddsi != NULL)
3451 {
3452 tree adj = NULL_TREE;
3453 location_t loc = gimple_location (g: stmt);
3454 if (oldlen == NULL_TREE)
3455 ;
3456 else if (integer_zerop (oldlen))
3457 adj = newlen;
3458 else if (TREE_CODE (oldlen) == INTEGER_CST
3459 || TREE_CODE (newlen) == INTEGER_CST)
3460 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3461 fold_convert_loc (loc, TREE_TYPE (newlen),
3462 oldlen));
3463 if (adj != NULL_TREE)
3464 adjust_related_strinfos (loc, origsi: dsi, adj);
3465 else
3466 dsi->prev = 0;
3467 }
3468 /* memcpy src may not overlap dst, so src doesn't need to be
3469 invalidated either. */
3470 if (si != NULL)
3471 si->dont_invalidate = true;
3472
3473 if (full_string_p)
3474 {
3475 lhs = gimple_call_lhs (gs: stmt);
3476 switch (bcode)
3477 {
3478 case BUILT_IN_MEMCPY:
3479 case BUILT_IN_MEMCPY_CHK:
3480 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3481 laststmt.stmt = stmt;
3482 laststmt.len = dsi->nonzero_chars;
3483 laststmt.stridx = dsi->idx;
3484 if (lhs)
3485 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3486 break;
3487 case BUILT_IN_MEMPCPY:
3488 case BUILT_IN_MEMPCPY_CHK:
3489 break;
3490 default:
3491 gcc_unreachable ();
3492 }
3493 }
3494}
3495
3496/* Handle a strcat-like ({strcat,__strcat_chk}) call.
3497 If strlen of the second argument is known, strlen of the first argument
3498 is increased by the length of the second argument. Furthermore, attempt
3499 to convert it to memcpy/strcpy if the length of the first argument
3500 is known. */
3501
3502void
3503strlen_pass::handle_builtin_strcat (built_in_function bcode)
3504{
3505 int idx, didx;
3506 tree srclen, args, type, fn, objsz, endptr;
3507 bool success;
3508 gimple *stmt = gsi_stmt (i: m_gsi);
3509 strinfo *si, *dsi;
3510 location_t loc = gimple_location (g: stmt);
3511
3512 tree src = gimple_call_arg (gs: stmt, index: 1);
3513 tree dst = gimple_call_arg (gs: stmt, index: 0);
3514
3515 /* Bail if the source is the same as destination. It will be diagnosed
3516 elsewhere. */
3517 if (operand_equal_p (src, dst, flags: 0))
3518 return;
3519
3520 tree lhs = gimple_call_lhs (gs: stmt);
3521
3522 didx = get_stridx (exp: dst, stmt);
3523 if (didx < 0)
3524 return;
3525
3526 dsi = NULL;
3527 if (didx > 0)
3528 dsi = get_strinfo (idx: didx);
3529
3530 srclen = NULL_TREE;
3531 si = NULL;
3532 idx = get_stridx (exp: src, stmt);
3533 if (idx < 0)
3534 srclen = build_int_cst (size_type_node, ~idx);
3535 else if (idx > 0)
3536 {
3537 si = get_strinfo (idx);
3538 if (si != NULL)
3539 srclen = get_string_length (si);
3540 }
3541
3542 /* Disable warning for the transformed statement? */
3543 opt_code no_warning_opt = no_warning;
3544
3545 if (dsi == NULL || get_string_length (si: dsi) == NULL_TREE)
3546 {
3547 {
3548 /* The concatenation always involves copying at least one byte
3549 (the terminating nul), even if the source string is empty.
3550 If the source is unknown assume it's one character long and
3551 used that as both sizes. */
3552 tree slen = srclen;
3553 if (slen)
3554 {
3555 tree type = TREE_TYPE (slen);
3556 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3557 }
3558
3559 tree sptr = si && si->ptr ? si->ptr : src;
3560 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3561 slen);
3562 if (no_warning_opt)
3563 suppress_warning (stmt, no_warning_opt);
3564 }
3565
3566 /* strcat (p, q) can be transformed into
3567 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3568 with length endptr - p if we need to compute the length
3569 later on. Don't do this transformation if we don't need
3570 it. */
3571 if (builtin_decl_implicit_p (fncode: BUILT_IN_STPCPY) && lhs == NULL_TREE)
3572 {
3573 if (didx == 0)
3574 {
3575 didx = new_stridx (exp: dst);
3576 if (didx == 0)
3577 return;
3578 }
3579 if (dsi == NULL)
3580 {
3581 dsi = new_strinfo (ptr: dst, idx: didx, NULL_TREE, full_string_p: false);
3582 set_strinfo (idx: didx, si: dsi);
3583 find_equal_ptrs (ptr: dst, idx: didx);
3584 }
3585 else
3586 {
3587 dsi = unshare_strinfo (si: dsi);
3588 dsi->nonzero_chars = NULL_TREE;
3589 dsi->full_string_p = false;
3590 dsi->next = 0;
3591 dsi->endptr = NULL_TREE;
3592 }
3593 dsi->writable = true;
3594 dsi->stmt = stmt;
3595 dsi->dont_invalidate = true;
3596 }
3597 return;
3598 }
3599
3600 tree dstlen = dsi->nonzero_chars;
3601 endptr = dsi->endptr;
3602
3603 dsi = unshare_strinfo (si: dsi);
3604 dsi->endptr = NULL_TREE;
3605 dsi->stmt = NULL;
3606 dsi->writable = true;
3607
3608 if (srclen != NULL_TREE)
3609 {
3610 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3611 TREE_TYPE (dsi->nonzero_chars),
3612 dsi->nonzero_chars, srclen);
3613 gcc_assert (dsi->full_string_p);
3614 adjust_related_strinfos (loc, origsi: dsi, adj: srclen);
3615 dsi->dont_invalidate = true;
3616 }
3617 else
3618 {
3619 dsi->nonzero_chars = NULL;
3620 dsi->full_string_p = false;
3621 if (lhs == NULL_TREE && builtin_decl_implicit_p (fncode: BUILT_IN_STPCPY))
3622 dsi->dont_invalidate = true;
3623 }
3624
3625 if (si != NULL)
3626 /* strcat src may not overlap dst, so src doesn't need to be
3627 invalidated either. */
3628 si->dont_invalidate = true;
3629
3630 /* For now. Could remove the lhs from the call and add
3631 lhs = dst; afterwards. */
3632 if (lhs)
3633 return;
3634
3635 fn = NULL_TREE;
3636 objsz = NULL_TREE;
3637 switch (bcode)
3638 {
3639 case BUILT_IN_STRCAT:
3640 if (srclen != NULL_TREE)
3641 fn = builtin_decl_implicit (fncode: BUILT_IN_MEMCPY);
3642 else
3643 fn = builtin_decl_implicit (fncode: BUILT_IN_STRCPY);
3644 break;
3645 case BUILT_IN_STRCAT_CHK:
3646 if (srclen != NULL_TREE)
3647 fn = builtin_decl_explicit (fncode: BUILT_IN_MEMCPY_CHK);
3648 else
3649 fn = builtin_decl_explicit (fncode: BUILT_IN_STRCPY_CHK);
3650 objsz = gimple_call_arg (gs: stmt, index: 2);
3651 break;
3652 default:
3653 gcc_unreachable ();
3654 }
3655
3656 if (fn == NULL_TREE)
3657 return;
3658
3659 if (dsi && dstlen)
3660 {
3661 tree type = TREE_TYPE (dstlen);
3662
3663 /* Compute the size of the source sequence, including the nul. */
3664 tree srcsize = srclen ? srclen : size_zero_node;
3665 tree one = build_int_cst (type, 1);
3666 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3667 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3668 tree sptr = si && si->ptr ? si->ptr : src;
3669
3670 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3671 srcsize);
3672 if (no_warning_opt)
3673 suppress_warning (stmt, no_warning_opt);
3674 }
3675
3676 tree len = NULL_TREE;
3677 if (srclen != NULL_TREE)
3678 {
3679 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3680 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3681
3682 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3683 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3684 build_int_cst (type, 1));
3685 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3686 GSI_SAME_STMT);
3687 }
3688 if (endptr)
3689 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3690 else
3691 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3692 fold_convert_loc (loc, sizetype,
3693 unshare_expr (dstlen)));
3694 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3695 GSI_SAME_STMT);
3696 if (objsz)
3697 {
3698 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3699 fold_convert_loc (loc, TREE_TYPE (objsz),
3700 unshare_expr (dstlen)));
3701 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3702 GSI_SAME_STMT);
3703 }
3704 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3705 {
3706 fprintf (stream: dump_file, format: "Optimizing: ");
3707 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3708 }
3709 if (srclen != NULL_TREE)
3710 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3711 dst, src, len, objsz);
3712 else
3713 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3714 dst, src, objsz);
3715 if (success)
3716 {
3717 stmt = gsi_stmt (i: m_gsi);
3718 update_stmt (s: stmt);
3719 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3720 {
3721 fprintf (stream: dump_file, format: "into: ");
3722 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3723 }
3724 /* If srclen == NULL, note that current string length can be
3725 computed by transforming this strcpy into stpcpy. */
3726 if (srclen == NULL_TREE && dsi->dont_invalidate)
3727 dsi->stmt = stmt;
3728 adjust_last_stmt (si: dsi, stmt, is_strcat: true);
3729 if (srclen != NULL_TREE)
3730 {
3731 laststmt.stmt = stmt;
3732 laststmt.len = srclen;
3733 laststmt.stridx = dsi->idx;
3734 }
3735 }
3736 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3737 fprintf (stream: dump_file, format: "not possible.\n");
3738
3739 if (no_warning_opt)
3740 suppress_warning (stmt, no_warning_opt);
3741}
3742
3743/* Handle a call to an allocation function like alloca, malloc or calloc,
3744 or an ordinary allocation function declared with attribute alloc_size. */
3745
3746void
3747strlen_pass::handle_alloc_call (built_in_function bcode)
3748{
3749 gimple *stmt = gsi_stmt (i: m_gsi);
3750 tree lhs = gimple_call_lhs (gs: stmt);
3751 if (lhs == NULL_TREE)
3752 return;
3753
3754 gcc_assert (get_stridx (lhs, stmt) == 0);
3755 int idx = new_stridx (exp: lhs);
3756 tree length = NULL_TREE;
3757 if (bcode == BUILT_IN_CALLOC)
3758 length = build_int_cst (size_type_node, 0);
3759 strinfo *si = new_strinfo (ptr: lhs, idx, nonzero_chars: length, full_string_p: length != NULL_TREE);
3760 if (bcode == BUILT_IN_CALLOC)
3761 {
3762 /* Only set STMT for calloc and malloc. */
3763 si->stmt = stmt;
3764 /* Only set ENDPTR for calloc. */
3765 si->endptr = lhs;
3766 }
3767 else if (bcode == BUILT_IN_MALLOC)
3768 si->stmt = stmt;
3769
3770 /* Set ALLOC is set for all allocation functions. */
3771 si->alloc = stmt;
3772 set_strinfo (idx, si);
3773 si->writable = true;
3774 si->dont_invalidate = true;
3775}
3776
3777/* Handle a call to memset.
3778 After a call to calloc, memset(,0,) is unnecessary.
3779 memset(malloc(n),0,n) is calloc(n,1).
3780 return true when the call is transformed, false otherwise.
3781 When nonnull uses RVALS to determine range information. */
3782
3783bool
3784strlen_pass::handle_builtin_memset (bool *zero_write)
3785{
3786 gimple *memset_stmt = gsi_stmt (i: m_gsi);
3787 tree ptr = gimple_call_arg (gs: memset_stmt, index: 0);
3788 tree memset_val = gimple_call_arg (gs: memset_stmt, index: 1);
3789 tree memset_size = gimple_call_arg (gs: memset_stmt, index: 2);
3790
3791 /* Set to the non-constant offset added to PTR. */
3792 wide_int offrng[2];
3793 int idx1 = get_stridx (exp: ptr, stmt: memset_stmt, offrng, rvals: ptr_qry.rvals);
3794 if (idx1 == 0
3795 && TREE_CODE (memset_val) == INTEGER_CST
3796 && ((TREE_CODE (memset_size) == INTEGER_CST
3797 && !integer_zerop (memset_size))
3798 || TREE_CODE (memset_size) == SSA_NAME))
3799 {
3800 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << CHAR_TYPE_SIZE) - 1;
3801 bool full_string_p = (wi::to_wide (t: memset_val) & mask) == 0;
3802
3803 /* We only handle symbolic lengths when writing non-zero values. */
3804 if (full_string_p && TREE_CODE (memset_size) != INTEGER_CST)
3805 return false;
3806
3807 idx1 = new_stridx (exp: ptr);
3808 if (idx1 == 0)
3809 return false;
3810 tree newlen;
3811 if (full_string_p)
3812 newlen = build_int_cst (size_type_node, 0);
3813 else if (TREE_CODE (memset_size) == INTEGER_CST)
3814 newlen = fold_convert (size_type_node, memset_size);
3815 else
3816 newlen = memset_size;
3817
3818 strinfo *dsi = new_strinfo (ptr, idx: idx1, nonzero_chars: newlen, full_string_p);
3819 set_strinfo (idx: idx1, si: dsi);
3820 find_equal_ptrs (ptr, idx: idx1);
3821 dsi->dont_invalidate = true;
3822 dsi->writable = true;
3823 return false;
3824 }
3825
3826 if (idx1 <= 0)
3827 return false;
3828 strinfo *si1 = get_strinfo (idx: idx1);
3829 if (!si1)
3830 return false;
3831 gimple *alloc_stmt = si1->alloc;
3832 if (!alloc_stmt || !is_gimple_call (gs: alloc_stmt))
3833 return false;
3834 tree callee1 = gimple_call_fndecl (gs: alloc_stmt);
3835 if (!valid_builtin_call (stmt: alloc_stmt))
3836 return false;
3837 tree alloc_size = gimple_call_arg (gs: alloc_stmt, index: 0);
3838
3839 /* Check for overflow. */
3840 maybe_warn_overflow (stmt: memset_stmt, call_lhs: false, len: memset_size, NULL, plus_one: false, rawmem: true);
3841
3842 /* Bail when there is no statement associated with the destination
3843 (the statement may be null even when SI1->ALLOC is not). */
3844 if (!si1->stmt)
3845 return false;
3846
3847 /* Avoid optimizing if store is at a variable offset from the beginning
3848 of the allocated object. */
3849 if (offrng[0] != 0 || offrng[0] != offrng[1])
3850 return false;
3851
3852 /* Bail when the call writes a non-zero value. */
3853 if (!integer_zerop (memset_val))
3854 return false;
3855
3856 /* Let the caller know the memset call cleared the destination. */
3857 *zero_write = true;
3858
3859 enum built_in_function code1 = DECL_FUNCTION_CODE (decl: callee1);
3860 if (code1 == BUILT_IN_CALLOC)
3861 /* Not touching alloc_stmt */ ;
3862 else if (code1 == BUILT_IN_MALLOC
3863 && operand_equal_p (memset_size, alloc_size, flags: 0))
3864 {
3865 /* Replace the malloc + memset calls with calloc. */
3866 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3867 update_gimple_call (&gsi1, builtin_decl_implicit (fncode: BUILT_IN_CALLOC), 2,
3868 alloc_size, build_one_cst (size_type_node));
3869 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3870 si1->full_string_p = true;
3871 si1->stmt = gsi_stmt (i: gsi1);
3872 }
3873 else
3874 return false;
3875 tree lhs = gimple_call_lhs (gs: memset_stmt);
3876 unlink_stmt_vdef (memset_stmt);
3877 if (lhs)
3878 {
3879 gimple *assign = gimple_build_assign (lhs, ptr);
3880 gsi_replace (&m_gsi, assign, false);
3881 }
3882 else
3883 {
3884 gsi_remove (&m_gsi, true);
3885 release_defs (memset_stmt);
3886 }
3887
3888 return true;
3889}
3890
3891/* Return first such statement if RES is used in statements testing its
3892 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3893 nonnull if and only RES is used in such expressions exclusively and
3894 in none other. */
3895
3896gimple *
3897use_in_zero_equality (tree res, bool exclusive)
3898{
3899 gimple *first_use = NULL;
3900
3901 use_operand_p use_p;
3902 imm_use_iterator iter;
3903
3904 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3905 {
3906 gimple *use_stmt = USE_STMT (use_p);
3907
3908 if (is_gimple_debug (gs: use_stmt))
3909 continue;
3910
3911 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN)
3912 {
3913 tree_code code = gimple_assign_rhs_code (gs: use_stmt);
3914 if (code == COND_EXPR)
3915 {
3916 tree cond_expr = gimple_assign_rhs1 (gs: use_stmt);
3917 if ((TREE_CODE (cond_expr) != EQ_EXPR
3918 && (TREE_CODE (cond_expr) != NE_EXPR))
3919 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3920 {
3921 if (exclusive)
3922 return NULL;
3923 continue;
3924 }
3925 }
3926 else if (code == EQ_EXPR || code == NE_EXPR)
3927 {
3928 if (!integer_zerop (gimple_assign_rhs2 (gs: use_stmt)))
3929 {
3930 if (exclusive)
3931 return NULL;
3932 continue;
3933 }
3934 }
3935 else if (exclusive)
3936 return NULL;
3937 else
3938 continue;
3939 }
3940 else if (gimple_code (g: use_stmt) == GIMPLE_COND)
3941 {
3942 tree_code code = gimple_cond_code (gs: use_stmt);
3943 if ((code != EQ_EXPR && code != NE_EXPR)
3944 || !integer_zerop (gimple_cond_rhs (gs: use_stmt)))
3945 {
3946 if (exclusive)
3947 return NULL;
3948 continue;
3949 }
3950 }
3951 else if (exclusive)
3952 return NULL;
3953 else
3954 continue;
3955
3956 if (!first_use)
3957 first_use = use_stmt;
3958 }
3959
3960 return first_use;
3961}
3962
3963/* Handle a call to memcmp. We try to handle small comparisons by
3964 converting them to load and compare, and replacing the call to memcmp
3965 with a __builtin_memcmp_eq call where possible.
3966 return true when call is transformed, return false otherwise. */
3967
3968bool
3969strlen_pass::handle_builtin_memcmp ()
3970{
3971 gcall *stmt = as_a <gcall *> (p: gsi_stmt (i: m_gsi));
3972 tree res = gimple_call_lhs (gs: stmt);
3973
3974 if (!res || !use_in_zero_equality (res))
3975 return false;
3976
3977 tree arg1 = gimple_call_arg (gs: stmt, index: 0);
3978 tree arg2 = gimple_call_arg (gs: stmt, index: 1);
3979 tree len = gimple_call_arg (gs: stmt, index: 2);
3980 unsigned HOST_WIDE_INT leni;
3981
3982 if (tree_fits_uhwi_p (len)
3983 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (mode: word_mode)
3984 && pow2p_hwi (x: leni))
3985 {
3986 leni *= CHAR_TYPE_SIZE;
3987 unsigned align1 = get_pointer_alignment (arg1);
3988 unsigned align2 = get_pointer_alignment (arg2);
3989 unsigned align = MIN (align1, align2);
3990 scalar_int_mode mode;
3991 if (int_mode_for_size (size: leni, limit: 1).exists (mode: &mode)
3992 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
3993 {
3994 location_t loc = gimple_location (g: stmt);
3995 tree type, off;
3996 type = build_nonstandard_integer_type (leni, 1);
3997 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
3998 tree ptrtype = build_pointer_type_for_mode (char_type_node,
3999 ptr_mode, true);
4000 off = build_int_cst (ptrtype, 0);
4001 arg1 = build2_loc (loc, code: MEM_REF, type, arg0: arg1, arg1: off);
4002 arg2 = build2_loc (loc, code: MEM_REF, type, arg0: arg2, arg1: off);
4003 tree tem1 = fold_const_aggregate_ref (arg1);
4004 if (tem1)
4005 arg1 = tem1;
4006 tree tem2 = fold_const_aggregate_ref (arg2);
4007 if (tem2)
4008 arg2 = tem2;
4009 res = fold_convert_loc (loc, TREE_TYPE (res),
4010 fold_build2_loc (loc, NE_EXPR,
4011 boolean_type_node,
4012 arg1, arg2));
4013 gimplify_and_update_call_from_tree (&m_gsi, res);
4014 return true;
4015 }
4016 }
4017
4018 gimple_call_set_fndecl (gs: stmt, decl: builtin_decl_explicit (fncode: BUILT_IN_MEMCMP_EQ));
4019 return true;
4020}
4021
4022/* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4023 of the string(s) referenced by ARG if it can be determined.
4024 If the length cannot be determined, sets *SIZE to the size of
4025 the array the string is stored in, if any. If no such array is
4026 known, sets *SIZE to -1. When the strings are nul-terminated sets
4027 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
4028 determine range information. Returns true on success. */
4029
4030bool
4031strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
4032 unsigned HOST_WIDE_INT lenrng[2],
4033 unsigned HOST_WIDE_INT *size, bool *nulterm)
4034{
4035 /* Invalidate. */
4036 *size = HOST_WIDE_INT_M1U;
4037
4038 if (idx < 0)
4039 {
4040 /* IDX is the inverted constant string length. */
4041 lenrng[0] = ~idx;
4042 lenrng[1] = lenrng[0];
4043 *nulterm = true;
4044 return true;
4045 }
4046
4047 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4048 possible length + 1. */
4049 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4050
4051 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4052 {
4053 /* FIXME: Handle all this in_range_strlen_dynamic. */
4054 if (!si->nonzero_chars)
4055 ;
4056 else if (tree_fits_uhwi_p (si->nonzero_chars))
4057 {
4058 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4059 *nulterm = si->full_string_p;
4060 /* Set the upper bound only if the string is known to be
4061 nul-terminated, otherwise leave it at maximum + 1. */
4062 if (*nulterm)
4063 lenrng[1] = lenrng[0];
4064 }
4065 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4066 {
4067 value_range r;
4068 if (get_range_query (cfun)->range_of_expr (r, expr: si->nonzero_chars)
4069 && !r.undefined_p ()
4070 && !r.varying_p ())
4071 {
4072 lenrng[0] = r.lower_bound ().to_uhwi ();
4073 lenrng[1] = r.upper_bound ().to_uhwi ();
4074 *nulterm = si->full_string_p;
4075 }
4076 }
4077 }
4078
4079 if (lenrng[0] != HOST_WIDE_INT_MAX)
4080 return true;
4081
4082 /* Compute the minimum and maximum real or possible lengths. */
4083 c_strlen_data lendata = { };
4084 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4085 to have it set to the length of the longest string in a PHI. */
4086 lendata.maxbound = arg;
4087 get_range_strlen_dynamic (src: arg, stmt, pdata: &lendata, ptr_qry);
4088
4089 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4090 if (tree_fits_uhwi_p (lendata.maxbound)
4091 && !integer_all_onesp (lendata.maxbound))
4092 maxbound = tree_to_uhwi (lendata.maxbound);
4093
4094 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4095 {
4096 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4097 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4098
4099 /* The longest string in this data model. */
4100 const unsigned HOST_WIDE_INT lenmax
4101 = tree_to_uhwi (max_object_size ()) - 2;
4102
4103 if (maxbound == HOST_WIDE_INT_M1U)
4104 {
4105 lenrng[0] = minlen;
4106 lenrng[1] = maxlen;
4107 *nulterm = minlen == maxlen;
4108 }
4109 else if (maxlen < lenmax)
4110 {
4111 *size = maxbound + 1;
4112 *nulterm = false;
4113 }
4114 else
4115 return false;
4116
4117 return true;
4118 }
4119
4120 if (maxbound != HOST_WIDE_INT_M1U
4121 && lendata.maxlen
4122 && !integer_all_onesp (lendata.maxlen))
4123 {
4124 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4125 of the longest string based on the sizes of the arrays referenced
4126 by ARG. */
4127 *size = maxbound + 1;
4128 *nulterm = false;
4129 return true;
4130 }
4131
4132 return false;
4133}
4134
4135/* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4136 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4137 for a sufficiently large BOUND). If the result is based on the length
4138 of one string being greater than the longest string that would fit in
4139 the array pointer to by the argument, set *PLEN and *PSIZE to
4140 the corresponding length (or its complement when the string is known
4141 to be at least as long and need not be nul-terminated) and size.
4142 Otherwise return null. */
4143
4144tree
4145strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4146 tree arg2, int idx2,
4147 unsigned HOST_WIDE_INT bound,
4148 unsigned HOST_WIDE_INT len[2],
4149 unsigned HOST_WIDE_INT *psize)
4150{
4151 /* Determine the range the length of each string is in and whether it's
4152 known to be nul-terminated, or the size of the array it's stored in. */
4153 bool nul1, nul2;
4154 unsigned HOST_WIDE_INT siz1, siz2;
4155 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4156 if (!get_len_or_size (stmt, arg: arg1, idx: idx1, lenrng: len1rng, size: &siz1, nulterm: &nul1)
4157 || !get_len_or_size (stmt, arg: arg2, idx: idx2, lenrng: len2rng, size: &siz2, nulterm: &nul2))
4158 return NULL_TREE;
4159
4160 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4161 to HWI_MAX when invalid. Adjust the length of each string to consider
4162 to be no more than BOUND. */
4163 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4164 len1rng[0] = bound;
4165 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4166 len1rng[1] = bound;
4167 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4168 len2rng[0] = bound;
4169 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4170 len2rng[1] = bound;
4171
4172 /* Two empty strings are equal. */
4173 if (len1rng[1] == 0 && len2rng[1] == 0)
4174 return integer_one_node;
4175
4176 /* The strings are definitely unequal when the lower bound of the length
4177 of one of them is greater than the length of the longest string that
4178 would fit into the other array. */
4179 if (len1rng[0] == HOST_WIDE_INT_MAX
4180 && len2rng[0] != HOST_WIDE_INT_MAX
4181 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4182 || len2rng[0] > siz1))
4183 {
4184 *psize = siz1;
4185 len[0] = len1rng[0];
4186 /* Set LEN[0] to the lower bound of ARG1's length when it's
4187 nul-terminated or to the complement of its minimum length
4188 otherwise, */
4189 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4190 return integer_zero_node;
4191 }
4192
4193 if (len2rng[0] == HOST_WIDE_INT_MAX
4194 && len1rng[0] != HOST_WIDE_INT_MAX
4195 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4196 || len1rng[0] > siz2))
4197 {
4198 *psize = siz2;
4199 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4200 len[1] = len2rng[0];
4201 return integer_zero_node;
4202 }
4203
4204 /* The strings are also definitely unequal when their lengths are unequal
4205 and at least one is nul-terminated. */
4206 if (len1rng[0] != HOST_WIDE_INT_MAX
4207 && len2rng[0] != HOST_WIDE_INT_MAX
4208 && ((len1rng[1] < len2rng[0] && nul1)
4209 || (len2rng[1] < len1rng[0] && nul2)))
4210 {
4211 if (bound <= len1rng[0] || bound <= len2rng[0])
4212 *psize = bound;
4213 else
4214 *psize = HOST_WIDE_INT_M1U;
4215
4216 len[0] = len1rng[0];
4217 len[1] = len2rng[0];
4218 return integer_zero_node;
4219 }
4220
4221 /* The string lengths may be equal or unequal. Even when equal and
4222 both strings nul-terminated, without the string contents there's
4223 no way to determine whether they are equal. */
4224 return NULL_TREE;
4225}
4226
4227/* Diagnose pointless calls to strcmp or strncmp STMT with string
4228 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4229 whose result is used in equality expressions that evaluate to
4230 a constant due to one argument being longer than the size of
4231 the other. */
4232
4233static void
4234maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4235 unsigned HOST_WIDE_INT len[2],
4236 unsigned HOST_WIDE_INT siz)
4237{
4238 tree lhs = gimple_call_lhs (gs: stmt);
4239 gimple *use = use_in_zero_equality (res: lhs, /* exclusive = */ false);
4240 if (!use)
4241 return;
4242
4243 bool at_least = false;
4244
4245 /* Excessive LEN[i] indicates a lower bound. */
4246 if (len[0] > HOST_WIDE_INT_MAX)
4247 {
4248 at_least = true;
4249 len[0] = ~len[0];
4250 }
4251
4252 if (len[1] > HOST_WIDE_INT_MAX)
4253 {
4254 at_least = true;
4255 len[1] = ~len[1];
4256 }
4257
4258 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4259
4260 /* FIXME: Include a note pointing to the declaration of the smaller
4261 array. */
4262 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4263
4264 tree callee = gimple_call_fndecl (gs: stmt);
4265 bool warned = false;
4266 if (siz <= minlen && bound == -1)
4267 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4268 (at_least
4269 ? G_("%qD of a string of length %wu or more and "
4270 "an array of size %wu evaluates to nonzero")
4271 : G_("%qD of a string of length %wu and an array "
4272 "of size %wu evaluates to nonzero")),
4273 callee, minlen, siz);
4274 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4275 {
4276 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4277 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4278 "%qD of strings of length %wu and %wu "
4279 "and bound of %wu evaluates to nonzero",
4280 callee, len[0], len[1], bound);
4281 else
4282 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4283 "%qD of a string of length %wu, an array "
4284 "of size %wu and bound of %wu evaluates to "
4285 "nonzero",
4286 callee, minlen, siz, bound);
4287 }
4288
4289 if (!warned)
4290 return;
4291
4292 location_t use_loc = gimple_location (g: use);
4293 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4294 inform (use_loc, "in this expression");
4295}
4296
4297
4298/* Optimize a call to strcmp or strncmp either by folding it to a constant
4299 when possible or by transforming the latter to the former. Warn about
4300 calls where the length of one argument is greater than the size of
4301 the array to which the other argument points if the latter's length
4302 is not known. Return true when the call has been transformed into
4303 another and false otherwise. */
4304
4305bool
4306strlen_pass::handle_builtin_string_cmp ()
4307{
4308 gcall *stmt = as_a <gcall *> (p: gsi_stmt (i: m_gsi));
4309 tree lhs = gimple_call_lhs (gs: stmt);
4310
4311 if (!lhs)
4312 return false;
4313
4314 tree arg1 = gimple_call_arg (gs: stmt, index: 0);
4315 tree arg2 = gimple_call_arg (gs: stmt, index: 1);
4316 int idx1 = get_stridx (exp: arg1, stmt);
4317 int idx2 = get_stridx (exp: arg2, stmt);
4318
4319 /* For strncmp set to the value of the third argument if known. */
4320 HOST_WIDE_INT bound = -1;
4321 tree len = NULL_TREE;
4322 /* Extract the strncmp bound. */
4323 if (gimple_call_num_args (gs: stmt) == 3)
4324 {
4325 len = gimple_call_arg (gs: stmt, index: 2);
4326 if (tree_fits_shwi_p (len))
4327 bound = tree_to_shwi (len);
4328
4329 /* If the bound argument is NOT known, do nothing. */
4330 if (bound < 0)
4331 return false;
4332 }
4333
4334 /* Avoid folding if either argument is not a nul-terminated array.
4335 Defer warning until later. */
4336 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4337 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4338 return false;
4339
4340 {
4341 /* Set to the length of one argument (or its complement if it's
4342 the lower bound of a range) and the size of the array storing
4343 the other if the result is based on the former being equal to
4344 or greater than the latter. */
4345 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4346 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4347
4348 /* Try to determine if the two strings are either definitely equal
4349 or definitely unequal and if so, either fold the result to zero
4350 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4351 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4352 len, psize: &siz))
4353 {
4354 if (integer_zerop (eqz))
4355 {
4356 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4357
4358 /* When the lengths of the first two string arguments are
4359 known to be unequal set the range of the result to non-zero.
4360 This allows the call to be eliminated if its result is only
4361 used in tests for equality to zero. */
4362 value_range nz;
4363 nz.set_nonzero (TREE_TYPE (lhs));
4364 set_range_info (lhs, nz);
4365 return false;
4366 }
4367 /* When the two strings are definitely equal (such as when they
4368 are both empty) fold the call to the constant result. */
4369 replace_call_with_value (&m_gsi, integer_zero_node);
4370 return true;
4371 }
4372 }
4373
4374 /* Return if nothing is known about the strings pointed to by ARG1
4375 and ARG2. */
4376 if (idx1 == 0 && idx2 == 0)
4377 return false;
4378
4379 /* Determine either the length or the size of each of the strings,
4380 whichever is available. */
4381 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4382 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4383
4384 {
4385 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4386 unsigned HOST_WIDE_INT arsz1, arsz2;
4387 bool nulterm[2];
4388
4389 if (!get_len_or_size (stmt, arg: arg1, idx: idx1, lenrng: len1rng, size: &arsz1, nulterm)
4390 || !get_len_or_size (stmt, arg: arg2, idx: idx2, lenrng: len2rng, size: &arsz2, nulterm: nulterm + 1))
4391 return false;
4392
4393 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4394 cstlen1 = len1rng[0];
4395 else if (arsz1 < HOST_WIDE_INT_M1U)
4396 arysiz1 = arsz1;
4397
4398 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4399 cstlen2 = len2rng[0];
4400 else if (arsz2 < HOST_WIDE_INT_M1U)
4401 arysiz2 = arsz2;
4402 }
4403
4404 /* Bail if neither the string length nor the size of the array
4405 it is stored in can be determined. */
4406 if ((cstlen1 < 0 && arysiz1 < 0)
4407 || (cstlen2 < 0 && arysiz2 < 0)
4408 || (cstlen1 < 0 && cstlen2 < 0))
4409 return false;
4410
4411 if (cstlen1 >= 0)
4412 ++cstlen1;
4413 if (cstlen2 >= 0)
4414 ++cstlen2;
4415
4416 /* The exact number of characters to compare. */
4417 HOST_WIDE_INT cmpsiz;
4418 if (cstlen1 >= 0 && cstlen2 >= 0)
4419 cmpsiz = MIN (cstlen1, cstlen2);
4420 else if (cstlen1 >= 0)
4421 cmpsiz = cstlen1;
4422 else
4423 cmpsiz = cstlen2;
4424 if (bound >= 0)
4425 cmpsiz = MIN (cmpsiz, bound);
4426 /* The size of the array in which the unknown string is stored. */
4427 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4428
4429 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (res: lhs))
4430 {
4431 /* If the known length is less than the size of the other array
4432 and the strcmp result is only used to test equality to zero,
4433 transform the call to the equivalent _eq call. */
4434 if (tree fn = builtin_decl_implicit (fncode: bound < 0 ? BUILT_IN_STRCMP_EQ
4435 : BUILT_IN_STRNCMP_EQ))
4436 {
4437 tree n = build_int_cst (size_type_node, cmpsiz);
4438 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4439 return true;
4440 }
4441 }
4442
4443 return false;
4444}
4445
4446/* Handle a POINTER_PLUS_EXPR statement.
4447 For p = "abcd" + 2; compute associated length, or if
4448 p = q + off is pointing to a '\0' character of a string, call
4449 zero_length_string on it. */
4450
4451void
4452strlen_pass::handle_pointer_plus ()
4453{
4454 gimple *stmt = gsi_stmt (i: m_gsi);
4455 tree lhs = gimple_assign_lhs (gs: stmt), off;
4456 int idx = get_stridx (exp: gimple_assign_rhs1 (gs: stmt), stmt);
4457 strinfo *si, *zsi;
4458
4459 if (idx == 0)
4460 return;
4461
4462 if (idx < 0)
4463 {
4464 tree off = gimple_assign_rhs2 (gs: stmt);
4465 if (tree_fits_uhwi_p (off)
4466 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4467 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4468 = ~(~idx - (int) tree_to_uhwi (off));
4469 return;
4470 }
4471
4472 si = get_strinfo (idx);
4473 if (si == NULL || si->nonzero_chars == NULL_TREE)
4474 return;
4475
4476 off = gimple_assign_rhs2 (gs: stmt);
4477 zsi = NULL;
4478 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, flags: 0))
4479 zsi = zero_length_string (ptr: lhs, chainsi: si);
4480 else if (TREE_CODE (off) == SSA_NAME)
4481 {
4482 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4483 if (gimple_assign_single_p (gs: def_stmt)
4484 && si->full_string_p
4485 && operand_equal_p (si->nonzero_chars,
4486 gimple_assign_rhs1 (gs: def_stmt), flags: 0))
4487 zsi = zero_length_string (ptr: lhs, chainsi: si);
4488 }
4489 if (zsi != NULL
4490 && si->endptr != NULL_TREE
4491 && si->endptr != lhs
4492 && TREE_CODE (si->endptr) == SSA_NAME)
4493 {
4494 enum tree_code rhs_code
4495 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4496 ? SSA_NAME : NOP_EXPR;
4497 gimple_assign_set_rhs_with_ops (gsi: &m_gsi, code: rhs_code, op1: si->endptr);
4498 gcc_assert (gsi_stmt (m_gsi) == stmt);
4499 update_stmt (s: stmt);
4500 }
4501}
4502
4503/* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4504 clear all flags. Return true on success and false on failure. */
4505
4506static bool
4507nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4508 bool *nulterm, bool *allnul, bool *allnonnul)
4509{
4510 /* Use the size of the type of the expression as the size of the store,
4511 and set the upper bound of the length range to that of the size.
4512 Nothing is known about the contents so clear all flags. */
4513 tree typesize = TYPE_SIZE_UNIT (type);
4514 if (!type)
4515 return false;
4516
4517 if (!tree_fits_uhwi_p (typesize))
4518 return false;
4519
4520 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4521 if (sz > UINT_MAX)
4522 return false;
4523
4524 lenrange[2] = sz;
4525 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4526 lenrange[0] = 0;
4527 *nulterm = false;
4528 *allnul = false;
4529 *allnonnul = false;
4530 return true;
4531}
4532
4533/* Recursively determine the minimum and maximum number of leading nonzero
4534 bytes in the representation of EXP at memory state VUSE and set
4535 LENRANGE[0] and LENRANGE[1] to each.
4536 Sets LENRANGE[2] to the total size of the access (which may be less
4537 than LENRANGE[1] when what's being referenced by EXP is a pointer
4538 rather than an array).
4539 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4540 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4541 OFFSET and NBYTES are the offset into the representation and
4542 the size of the access to it determined from an ADDR_EXPR (i.e.,
4543 a pointer) or MEM_REF or zero for other expressions.
4544 Uses RVALS to determine range information.
4545 Avoids recursing deeper than the limits in SNLIM allow.
4546 Returns true on success and false otherwise. */
4547
4548bool
4549strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt,
4550 unsigned HOST_WIDE_INT offset,
4551 unsigned HOST_WIDE_INT nbytes,
4552 unsigned lenrange[3], bool *nulterm,
4553 bool *allnul, bool *allnonnul,
4554 ssa_name_limit_t &snlim)
4555{
4556 if (TREE_CODE (exp) == SSA_NAME)
4557 {
4558 /* Handle non-zero single-character stores specially. */
4559 tree type = TREE_TYPE (exp);
4560 if (TREE_CODE (type) == INTEGER_TYPE
4561 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4562 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4563 && tree_expr_nonzero_p (exp))
4564 {
4565 /* If the character EXP is known to be non-zero (even if its
4566 exact value is not known) recurse once to set the range
4567 for an arbitrary constant. */
4568 exp = build_int_cst (type, 1);
4569 return count_nonzero_bytes (exp, vuse, stmt,
4570 offset, nbytes: 1, lenrange,
4571 nulterm, allnul, allnonnul, snlim);
4572 }
4573
4574 gimple *g = SSA_NAME_DEF_STMT (exp);
4575 if (gimple_assign_single_p (gs: g))
4576 {
4577 exp = gimple_assign_rhs1 (gs: g);
4578 if (!DECL_P (exp)
4579 && TREE_CODE (exp) != CONSTRUCTOR
4580 && TREE_CODE (exp) != MEM_REF)
4581 return false;
4582 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4583 stmt = g;
4584 }
4585 else if (gimple_code (g) == GIMPLE_PHI)
4586 {
4587 /* Avoid processing an SSA_NAME that has already been visited
4588 or if an SSA_NAME limit has been reached. Indicate success
4589 if the former and failure if the latter. */
4590 if (int res = snlim.next_phi (exp))
4591 return res > 0;
4592
4593 /* Determine the minimum and maximum from the PHI arguments. */
4594 unsigned int n = gimple_phi_num_args (gs: g);
4595 for (unsigned i = 0; i != n; i++)
4596 {
4597 tree def = gimple_phi_arg_def (gs: g, index: i);
4598 if (!count_nonzero_bytes (exp: def, vuse, stmt: g,
4599 offset, nbytes, lenrange, nulterm,
4600 allnul, allnonnul, snlim))
4601 return false;
4602 }
4603
4604 return true;
4605 }
4606 }
4607
4608 if (TREE_CODE (exp) == CONSTRUCTOR)
4609 {
4610 if (nbytes)
4611 /* If NBYTES has already been determined by an outer MEM_REF
4612 fail rather than overwriting it (this shouldn't happen). */
4613 return false;
4614
4615 tree type = TREE_TYPE (exp);
4616 tree size = TYPE_SIZE_UNIT (type);
4617 if (!size || !tree_fits_uhwi_p (size))
4618 return false;
4619
4620 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4621 if (byte_size < offset)
4622 return false;
4623
4624 nbytes = byte_size - offset;
4625 }
4626
4627 if (TREE_CODE (exp) == MEM_REF)
4628 {
4629 if (nbytes)
4630 return false;
4631
4632 tree arg = TREE_OPERAND (exp, 0);
4633 tree off = TREE_OPERAND (exp, 1);
4634
4635 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4636 return false;
4637
4638 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4639 if (INT_MAX < wioff)
4640 return false;
4641
4642 offset += wioff;
4643 if (INT_MAX < offset)
4644 return false;
4645
4646 /* The size of the MEM_REF access determines the number of bytes. */
4647 tree type = TREE_TYPE (exp);
4648 tree typesize = TYPE_SIZE_UNIT (type);
4649 if (!typesize || !tree_fits_uhwi_p (typesize))
4650 return false;
4651 nbytes = tree_to_uhwi (typesize);
4652 if (!nbytes)
4653 return false;
4654
4655 /* Handle MEM_REF = SSA_NAME types of assignments. */
4656 return count_nonzero_bytes_addr (exp: arg, vuse, stmt,
4657 offset, nbytes, lenrange, nulterm,
4658 allnul, allnonnul, snlim);
4659 }
4660
4661 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4662 {
4663 /* If EXP can be folded into a constant use the result. Otherwise
4664 proceed to use EXP to determine a range of the result. */
4665 if (tree fold_exp = ctor_for_folding (exp))
4666 if (fold_exp != error_mark_node)
4667 exp = fold_exp;
4668 }
4669
4670 const char *prep = NULL;
4671 if (TREE_CODE (exp) == STRING_CST)
4672 {
4673 unsigned nchars = TREE_STRING_LENGTH (exp);
4674 if (nchars < offset)
4675 return false;
4676
4677 if (!nbytes)
4678 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4679 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4680 of the access), set it here to the size of the string, including
4681 all internal and trailing nuls if the string has any. */
4682 nbytes = nchars - offset;
4683 else if (nchars - offset < nbytes)
4684 return false;
4685
4686 prep = TREE_STRING_POINTER (exp) + offset;
4687 }
4688
4689 unsigned char buf[256];
4690 if (!prep)
4691 {
4692 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4693 return false;
4694 /* If the pointer to representation hasn't been set above
4695 for STRING_CST point it at the buffer. */
4696 prep = reinterpret_cast <char *>(buf);
4697 /* Try to extract the representation of the constant object
4698 or expression starting from the offset. */
4699 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, off: offset);
4700 if (repsize < nbytes)
4701 {
4702 /* This should only happen when REPSIZE is zero because EXP
4703 doesn't denote an object with a known initializer, except
4704 perhaps when the reference reads past its end. */
4705 lenrange[0] = 0;
4706 prep = NULL;
4707 }
4708 else if (!nbytes)
4709 nbytes = repsize;
4710 else if (nbytes < repsize)
4711 return false;
4712 }
4713
4714 if (!nbytes)
4715 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4716 nulterm, allnul, allnonnul);
4717
4718 /* Compute the number of leading nonzero bytes in the representation
4719 and update the minimum and maximum. */
4720 unsigned HOST_WIDE_INT n = prep ? strnlen (string: prep, maxlen: nbytes) : nbytes;
4721
4722 if (n < lenrange[0])
4723 lenrange[0] = n;
4724 if (lenrange[1] < n)
4725 lenrange[1] = n;
4726
4727 /* Set the size of the representation. */
4728 if (lenrange[2] < nbytes)
4729 lenrange[2] = nbytes;
4730
4731 /* Clear NULTERM if none of the bytes is zero. */
4732 if (n == nbytes)
4733 *nulterm = false;
4734
4735 if (n)
4736 {
4737 /* When the initial number of non-zero bytes N is non-zero, reset
4738 *ALLNUL; if N is less than that the size of the representation
4739 also clear *ALLNONNUL. */
4740 *allnul = false;
4741 if (n < nbytes)
4742 *allnonnul = false;
4743 }
4744 else if (*allnul || *allnonnul)
4745 {
4746 *allnonnul = false;
4747
4748 if (*allnul)
4749 {
4750 /* When either ALLNUL is set and N is zero, also determine
4751 whether all subsequent bytes after the first one (which
4752 is nul) are zero or nonzero and clear ALLNUL if not. */
4753 for (const char *p = prep; p != prep + nbytes; ++p)
4754 if (*p)
4755 {
4756 *allnul = false;
4757 break;
4758 }
4759 }
4760 }
4761
4762 return true;
4763}
4764
4765/* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4766 bytes that are pointed to by EXP, which should be a pointer. */
4767
4768bool
4769strlen_pass::count_nonzero_bytes_addr (tree exp, tree vuse, gimple *stmt,
4770 unsigned HOST_WIDE_INT offset,
4771 unsigned HOST_WIDE_INT nbytes,
4772 unsigned lenrange[3], bool *nulterm,
4773 bool *allnul, bool *allnonnul,
4774 ssa_name_limit_t &snlim)
4775{
4776 int idx = get_stridx (exp, stmt);
4777 if (idx > 0)
4778 {
4779 /* get_strinfo reflects string lengths before the current statement,
4780 where the current statement is the outermost count_nonzero_bytes
4781 stmt. If there are any stores in between stmt and that
4782 current statement, the string length information might describe
4783 something significantly different. */
4784 if (gimple_vuse (g: stmt) != vuse)
4785 return false;
4786
4787 strinfo *si = get_strinfo (idx);
4788 if (!si)
4789 return false;
4790
4791 /* Handle both constant lengths as well non-constant lengths
4792 in some range. */
4793 unsigned HOST_WIDE_INT minlen, maxlen;
4794 if (tree_fits_shwi_p (si->nonzero_chars))
4795 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4796 else if (si->nonzero_chars
4797 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4798 {
4799 value_range vr;
4800 if (!ptr_qry.rvals->range_of_expr (r&: vr, expr: si->nonzero_chars, stmt)
4801 || vr.undefined_p ()
4802 || vr.varying_p ())
4803 return false;
4804
4805 minlen = vr.lower_bound ().to_uhwi ();
4806 maxlen = vr.upper_bound ().to_uhwi ();
4807 }
4808 else
4809 return false;
4810
4811 if (maxlen < offset)
4812 return false;
4813
4814 minlen = minlen < offset ? 0 : minlen - offset;
4815 maxlen -= offset;
4816 if (maxlen + 1 < nbytes)
4817 return false;
4818
4819 if (nbytes <= minlen)
4820 *nulterm = false;
4821
4822 if (nbytes < minlen)
4823 {
4824 minlen = nbytes;
4825 if (nbytes < maxlen)
4826 maxlen = nbytes;
4827 }
4828
4829 if (minlen < lenrange[0])
4830 lenrange[0] = minlen;
4831 if (lenrange[1] < maxlen)
4832 lenrange[1] = maxlen;
4833
4834 if (lenrange[2] < nbytes)
4835 lenrange[2] = nbytes;
4836
4837 /* Since only the length of the string are known and not its contents,
4838 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4839 *allnul = false;
4840 if (minlen < nbytes)
4841 *allnonnul = false;
4842
4843 return true;
4844 }
4845
4846 if (TREE_CODE (exp) == ADDR_EXPR)
4847 return count_nonzero_bytes (TREE_OPERAND (exp, 0), vuse, stmt,
4848 offset, nbytes,
4849 lenrange, nulterm, allnul, allnonnul, snlim);
4850
4851 if (TREE_CODE (exp) == SSA_NAME)
4852 {
4853 gimple *g = SSA_NAME_DEF_STMT (exp);
4854 if (gimple_code (g) == GIMPLE_PHI)
4855 {
4856 /* Avoid processing an SSA_NAME that has already been visited
4857 or if an SSA_NAME limit has been reached. Indicate success
4858 if the former and failure if the latter. */
4859 if (int res = snlim.next_phi (exp))
4860 return res > 0;
4861
4862 /* Determine the minimum and maximum from the PHI arguments. */
4863 unsigned int n = gimple_phi_num_args (gs: g);
4864 for (unsigned i = 0; i != n; i++)
4865 {
4866 tree def = gimple_phi_arg_def (gs: g, index: i);
4867 if (!count_nonzero_bytes_addr (exp: def, vuse, stmt: g,
4868 offset, nbytes, lenrange,
4869 nulterm, allnul, allnonnul,
4870 snlim))
4871 return false;
4872 }
4873
4874 return true;
4875 }
4876 }
4877
4878 /* Otherwise we don't know anything. */
4879 lenrange[0] = 0;
4880 if (lenrange[1] < nbytes)
4881 lenrange[1] = nbytes;
4882 if (lenrange[2] < nbytes)
4883 lenrange[2] = nbytes;
4884 *nulterm = false;
4885 *allnul = false;
4886 *allnonnul = false;
4887 return true;
4888}
4889
4890/* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4891 is a type rather than an expression use its size to compute the range.
4892 RVALS is used to determine ranges of dynamically computed string lengths
4893 (the results of strlen). */
4894
4895bool
4896strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4897 unsigned lenrange[3], bool *nulterm,
4898 bool *allnul, bool *allnonnul)
4899{
4900 if (TYPE_P (expr_or_type))
4901 return nonzero_bytes_for_type (type: expr_or_type, lenrange,
4902 nulterm, allnul, allnonnul);
4903
4904 /* Set to optimistic values so the caller doesn't have to worry about
4905 initializing these and to what. On success, the function will clear
4906 these if it determines their values are different but being recursive
4907 it never sets either to true. On failure, their values are
4908 unspecified. */
4909 *nulterm = true;
4910 *allnul = true;
4911 *allnonnul = true;
4912
4913 ssa_name_limit_t snlim;
4914 tree expr = expr_or_type;
4915 return count_nonzero_bytes (exp: expr, vuse: gimple_vuse (g: stmt), stmt,
4916 offset: 0, nbytes: 0, lenrange, nulterm, allnul, allnonnul,
4917 snlim);
4918}
4919
4920/* Handle a single or multibyte store other than by a built-in function,
4921 either via a single character assignment or by multi-byte assignment
4922 either via MEM_REF or via a type other than char (such as in
4923 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4924 the next statement in the basic block and false otherwise. */
4925
4926bool
4927strlen_pass::handle_store (bool *zero_write)
4928{
4929 gimple *stmt = gsi_stmt (i: m_gsi);
4930 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4931 call. STORETYPE is the type of the store (determined from either
4932 the RHS of the assignment statement or the LHS of a function call. */
4933 tree lhs, rhs, storetype;
4934 if (is_gimple_assign (gs: stmt))
4935 {
4936 lhs = gimple_assign_lhs (gs: stmt);
4937 rhs = gimple_assign_rhs1 (gs: stmt);
4938 storetype = TREE_TYPE (rhs);
4939 }
4940 else if (is_gimple_call (gs: stmt))
4941 {
4942 lhs = gimple_call_lhs (gs: stmt);
4943 rhs = NULL_TREE;
4944 storetype = TREE_TYPE (lhs);
4945 }
4946 else
4947 return true;
4948
4949 tree ssaname = NULL_TREE;
4950 strinfo *si = NULL;
4951 int idx = -1;
4952
4953 range_query *const rvals = ptr_qry.rvals;
4954
4955 /* The offset of the first byte in LHS modified by the store. */
4956 unsigned HOST_WIDE_INT offset = 0;
4957
4958 if (TREE_CODE (lhs) == MEM_REF
4959 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4960 {
4961 tree mem_offset = TREE_OPERAND (lhs, 1);
4962 if (tree_fits_uhwi_p (mem_offset))
4963 {
4964 /* Get the strinfo for the base, and use it if it starts with at
4965 least OFFSET nonzero characters. This is trivially true if
4966 OFFSET is zero. */
4967 offset = tree_to_uhwi (mem_offset);
4968 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4969 if (idx > 0)
4970 si = get_strinfo (idx);
4971 if (offset == 0)
4972 ssaname = TREE_OPERAND (lhs, 0);
4973 else if (si == NULL
4974 || compare_nonzero_chars (si, stmt, off: offset, rvals) < 0)
4975 {
4976 *zero_write = rhs ? initializer_zerop (rhs) : false;
4977
4978 bool dummy;
4979 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4980 if (count_nonzero_bytes (expr_or_type: rhs ? rhs : storetype, stmt, lenrange,
4981 nulterm: &dummy, allnul: &dummy, allnonnul: &dummy))
4982 maybe_warn_overflow (stmt, call_lhs: true, len: lenrange[2]);
4983
4984 return true;
4985 }
4986 }
4987 }
4988 else
4989 {
4990 idx = get_addr_stridx (exp: lhs, stmt, NULL_TREE, offset_out: &offset, rvals);
4991 if (idx > 0)
4992 si = get_strinfo (idx);
4993 }
4994
4995 /* Minimum and maximum leading non-zero bytes and the size of the store. */
4996 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4997
4998 /* Set to the minimum length of the string being assigned if known. */
4999 unsigned HOST_WIDE_INT rhs_minlen;
5000
5001 /* STORING_NONZERO_P is true iff not all stored characters are zero.
5002 STORING_ALL_NONZERO_P is true if all stored characters are zero.
5003 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5004 Both are false when it's impossible to determine which is true. */
5005 bool storing_nonzero_p;
5006 bool storing_all_nonzero_p;
5007 bool storing_all_zeros_p;
5008 /* FULL_STRING_P is set when the stored sequence of characters form
5009 a nul-terminated string. */
5010 bool full_string_p;
5011
5012 const bool ranges_valid
5013 = count_nonzero_bytes (expr_or_type: rhs ? rhs : storetype, stmt,
5014 lenrange, nulterm: &full_string_p,
5015 allnul: &storing_all_zeros_p, allnonnul: &storing_all_nonzero_p);
5016
5017 if (ranges_valid)
5018 {
5019 rhs_minlen = lenrange[0];
5020 storing_nonzero_p = lenrange[1] > 0;
5021 *zero_write = storing_all_zeros_p;
5022
5023 maybe_warn_overflow (stmt, call_lhs: true, len: lenrange[2]);
5024 }
5025 else
5026 {
5027 rhs_minlen = HOST_WIDE_INT_M1U;
5028 full_string_p = false;
5029 storing_nonzero_p = false;
5030 storing_all_zeros_p = false;
5031 storing_all_nonzero_p = false;
5032 }
5033
5034 if (si != NULL)
5035 {
5036 /* The corresponding element is set to 1 if the first and last
5037 element, respectively, of the sequence of characters being
5038 written over the string described by SI ends before
5039 the terminating nul (if it has one), to zero if the nul is
5040 being overwritten but not beyond, or negative otherwise. */
5041 int store_before_nul[2];
5042 if (ranges_valid)
5043 {
5044 /* The offset of the last stored byte. */
5045 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5046 store_before_nul[0]
5047 = compare_nonzero_chars (si, stmt, off: offset, rvals);
5048 if (endoff == offset)
5049 store_before_nul[1] = store_before_nul[0];
5050 else
5051 store_before_nul[1]
5052 = compare_nonzero_chars (si, stmt, off: endoff, rvals);
5053 }
5054 else
5055 {
5056 store_before_nul[0]
5057 = compare_nonzero_chars (si, stmt, off: offset, rvals);
5058 store_before_nul[1] = store_before_nul[0];
5059 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5060 }
5061
5062 if (storing_all_zeros_p
5063 && store_before_nul[0] == 0
5064 && store_before_nul[1] == 0
5065 && si->full_string_p)
5066 {
5067 /* When overwriting a '\0' with a '\0', the store can be removed
5068 if we know it has been stored in the current function. */
5069 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5070 {
5071 unlink_stmt_vdef (stmt);
5072 release_defs (stmt);
5073 gsi_remove (&m_gsi, true);
5074 return false;
5075 }
5076 else
5077 {
5078 si->writable = true;
5079 gsi_next (i: &m_gsi);
5080 return false;
5081 }
5082 }
5083
5084 if (store_before_nul[1] > 0
5085 && storing_nonzero_p
5086 && lenrange[0] == lenrange[1]
5087 && lenrange[0] == lenrange[2]
5088 && TREE_CODE (storetype) == INTEGER_TYPE)
5089 {
5090 /* Handle a store of one or more non-nul characters that ends
5091 before the terminating nul of the destination and so does
5092 not affect its length
5093 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5094 and if we aren't storing '\0', we know that the length of
5095 the string and any other zero terminated string in memory
5096 remains the same. In that case we move to the next gimple
5097 statement and return to signal the caller that it shouldn't
5098 invalidate anything.
5099
5100 This is beneficial for cases like:
5101
5102 char p[20];
5103 void foo (char *q)
5104 {
5105 strcpy (p, "foobar");
5106 size_t len = strlen (p); // can be folded to 6
5107 size_t len2 = strlen (q); // has to be computed
5108 p[0] = 'X';
5109 size_t len3 = strlen (p); // can be folded to 6
5110 size_t len4 = strlen (q); // can be folded to len2
5111 bar (len, len2, len3, len4);
5112 } */
5113 gsi_next (i: &m_gsi);
5114 return false;
5115 }
5116
5117 if (storing_nonzero_p
5118 || storing_all_zeros_p
5119 || (full_string_p && lenrange[1] == 0)
5120 || (offset != 0 && store_before_nul[1] > 0))
5121 {
5122 /* When STORING_NONZERO_P, we know that the string will start
5123 with at least OFFSET + 1 nonzero characters. If storing
5124 a single character, set si->NONZERO_CHARS to the result.
5125 If storing multiple characters, try to determine the number
5126 of leading non-zero characters and set si->NONZERO_CHARS to
5127 the result instead.
5128
5129 When STORING_ALL_ZEROS_P, or the first byte written is zero,
5130 i.e. FULL_STRING_P && LENRANGE[1] == 0, we know that the
5131 string is now OFFSET characters long.
5132
5133 Otherwise, we're storing an unknown value at offset OFFSET,
5134 so need to clip the nonzero_chars to OFFSET.
5135 Use the minimum length of the string (or individual character)
5136 being stored if it's known. Otherwise, STORING_NONZERO_P
5137 guarantees it's at least 1. */
5138 HOST_WIDE_INT len
5139 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5140 location_t loc = gimple_location (g: stmt);
5141 tree oldlen = si->nonzero_chars;
5142 if (store_before_nul[1] == 0 && si->full_string_p)
5143 /* We're overwriting the nul terminator with a nonzero or
5144 unknown character. If the previous stmt was a memcpy,
5145 its length may be decreased. */
5146 adjust_last_stmt (si, stmt, is_strcat: false);
5147 si = unshare_strinfo (si);
5148 if (storing_nonzero_p)
5149 {
5150 gcc_assert (len >= 0);
5151 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5152 }
5153 else
5154 si->nonzero_chars = build_int_cst (size_type_node, offset);
5155
5156 /* Set FULL_STRING_P only if the length of the strings being
5157 written is the same, and clear it if the strings have
5158 different lengths. In the latter case the length stored
5159 in si->NONZERO_CHARS becomes the lower bound.
5160 FIXME: Handle the upper bound of the length if possible. */
5161 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5162
5163 if (storing_all_zeros_p
5164 && ssaname
5165 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5166 si->endptr = ssaname;
5167 else
5168 si->endptr = NULL;
5169 si->next = 0;
5170 si->stmt = NULL;
5171 si->writable = true;
5172 si->dont_invalidate = true;
5173 if (oldlen)
5174 {
5175 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5176 si->nonzero_chars, oldlen);
5177 adjust_related_strinfos (loc, origsi: si, adj);
5178 }
5179 else
5180 si->prev = 0;
5181 }
5182 }
5183 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5184 {
5185 if (ssaname)
5186 idx = new_stridx (exp: ssaname);
5187 else
5188 idx = new_addr_stridx (exp: lhs);
5189 if (idx != 0)
5190 {
5191 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5192
5193 HOST_WIDE_INT slen;
5194 if (storing_all_zeros_p)
5195 slen = 0;
5196 else if (storing_nonzero_p && ranges_valid)
5197 {
5198 /* FIXME: Handle the upper bound of the length when
5199 LENRANGE[0] != LENRANGE[1]. */
5200 slen = lenrange[0];
5201 if (lenrange[0] != lenrange[1])
5202 /* Set the minimum length but ignore the maximum
5203 for now. */
5204 full_string_p = false;
5205 }
5206 else
5207 slen = -1;
5208
5209 tree len = (slen <= 0
5210 ? size_zero_node
5211 : build_int_cst (size_type_node, slen));
5212 si = new_strinfo (ptr, idx, nonzero_chars: len, full_string_p: slen >= 0 && full_string_p);
5213 set_strinfo (idx, si);
5214 if (storing_all_zeros_p
5215 && ssaname
5216 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5217 si->endptr = ssaname;
5218 si->dont_invalidate = true;
5219 si->writable = true;
5220 }
5221 }
5222 else if (idx == 0
5223 && rhs_minlen < HOST_WIDE_INT_M1U
5224 && ssaname == NULL_TREE
5225 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5226 {
5227 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5228 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5229 {
5230 int idx = new_addr_stridx (exp: lhs);
5231 if (idx != 0)
5232 {
5233 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5234 nonzero_chars: build_int_cst (size_type_node, rhs_minlen),
5235 full_string_p);
5236 set_strinfo (idx, si);
5237 si->dont_invalidate = true;
5238 }
5239 }
5240 }
5241
5242 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5243 {
5244 /* For single-byte stores only, allow adjust_last_stmt to remove
5245 the statement if the stored '\0' is immediately overwritten. */
5246 laststmt.stmt = stmt;
5247 laststmt.len = build_int_cst (size_type_node, 1);
5248 laststmt.stridx = si->idx;
5249 }
5250 return true;
5251}
5252
5253/* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5254
5255static void
5256fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5257{
5258 if (TREE_CODE (rhs1) != SSA_NAME
5259 || TREE_CODE (rhs2) != SSA_NAME)
5260 return;
5261
5262 gimple *call_stmt = NULL;
5263 for (int pass = 0; pass < 2; pass++)
5264 {
5265 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5266 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5267 && has_single_use (var: rhs1)
5268 && gimple_call_arg (gs: g, index: 0) == rhs2)
5269 {
5270 call_stmt = g;
5271 break;
5272 }
5273 std::swap (a&: rhs1, b&: rhs2);
5274 }
5275
5276 if (call_stmt)
5277 {
5278 tree arg0 = gimple_call_arg (gs: call_stmt, index: 0);
5279
5280 if (arg0 == rhs2)
5281 {
5282 tree arg1 = gimple_call_arg (gs: call_stmt, index: 1);
5283 tree arg1_len = NULL_TREE;
5284 int idx = get_stridx (exp: arg1, stmt: call_stmt);
5285
5286 if (idx)
5287 {
5288 if (idx < 0)
5289 arg1_len = build_int_cst (size_type_node, ~idx);
5290 else
5291 {
5292 strinfo *si = get_strinfo (idx);
5293 if (si)
5294 arg1_len = get_string_length (si);
5295 }
5296 }
5297
5298 if (arg1_len != NULL_TREE)
5299 {
5300 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5301 tree strncmp_decl = builtin_decl_explicit (fncode: BUILT_IN_STRNCMP);
5302
5303 if (!is_gimple_val (arg1_len))
5304 {
5305 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5306 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5307 arg1_len);
5308 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5309 arg1_len = arg1_len_tmp;
5310 }
5311
5312 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5313 arg0, arg1, arg1_len);
5314 tree strncmp_lhs = make_ssa_name (integer_type_node);
5315 gimple_set_vuse (g: strncmp_call, vuse: gimple_vuse (g: call_stmt));
5316 gimple_call_set_lhs (gs: strncmp_call, lhs: strncmp_lhs);
5317 gsi_remove (&gsi, true);
5318 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5319 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5320
5321 if (is_gimple_assign (gs: stmt))
5322 {
5323 if (gimple_assign_rhs_code (gs: stmt) == COND_EXPR)
5324 {
5325 tree cond = gimple_assign_rhs1 (gs: stmt);
5326 TREE_OPERAND (cond, 0) = strncmp_lhs;
5327 TREE_OPERAND (cond, 1) = zero;
5328 }
5329 else
5330 {
5331 gimple_assign_set_rhs1 (gs: stmt, rhs: strncmp_lhs);
5332 gimple_assign_set_rhs2 (gs: stmt, rhs: zero);
5333 }
5334 }
5335 else
5336 {
5337 gcond *cond = as_a<gcond *> (p: stmt);
5338 gimple_cond_set_lhs (gs: cond, lhs: strncmp_lhs);
5339 gimple_cond_set_rhs (gs: cond, rhs: zero);
5340 }
5341 update_stmt (s: stmt);
5342 }
5343 }
5344 }
5345}
5346
5347/* Return true if TYPE corresponds to a narrow character type. */
5348
5349static bool
5350is_char_type (tree type)
5351{
5352 return (TREE_CODE (type) == INTEGER_TYPE
5353 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5354 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5355}
5356
5357/* Check the built-in call at GSI for validity and optimize it.
5358 Uses RVALS to determine range information.
5359 Return true to let the caller advance *GSI to the next statement
5360 in the basic block and false otherwise. */
5361
5362bool
5363strlen_pass::check_and_optimize_call (bool *zero_write)
5364{
5365 gimple *stmt = gsi_stmt (i: m_gsi);
5366
5367 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5368 {
5369 tree fntype = gimple_call_fntype (gs: stmt);
5370 if (!fntype)
5371 return true;
5372
5373 if (lookup_attribute (attr_name: "alloc_size", TYPE_ATTRIBUTES (fntype)))
5374 {
5375 handle_alloc_call (bcode: BUILT_IN_NONE);
5376 return true;
5377 }
5378
5379 if (tree lhs = gimple_call_lhs (gs: stmt))
5380 handle_assign (lhs, zero_write);
5381
5382 /* Proceed to handle user-defined formatting functions. */
5383 }
5384
5385 /* When not optimizing we must be checking printf calls which
5386 we do even for user-defined functions when they are declared
5387 with attribute format. */
5388 if (!flag_optimize_strlen
5389 || !strlen_optimize
5390 || !valid_builtin_call (stmt))
5391 return !handle_printf_call (&m_gsi, ptr_qry);
5392
5393 tree callee = gimple_call_fndecl (gs: stmt);
5394 switch (DECL_FUNCTION_CODE (decl: callee))
5395 {
5396 case BUILT_IN_STRLEN:
5397 case BUILT_IN_STRNLEN:
5398 handle_builtin_strlen ();
5399 break;
5400 case BUILT_IN_STRCHR:
5401 handle_builtin_strchr ();
5402 break;
5403 case BUILT_IN_STRCPY:
5404 case BUILT_IN_STRCPY_CHK:
5405 case BUILT_IN_STPCPY:
5406 case BUILT_IN_STPCPY_CHK:
5407 handle_builtin_strcpy (bcode: DECL_FUNCTION_CODE (decl: callee));
5408 break;
5409
5410 case BUILT_IN_STRNCAT:
5411 case BUILT_IN_STRNCAT_CHK:
5412 handle_builtin_strncat (DECL_FUNCTION_CODE (decl: callee));
5413 break;
5414
5415 case BUILT_IN_STPNCPY:
5416 case BUILT_IN_STPNCPY_CHK:
5417 case BUILT_IN_STRNCPY:
5418 case BUILT_IN_STRNCPY_CHK:
5419 handle_builtin_stxncpy_strncat (append_p: false);
5420 break;
5421
5422 case BUILT_IN_MEMCPY:
5423 case BUILT_IN_MEMCPY_CHK:
5424 case BUILT_IN_MEMPCPY:
5425 case BUILT_IN_MEMPCPY_CHK:
5426 handle_builtin_memcpy (bcode: DECL_FUNCTION_CODE (decl: callee));
5427 break;
5428 case BUILT_IN_STRCAT:
5429 case BUILT_IN_STRCAT_CHK:
5430 handle_builtin_strcat (bcode: DECL_FUNCTION_CODE (decl: callee));
5431 break;
5432 case BUILT_IN_ALLOCA:
5433 case BUILT_IN_ALLOCA_WITH_ALIGN:
5434 case BUILT_IN_MALLOC:
5435 case BUILT_IN_CALLOC:
5436 handle_alloc_call (bcode: DECL_FUNCTION_CODE (decl: callee));
5437 break;
5438 case BUILT_IN_MEMSET:
5439 if (handle_builtin_memset (zero_write))
5440 return false;
5441 break;
5442 case BUILT_IN_MEMCMP:
5443 if (handle_builtin_memcmp ())
5444 return false;
5445 break;
5446 case BUILT_IN_STRCMP:
5447 case BUILT_IN_STRNCMP:
5448 if (handle_builtin_string_cmp ())
5449 return false;
5450 break;
5451 default:
5452 if (handle_printf_call (&m_gsi, ptr_qry))
5453 return false;
5454 break;
5455 }
5456
5457 return true;
5458}
5459
5460/* Handle an assignment statement at *GSI to a LHS of integral type.
5461 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5462
5463void
5464strlen_pass::handle_integral_assign (bool *cleanup_eh)
5465{
5466 gimple *stmt = gsi_stmt (i: m_gsi);
5467 tree lhs = gimple_assign_lhs (gs: stmt);
5468 tree lhs_type = TREE_TYPE (lhs);
5469
5470 enum tree_code code = gimple_assign_rhs_code (gs: stmt);
5471 if (code == COND_EXPR)
5472 {
5473 tree cond = gimple_assign_rhs1 (gs: stmt);
5474 enum tree_code cond_code = TREE_CODE (cond);
5475
5476 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5477 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5478 TREE_OPERAND (cond, 1), stmt);
5479 }
5480 else if (code == EQ_EXPR || code == NE_EXPR)
5481 fold_strstr_to_strncmp (rhs1: gimple_assign_rhs1 (gs: stmt),
5482 rhs2: gimple_assign_rhs2 (gs: stmt), stmt);
5483 else if (gimple_assign_load_p (stmt)
5484 && TREE_CODE (lhs_type) == INTEGER_TYPE
5485 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5486 && (TYPE_PRECISION (lhs_type)
5487 == TYPE_PRECISION (char_type_node))
5488 && !gimple_has_volatile_ops (stmt))
5489 {
5490 tree off = integer_zero_node;
5491 unsigned HOST_WIDE_INT coff = 0;
5492 int idx = 0;
5493 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
5494 if (code == MEM_REF)
5495 {
5496 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5497 if (idx > 0)
5498 {
5499 strinfo *si = get_strinfo (idx);
5500 if (si
5501 && si->nonzero_chars
5502 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5503 && (wi::to_widest (t: si->nonzero_chars)
5504 >= wi::to_widest (t: off)))
5505 off = TREE_OPERAND (rhs1, 1);
5506 else
5507 /* This case is not useful. See if get_addr_stridx
5508 returns something usable. */
5509 idx = 0;
5510 }
5511 }
5512 if (idx <= 0)
5513 idx = get_addr_stridx (exp: rhs1, stmt, NULL_TREE, offset_out: &coff);
5514 if (idx > 0)
5515 {
5516 strinfo *si = get_strinfo (idx);
5517 if (si
5518 && si->nonzero_chars
5519 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5520 {
5521 widest_int w1 = wi::to_widest (t: si->nonzero_chars);
5522 widest_int w2 = wi::to_widest (t: off) + coff;
5523 if (w1 == w2
5524 && si->full_string_p)
5525 {
5526 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5527 {
5528 fprintf (stream: dump_file, format: "Optimizing: ");
5529 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5530 }
5531
5532 /* Reading the final '\0' character. */
5533 tree zero = build_int_cst (lhs_type, 0);
5534 gimple_set_vuse (g: stmt, NULL_TREE);
5535 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5536 *cleanup_eh
5537 |= maybe_clean_or_replace_eh_stmt (stmt,
5538 gsi_stmt (i: m_gsi));
5539 stmt = gsi_stmt (i: m_gsi);
5540 update_stmt (s: stmt);
5541
5542 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5543 {
5544 fprintf (stream: dump_file, format: "into: ");
5545 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5546 }
5547 }
5548 else if (w1 > w2)
5549 {
5550 /* Reading a character before the final '\0'
5551 character. Just set the value range to ~[0, 0]
5552 if we don't have anything better. */
5553 value_range r;
5554 if (!get_range_query (cfun)->range_of_expr (r, expr: lhs)
5555 || r.varying_p ())
5556 {
5557 r.set_nonzero (lhs_type);
5558 set_range_info (lhs, r);
5559 }
5560 }
5561 }
5562 }
5563 }
5564 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5565 {
5566 if (int idx = new_stridx (exp: lhs))
5567 {
5568 /* Record multi-byte assignments from MEM_REFs. */
5569 bool storing_all_nonzero_p;
5570 bool storing_all_zeros_p;
5571 bool full_string_p;
5572 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5573 tree rhs = gimple_assign_rhs1 (gs: stmt);
5574 const bool ranges_valid
5575 = count_nonzero_bytes (expr_or_type: rhs, stmt,
5576 lenrange, nulterm: &full_string_p,
5577 allnul: &storing_all_zeros_p,
5578 allnonnul: &storing_all_nonzero_p);
5579 if (ranges_valid)
5580 {
5581 tree length = build_int_cst (sizetype, lenrange[0]);
5582 strinfo *si = new_strinfo (ptr: lhs, idx, nonzero_chars: length, full_string_p);
5583 set_strinfo (idx, si);
5584 si->writable = true;
5585 si->dont_invalidate = true;
5586 }
5587 }
5588 }
5589
5590 if (strlen_to_stridx)
5591 {
5592 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
5593 if (stridx_strlenloc *ps = strlen_to_stridx->get (k: rhs1))
5594 strlen_to_stridx->put (k: lhs, v: stridx_strlenloc (*ps));
5595 }
5596}
5597
5598/* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5599 the assignment stores all zero bytes. */
5600
5601bool
5602strlen_pass::handle_assign (tree lhs, bool *zero_write)
5603{
5604 tree type = TREE_TYPE (lhs);
5605 if (TREE_CODE (type) == ARRAY_TYPE)
5606 type = TREE_TYPE (type);
5607
5608 bool is_char_store = is_char_type (type);
5609 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5610 {
5611 /* To consider stores into char objects via integer types other
5612 than char but not those to non-character objects, determine
5613 the type of the destination rather than just the type of
5614 the access. */
5615 for (int i = 0; i != 2; ++i)
5616 {
5617 tree ref = TREE_OPERAND (lhs, i);
5618 type = TREE_TYPE (ref);
5619 if (TREE_CODE (type) == POINTER_TYPE)
5620 type = TREE_TYPE (type);
5621 if (TREE_CODE (type) == ARRAY_TYPE)
5622 type = TREE_TYPE (type);
5623 if (is_char_type (type))
5624 {
5625 is_char_store = true;
5626 break;
5627 }
5628 }
5629 }
5630
5631 /* Handle a single or multibyte assignment. */
5632 if (is_char_store && !handle_store (zero_write))
5633 return false;
5634
5635 return true;
5636}
5637
5638
5639/* Attempt to check for validity of the performed access a single statement
5640 at *GSI using string length knowledge, and to optimize it.
5641 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5642 true. Return true to let the caller advance *GSI to the next statement
5643 in the basic block and false otherwise. */
5644
5645bool
5646strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5647{
5648 gimple *stmt = gsi_stmt (i: m_gsi);
5649
5650 /* For statements that modify a string, set to true if the write
5651 is only zeros. */
5652 bool zero_write = false;
5653
5654 if (is_gimple_call (gs: stmt))
5655 {
5656 if (!check_and_optimize_call (zero_write: &zero_write))
5657 return false;
5658 }
5659 else if (!flag_optimize_strlen || !strlen_optimize)
5660 return true;
5661 else if (is_gimple_assign (gs: stmt) && !gimple_clobber_p (s: stmt))
5662 {
5663 /* Handle non-clobbering assignment. */
5664 tree lhs = gimple_assign_lhs (gs: stmt);
5665 tree lhs_type = TREE_TYPE (lhs);
5666
5667 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5668 {
5669 if (gimple_assign_single_p (gs: stmt)
5670 || (gimple_assign_cast_p (s: stmt)
5671 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5672 {
5673 int idx = get_stridx (exp: gimple_assign_rhs1 (gs: stmt), stmt);
5674 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5675 }
5676 else if (gimple_assign_rhs_code (gs: stmt) == POINTER_PLUS_EXPR)
5677 handle_pointer_plus ();
5678 }
5679 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5680 /* Handle assignment to a character. */
5681 handle_integral_assign (cleanup_eh);
5682 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5683 if (!handle_assign (lhs, zero_write: &zero_write))
5684 return false;
5685 }
5686 else if (gcond *cond = dyn_cast<gcond *> (p: stmt))
5687 {
5688 enum tree_code code = gimple_cond_code (gs: cond);
5689 if (code == EQ_EXPR || code == NE_EXPR)
5690 fold_strstr_to_strncmp (rhs1: gimple_cond_lhs (gs: stmt),
5691 rhs2: gimple_cond_rhs (gs: stmt), stmt);
5692 }
5693
5694 if (gimple_vdef (g: stmt))
5695 maybe_invalidate (stmt, zero_write);
5696 return true;
5697}
5698
5699/* Recursively call maybe_invalidate on stmts that might be executed
5700 in between dombb and current bb and that contain a vdef. Stop when
5701 *count stmts are inspected, or if the whole strinfo vector has
5702 been invalidated. */
5703
5704static void
5705do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5706{
5707 unsigned int i, n = gimple_phi_num_args (gs: phi);
5708
5709 for (i = 0; i < n; i++)
5710 {
5711 tree vuse = gimple_phi_arg_def (gs: phi, index: i);
5712 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5713 basic_block bb = gimple_bb (g: stmt);
5714 if (bb == NULL
5715 || bb == dombb
5716 || !bitmap_set_bit (visited, bb->index)
5717 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5718 continue;
5719 while (1)
5720 {
5721 if (gimple_code (g: stmt) == GIMPLE_PHI)
5722 {
5723 do_invalidate (dombb, phi: stmt, visited, count);
5724 if (*count == 0)
5725 return;
5726 break;
5727 }
5728 if (--*count == 0)
5729 return;
5730 if (!maybe_invalidate (stmt))
5731 {
5732 *count = 0;
5733 return;
5734 }
5735 vuse = gimple_vuse (g: stmt);
5736 stmt = SSA_NAME_DEF_STMT (vuse);
5737 if (gimple_bb (g: stmt) != bb)
5738 {
5739 bb = gimple_bb (g: stmt);
5740 if (bb == NULL
5741 || bb == dombb
5742 || !bitmap_set_bit (visited, bb->index)
5743 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5744 break;
5745 }
5746 }
5747 }
5748}
5749
5750/* Release pointer_query cache. */
5751
5752strlen_pass::~strlen_pass ()
5753{
5754 ptr_qry.flush_cache ();
5755}
5756
5757/* Callback for walk_dominator_tree. Attempt to optimize various
5758 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5759
5760edge
5761strlen_pass::before_dom_children (basic_block bb)
5762{
5763 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5764
5765 if (dombb == NULL)
5766 stridx_to_strinfo = NULL;
5767 else
5768 {
5769 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5770 if (stridx_to_strinfo)
5771 {
5772 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi);
5773 gsi_next (i: &gsi))
5774 {
5775 gphi *phi = gsi.phi ();
5776 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
5777 {
5778 bitmap visited = BITMAP_ALLOC (NULL);
5779 int count_vdef = 100;
5780 do_invalidate (dombb, phi, visited, count: &count_vdef);
5781 BITMAP_FREE (visited);
5782 if (count_vdef == 0)
5783 {
5784 /* If there were too many vdefs in between immediate
5785 dominator and current bb, invalidate everything.
5786 If stridx_to_strinfo has been unshared, we need
5787 to free it, otherwise just set it to NULL. */
5788 if (!strinfo_shared ())
5789 {
5790 unsigned int i;
5791 strinfo *si;
5792
5793 for (i = 1;
5794 vec_safe_iterate (v: stridx_to_strinfo, ix: i, ptr: &si);
5795 ++i)
5796 {
5797 free_strinfo (si);
5798 (*stridx_to_strinfo)[i] = NULL;
5799 }
5800 }
5801 else
5802 stridx_to_strinfo = NULL;
5803 }
5804 break;
5805 }
5806 }
5807 }
5808 }
5809
5810 /* If all PHI arguments have the same string index, the PHI result
5811 has it as well. */
5812 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi);
5813 gsi_next (i: &gsi))
5814 {
5815 gphi *phi = gsi.phi ();
5816 tree result = gimple_phi_result (gs: phi);
5817 if (!virtual_operand_p (op: result) && POINTER_TYPE_P (TREE_TYPE (result)))
5818 {
5819 int idx = get_stridx (exp: gimple_phi_arg_def (gs: phi, index: 0), stmt: phi);
5820 if (idx != 0)
5821 {
5822 unsigned int i, n = gimple_phi_num_args (gs: phi);
5823 for (i = 1; i < n; i++)
5824 if (idx != get_stridx (exp: gimple_phi_arg_def (gs: phi, index: i), stmt: phi))
5825 break;
5826 if (i == n)
5827 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5828 }
5829 }
5830 }
5831
5832 bool cleanup_eh = false;
5833
5834 /* Attempt to optimize individual statements. */
5835 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (i: m_gsi); )
5836 {
5837 /* Reset search depth performance counter. */
5838 ptr_qry.depth = 0;
5839
5840 if (check_and_optimize_stmt (cleanup_eh: &cleanup_eh))
5841 gsi_next (i: &m_gsi);
5842 }
5843
5844 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5845 m_cleanup_cfg = true;
5846
5847 bb->aux = stridx_to_strinfo;
5848 if (vec_safe_length (v: stridx_to_strinfo) && !strinfo_shared ())
5849 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5850 return NULL;
5851}
5852
5853/* Callback for walk_dominator_tree. Free strinfo vector if it is
5854 owned by the current bb, clear bb->aux. */
5855
5856void
5857strlen_pass::after_dom_children (basic_block bb)
5858{
5859 if (bb->aux)
5860 {
5861 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5862 if (vec_safe_length (v: stridx_to_strinfo)
5863 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5864 {
5865 unsigned int i;
5866 strinfo *si;
5867
5868 for (i = 1; vec_safe_iterate (v: stridx_to_strinfo, ix: i, ptr: &si); ++i)
5869 free_strinfo (si);
5870 vec_free (v&: stridx_to_strinfo);
5871 }
5872 bb->aux = NULL;
5873 }
5874}
5875
5876namespace {
5877
5878static unsigned int
5879printf_strlen_execute (function *fun, bool warn_only)
5880{
5881 strlen_optimize = !warn_only;
5882
5883 calculate_dominance_info (CDI_DOMINATORS);
5884 loop_optimizer_init (LOOPS_NORMAL);
5885 scev_initialize ();
5886
5887 gcc_assert (!strlen_to_stridx);
5888 if (warn_stringop_overflow || warn_stringop_truncation)
5889 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5890
5891 /* This has to happen after initializing the loop optimizer
5892 and initializing SCEV as they create new SSA_NAMEs. */
5893 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, exact: true);
5894 max_stridx = 1;
5895
5896 /* String length optimization is implemented as a walk of the dominator
5897 tree and a forward walk of statements within each block. */
5898 strlen_pass walker (CDI_DOMINATORS);
5899 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5900
5901 if (dump_file && (dump_flags & TDF_DETAILS))
5902 walker.ptr_qry.dump (dump_file, true);
5903
5904 ssa_ver_to_stridx.release ();
5905 strinfo_pool.release ();
5906 if (decl_to_stridxlist_htab)
5907 {
5908 obstack_free (&stridx_obstack, NULL);
5909 delete decl_to_stridxlist_htab;
5910 decl_to_stridxlist_htab = NULL;
5911 }
5912 laststmt.stmt = NULL;
5913 laststmt.len = NULL_TREE;
5914 laststmt.stridx = 0;
5915
5916 if (strlen_to_stridx)
5917 {
5918 strlen_to_stridx->empty ();
5919 delete strlen_to_stridx;
5920 strlen_to_stridx = NULL;
5921 }
5922
5923 scev_finalize ();
5924 loop_optimizer_finalize ();
5925
5926 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5927}
5928
5929/* This file defines two passes: one for warnings that runs only when
5930 optimization is disabled, and another that implements optimizations
5931 and also issues warnings. */
5932
5933const pass_data pass_data_warn_printf =
5934{
5935 .type: GIMPLE_PASS, /* type */
5936 .name: "warn-printf", /* name */
5937 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
5938 .tv_id: TV_NONE, /* tv_id */
5939 /* Normally an optimization pass would require PROP_ssa but because
5940 this pass runs early, with no optimization, to do sprintf format
5941 checking, it only requires PROP_cfg. */
5942 PROP_cfg, /* properties_required */
5943 .properties_provided: 0, /* properties_provided */
5944 .properties_destroyed: 0, /* properties_destroyed */
5945 .todo_flags_start: 0, /* todo_flags_start */
5946 .todo_flags_finish: 0, /* todo_flags_finish */
5947};
5948
5949class pass_warn_printf : public gimple_opt_pass
5950{
5951public:
5952 pass_warn_printf (gcc::context *ctxt)
5953 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5954 {}
5955
5956 bool gate (function *) final override;
5957 unsigned int execute (function *fun) final override
5958 {
5959 return printf_strlen_execute (fun, warn_only: true);
5960 }
5961};
5962
5963
5964/* Return true to run the warning pass only when not optimizing and
5965 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5966
5967bool
5968pass_warn_printf::gate (function *)
5969{
5970 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5971}
5972
5973const pass_data pass_data_strlen =
5974{
5975 .type: GIMPLE_PASS, /* type */
5976 .name: "strlen", /* name */
5977 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
5978 .tv_id: TV_TREE_STRLEN, /* tv_id */
5979 PROP_cfg | PROP_ssa, /* properties_required */
5980 .properties_provided: 0, /* properties_provided */
5981 .properties_destroyed: 0, /* properties_destroyed */
5982 .todo_flags_start: 0, /* todo_flags_start */
5983 .todo_flags_finish: 0, /* todo_flags_finish */
5984};
5985
5986class pass_strlen : public gimple_opt_pass
5987{
5988public:
5989 pass_strlen (gcc::context *ctxt)
5990 : gimple_opt_pass (pass_data_strlen, ctxt)
5991 {}
5992
5993 opt_pass * clone () final override { return new pass_strlen (m_ctxt); }
5994
5995 bool gate (function *) final override;
5996 unsigned int execute (function *fun) final override
5997 {
5998 return printf_strlen_execute (fun, warn_only: false);
5999 }
6000};
6001
6002/* Return true to run the pass only when the sprintf and/or strlen
6003 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6004 are specified. */
6005
6006bool
6007pass_strlen::gate (function *)
6008{
6009 return ((warn_format_overflow > 0
6010 || warn_format_trunc > 0
6011 || warn_restrict > 0
6012 || flag_optimize_strlen > 0
6013 || flag_printf_return_value)
6014 && optimize > 0);
6015}
6016
6017} // anon namespace
6018
6019gimple_opt_pass *
6020make_pass_warn_printf (gcc::context *ctxt)
6021{
6022 return new pass_warn_printf (ctxt);
6023}
6024
6025gimple_opt_pass *
6026make_pass_strlen (gcc::context *ctxt)
6027{
6028 return new pass_strlen (ctxt);
6029}
6030

source code of gcc/tree-ssa-strlen.cc