1 | // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Shared details for processing format strings of printf and scanf |
10 | // (and friends). |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "FormatStringParsing.h" |
15 | #include "clang/Basic/LangOptions.h" |
16 | #include "clang/Basic/TargetInfo.h" |
17 | #include "llvm/Support/ConvertUTF.h" |
18 | #include <optional> |
19 | |
20 | using clang::analyze_format_string::ArgType; |
21 | using clang::analyze_format_string::FormatStringHandler; |
22 | using clang::analyze_format_string::FormatSpecifier; |
23 | using clang::analyze_format_string::LengthModifier; |
24 | using clang::analyze_format_string::OptionalAmount; |
25 | using clang::analyze_format_string::ConversionSpecifier; |
26 | using namespace clang; |
27 | |
28 | // Key function to FormatStringHandler. |
29 | FormatStringHandler::~FormatStringHandler() {} |
30 | |
31 | //===----------------------------------------------------------------------===// |
// Functions for parsing format string components that are common to both
// printf and scanf format strings.
34 | //===----------------------------------------------------------------------===// |
35 | |
36 | OptionalAmount |
37 | clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { |
38 | const char *I = Beg; |
39 | UpdateOnReturn <const char*> UpdateBeg(Beg, I); |
40 | |
41 | unsigned accumulator = 0; |
42 | bool hasDigits = false; |
43 | |
44 | for ( ; I != E; ++I) { |
45 | char c = *I; |
46 | if (c >= '0' && c <= '9') { |
47 | hasDigits = true; |
48 | accumulator = (accumulator * 10) + (c - '0'); |
49 | continue; |
50 | } |
51 | |
52 | if (hasDigits) |
53 | return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, |
54 | false); |
55 | |
56 | break; |
57 | } |
58 | |
59 | return OptionalAmount(); |
60 | } |
61 | |
62 | OptionalAmount |
63 | clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, |
64 | const char *E, |
65 | unsigned &argIndex) { |
66 | if (*Beg == '*') { |
67 | ++Beg; |
68 | return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); |
69 | } |
70 | |
71 | return ParseAmount(Beg, E); |
72 | } |
73 | |
74 | OptionalAmount |
75 | clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, |
76 | const char *Start, |
77 | const char *&Beg, |
78 | const char *E, |
79 | PositionContext p) { |
80 | if (*Beg == '*') { |
81 | const char *I = Beg + 1; |
82 | const OptionalAmount &Amt = ParseAmount(Beg&: I, E); |
83 | |
84 | if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { |
85 | H.HandleInvalidPosition(startPos: Beg, posLen: I - Beg, p); |
86 | return OptionalAmount(false); |
87 | } |
88 | |
89 | if (I == E) { |
90 | // No more characters left? |
91 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
92 | return OptionalAmount(false); |
93 | } |
94 | |
95 | assert(Amt.getHowSpecified() == OptionalAmount::Constant); |
96 | |
97 | if (*I == '$') { |
98 | // Handle positional arguments |
99 | |
100 | // Special case: '*0$', since this is an easy mistake. |
101 | if (Amt.getConstantAmount() == 0) { |
102 | H.HandleZeroPosition(startPos: Beg, posLen: I - Beg + 1); |
103 | return OptionalAmount(false); |
104 | } |
105 | |
106 | const char *Tmp = Beg; |
107 | Beg = ++I; |
108 | |
109 | return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, |
110 | Tmp, 0, true); |
111 | } |
112 | |
113 | H.HandleInvalidPosition(startPos: Beg, posLen: I - Beg, p); |
114 | return OptionalAmount(false); |
115 | } |
116 | |
117 | return ParseAmount(Beg, E); |
118 | } |
119 | |
120 | |
121 | bool |
122 | clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, |
123 | FormatSpecifier &CS, |
124 | const char *Start, |
125 | const char *&Beg, const char *E, |
126 | unsigned *argIndex) { |
127 | // FIXME: Support negative field widths. |
128 | if (argIndex) { |
129 | CS.setFieldWidth(ParseNonPositionAmount(Beg, E, argIndex&: *argIndex)); |
130 | } |
131 | else { |
132 | const OptionalAmount Amt = |
133 | ParsePositionAmount(H, Start, Beg, E, |
134 | p: analyze_format_string::FieldWidthPos); |
135 | |
136 | if (Amt.isInvalid()) |
137 | return true; |
138 | CS.setFieldWidth(Amt); |
139 | } |
140 | return false; |
141 | } |
142 | |
143 | bool |
144 | clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, |
145 | FormatSpecifier &FS, |
146 | const char *Start, |
147 | const char *&Beg, |
148 | const char *E) { |
149 | const char *I = Beg; |
150 | |
151 | const OptionalAmount &Amt = ParseAmount(Beg&: I, E); |
152 | |
153 | if (I == E) { |
154 | // No more characters left? |
155 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
156 | return true; |
157 | } |
158 | |
159 | if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { |
160 | // Warn that positional arguments are non-standard. |
161 | H.HandlePosition(startPos: Start, posLen: I - Start); |
162 | |
163 | // Special case: '%0$', since this is an easy mistake. |
164 | if (Amt.getConstantAmount() == 0) { |
165 | H.HandleZeroPosition(startPos: Start, posLen: I - Start); |
166 | return true; |
167 | } |
168 | |
169 | FS.setArgIndex(Amt.getConstantAmount() - 1); |
170 | FS.setUsesPositionalArg(); |
171 | // Update the caller's pointer if we decided to consume |
172 | // these characters. |
173 | Beg = I; |
174 | return false; |
175 | } |
176 | |
177 | return false; |
178 | } |
179 | |
180 | bool |
181 | clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, |
182 | FormatSpecifier &FS, |
183 | const char *&I, |
184 | const char *E, |
185 | const LangOptions &LO) { |
186 | if (!LO.OpenCL) |
187 | return false; |
188 | |
189 | const char *Start = I; |
190 | if (*I == 'v') { |
191 | ++I; |
192 | |
193 | if (I == E) { |
194 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
195 | return true; |
196 | } |
197 | |
198 | OptionalAmount NumElts = ParseAmount(Beg&: I, E); |
199 | if (NumElts.getHowSpecified() != OptionalAmount::Constant) { |
200 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
201 | return true; |
202 | } |
203 | |
204 | FS.setVectorNumElts(NumElts); |
205 | } |
206 | |
207 | return false; |
208 | } |
209 | |
210 | bool |
211 | clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, |
212 | const char *&I, |
213 | const char *E, |
214 | const LangOptions &LO, |
215 | bool IsScanf) { |
216 | LengthModifier::Kind lmKind = LengthModifier::None; |
217 | const char *lmPosition = I; |
218 | switch (*I) { |
219 | default: |
220 | return false; |
221 | case 'h': |
222 | ++I; |
223 | if (I != E && *I == 'h') { |
224 | ++I; |
225 | lmKind = LengthModifier::AsChar; |
226 | } else if (I != E && *I == 'l' && LO.OpenCL) { |
227 | ++I; |
228 | lmKind = LengthModifier::AsShortLong; |
229 | } else { |
230 | lmKind = LengthModifier::AsShort; |
231 | } |
232 | break; |
233 | case 'l': |
234 | ++I; |
235 | if (I != E && *I == 'l') { |
236 | ++I; |
237 | lmKind = LengthModifier::AsLongLong; |
238 | } else { |
239 | lmKind = LengthModifier::AsLong; |
240 | } |
241 | break; |
242 | case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; |
243 | case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; |
244 | case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; |
245 | case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; |
246 | case 'q': lmKind = LengthModifier::AsQuad; ++I; break; |
247 | case 'a': |
248 | if (IsScanf && !LO.C99 && !LO.CPlusPlus11) { |
249 | // For scanf in C90, look at the next character to see if this should |
250 | // be parsed as the GNU extension 'a' length modifier. If not, this |
251 | // will be parsed as a conversion specifier. |
252 | ++I; |
253 | if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { |
254 | lmKind = LengthModifier::AsAllocate; |
255 | break; |
256 | } |
257 | --I; |
258 | } |
259 | return false; |
260 | case 'm': |
261 | if (IsScanf) { |
262 | lmKind = LengthModifier::AsMAllocate; |
263 | ++I; |
264 | break; |
265 | } |
266 | return false; |
267 | // printf: AsInt64, AsInt32, AsInt3264 |
268 | // scanf: AsInt64 |
269 | case 'I': |
270 | if (I + 1 != E && I + 2 != E) { |
271 | if (I[1] == '6' && I[2] == '4') { |
272 | I += 3; |
273 | lmKind = LengthModifier::AsInt64; |
274 | break; |
275 | } |
276 | if (IsScanf) |
277 | return false; |
278 | |
279 | if (I[1] == '3' && I[2] == '2') { |
280 | I += 3; |
281 | lmKind = LengthModifier::AsInt32; |
282 | break; |
283 | } |
284 | } |
285 | ++I; |
286 | lmKind = LengthModifier::AsInt3264; |
287 | break; |
288 | case 'w': |
289 | lmKind = LengthModifier::AsWide; ++I; break; |
290 | } |
291 | LengthModifier lm(lmPosition, lmKind); |
292 | FS.setLengthModifier(lm); |
293 | return true; |
294 | } |
295 | |
296 | bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( |
297 | const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) { |
298 | if (SpecifierBegin + 1 >= FmtStrEnd) |
299 | return false; |
300 | |
301 | const llvm::UTF8 *SB = |
302 | reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1); |
303 | const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd); |
304 | const char FirstByte = *SB; |
305 | |
306 | // If the invalid specifier is a multibyte UTF-8 string, return the |
307 | // total length accordingly so that the conversion specifier can be |
308 | // properly updated to reflect a complete UTF-8 specifier. |
309 | unsigned NumBytes = llvm::getNumBytesForUTF8(firstByte: FirstByte); |
310 | if (NumBytes == 1) |
311 | return false; |
312 | if (SB + NumBytes > SE) |
313 | return false; |
314 | |
315 | Len = NumBytes + 1; |
316 | return true; |
317 | } |
318 | |
319 | //===----------------------------------------------------------------------===// |
320 | // Methods on ArgType. |
321 | //===----------------------------------------------------------------------===// |
322 | |
323 | clang::analyze_format_string::ArgType::MatchKind |
324 | ArgType::matchesType(ASTContext &C, QualType argTy) const { |
325 | // When using the format attribute in C++, you can receive a function or an |
326 | // array that will necessarily decay to a pointer when passed to the final |
327 | // format consumer. Apply decay before type comparison. |
328 | if (argTy->canDecayToPointerType()) |
329 | argTy = C.getDecayedType(T: argTy); |
330 | |
331 | if (Ptr) { |
332 | // It has to be a pointer. |
333 | const PointerType *PT = argTy->getAs<PointerType>(); |
334 | if (!PT) |
335 | return NoMatch; |
336 | |
337 | // We cannot write through a const qualified pointer. |
338 | if (PT->getPointeeType().isConstQualified()) |
339 | return NoMatch; |
340 | |
341 | argTy = PT->getPointeeType(); |
342 | } |
343 | |
344 | switch (K) { |
345 | case InvalidTy: |
346 | llvm_unreachable("ArgType must be valid" ); |
347 | |
348 | case UnknownTy: |
349 | return Match; |
350 | |
351 | case AnyCharTy: { |
352 | if (const auto *ETy = argTy->getAs<EnumType>()) { |
353 | // If the enum is incomplete we know nothing about the underlying type. |
354 | // Assume that it's 'int'. Do not use the underlying type for a scoped |
355 | // enumeration. |
356 | if (!ETy->getDecl()->isComplete()) |
357 | return NoMatch; |
358 | if (ETy->isUnscopedEnumerationType()) |
359 | argTy = ETy->getDecl()->getIntegerType(); |
360 | } |
361 | |
362 | if (const auto *BT = argTy->getAs<BuiltinType>()) { |
363 | // The types are perfectly matched? |
364 | switch (BT->getKind()) { |
365 | default: |
366 | break; |
367 | case BuiltinType::Char_S: |
368 | case BuiltinType::SChar: |
369 | case BuiltinType::UChar: |
370 | case BuiltinType::Char_U: |
371 | return Match; |
372 | case BuiltinType::Bool: |
373 | if (!Ptr) |
374 | return Match; |
375 | break; |
376 | } |
377 | // "Partially matched" because of promotions? |
378 | if (!Ptr) { |
379 | switch (BT->getKind()) { |
380 | default: |
381 | break; |
382 | case BuiltinType::Int: |
383 | case BuiltinType::UInt: |
384 | return MatchPromotion; |
385 | case BuiltinType::Short: |
386 | case BuiltinType::UShort: |
387 | case BuiltinType::WChar_S: |
388 | case BuiltinType::WChar_U: |
389 | return NoMatchPromotionTypeConfusion; |
390 | } |
391 | } |
392 | } |
393 | return NoMatch; |
394 | } |
395 | |
396 | case SpecificTy: { |
397 | if (const EnumType *ETy = argTy->getAs<EnumType>()) { |
398 | // If the enum is incomplete we know nothing about the underlying type. |
399 | // Assume that it's 'int'. Do not use the underlying type for a scoped |
400 | // enumeration as that needs an exact match. |
401 | if (!ETy->getDecl()->isComplete()) |
402 | argTy = C.IntTy; |
403 | else if (ETy->isUnscopedEnumerationType()) |
404 | argTy = ETy->getDecl()->getIntegerType(); |
405 | } |
406 | |
407 | if (argTy->isSaturatedFixedPointType()) |
408 | argTy = C.getCorrespondingUnsaturatedType(Ty: argTy); |
409 | |
410 | argTy = C.getCanonicalType(T: argTy).getUnqualifiedType(); |
411 | |
412 | if (T == argTy) |
413 | return Match; |
414 | if (const auto *BT = argTy->getAs<BuiltinType>()) { |
415 | // Check if the only difference between them is signed vs unsigned |
416 | // if true, return match signedness. |
417 | switch (BT->getKind()) { |
418 | default: |
419 | break; |
420 | case BuiltinType::Bool: |
421 | if (Ptr && (T == C.UnsignedCharTy || T == C.SignedCharTy)) |
422 | return NoMatch; |
423 | [[fallthrough]]; |
424 | case BuiltinType::Char_S: |
425 | case BuiltinType::SChar: |
426 | if (T == C.UnsignedShortTy || T == C.ShortTy) |
427 | return NoMatchTypeConfusion; |
428 | if (T == C.UnsignedCharTy) |
429 | return NoMatchSignedness; |
430 | if (T == C.SignedCharTy) |
431 | return Match; |
432 | break; |
433 | case BuiltinType::Char_U: |
434 | case BuiltinType::UChar: |
435 | if (T == C.UnsignedShortTy || T == C.ShortTy) |
436 | return NoMatchTypeConfusion; |
437 | if (T == C.UnsignedCharTy) |
438 | return Match; |
439 | if (T == C.SignedCharTy) |
440 | return NoMatchSignedness; |
441 | break; |
442 | case BuiltinType::Short: |
443 | if (T == C.UnsignedShortTy) |
444 | return NoMatchSignedness; |
445 | break; |
446 | case BuiltinType::UShort: |
447 | if (T == C.ShortTy) |
448 | return NoMatchSignedness; |
449 | break; |
450 | case BuiltinType::Int: |
451 | if (T == C.UnsignedIntTy) |
452 | return NoMatchSignedness; |
453 | break; |
454 | case BuiltinType::UInt: |
455 | if (T == C.IntTy) |
456 | return NoMatchSignedness; |
457 | break; |
458 | case BuiltinType::Long: |
459 | if (T == C.UnsignedLongTy) |
460 | return NoMatchSignedness; |
461 | break; |
462 | case BuiltinType::ULong: |
463 | if (T == C.LongTy) |
464 | return NoMatchSignedness; |
465 | break; |
466 | case BuiltinType::LongLong: |
467 | if (T == C.UnsignedLongLongTy) |
468 | return NoMatchSignedness; |
469 | break; |
470 | case BuiltinType::ULongLong: |
471 | if (T == C.LongLongTy) |
472 | return NoMatchSignedness; |
473 | break; |
474 | } |
475 | // "Partially matched" because of promotions? |
476 | if (!Ptr) { |
477 | switch (BT->getKind()) { |
478 | default: |
479 | break; |
480 | case BuiltinType::Bool: |
481 | if (T == C.IntTy || T == C.UnsignedIntTy) |
482 | return MatchPromotion; |
483 | break; |
484 | case BuiltinType::Int: |
485 | case BuiltinType::UInt: |
486 | if (T == C.SignedCharTy || T == C.UnsignedCharTy || |
487 | T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy || |
488 | T == C.WideCharTy) |
489 | return MatchPromotion; |
490 | break; |
491 | case BuiltinType::Char_U: |
492 | if (T == C.UnsignedIntTy) |
493 | return MatchPromotion; |
494 | if (T == C.UnsignedShortTy) |
495 | return NoMatchPromotionTypeConfusion; |
496 | break; |
497 | case BuiltinType::Char_S: |
498 | if (T == C.IntTy) |
499 | return MatchPromotion; |
500 | if (T == C.ShortTy) |
501 | return NoMatchPromotionTypeConfusion; |
502 | break; |
503 | case BuiltinType::Half: |
504 | case BuiltinType::Float: |
505 | if (T == C.DoubleTy) |
506 | return MatchPromotion; |
507 | break; |
508 | case BuiltinType::Short: |
509 | case BuiltinType::UShort: |
510 | if (T == C.SignedCharTy || T == C.UnsignedCharTy) |
511 | return NoMatchPromotionTypeConfusion; |
512 | break; |
513 | case BuiltinType::WChar_U: |
514 | case BuiltinType::WChar_S: |
515 | if (T != C.WCharTy && T != C.WideCharTy) |
516 | return NoMatchPromotionTypeConfusion; |
517 | } |
518 | } |
519 | } |
520 | return NoMatch; |
521 | } |
522 | |
523 | case CStrTy: { |
524 | const PointerType *PT = argTy->getAs<PointerType>(); |
525 | if (!PT) |
526 | return NoMatch; |
527 | QualType pointeeTy = PT->getPointeeType(); |
528 | if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) |
529 | switch (BT->getKind()) { |
530 | case BuiltinType::Char_U: |
531 | case BuiltinType::UChar: |
532 | case BuiltinType::Char_S: |
533 | case BuiltinType::SChar: |
534 | return Match; |
535 | default: |
536 | break; |
537 | } |
538 | |
539 | return NoMatch; |
540 | } |
541 | |
542 | case WCStrTy: { |
543 | const PointerType *PT = argTy->getAs<PointerType>(); |
544 | if (!PT) |
545 | return NoMatch; |
546 | QualType pointeeTy = |
547 | C.getCanonicalType(T: PT->getPointeeType()).getUnqualifiedType(); |
548 | return pointeeTy == C.getWideCharType() ? Match : NoMatch; |
549 | } |
550 | |
551 | case WIntTy: { |
552 | QualType WInt = C.getCanonicalType(T: C.getWIntType()).getUnqualifiedType(); |
553 | |
554 | if (C.getCanonicalType(T: argTy).getUnqualifiedType() == WInt) |
555 | return Match; |
556 | |
557 | QualType PromoArg = C.isPromotableIntegerType(T: argTy) |
558 | ? C.getPromotedIntegerType(PromotableType: argTy) |
559 | : argTy; |
560 | PromoArg = C.getCanonicalType(T: PromoArg).getUnqualifiedType(); |
561 | |
562 | // If the promoted argument is the corresponding signed type of the |
563 | // wint_t type, then it should match. |
564 | if (PromoArg->hasSignedIntegerRepresentation() && |
565 | C.getCorrespondingUnsignedType(T: PromoArg) == WInt) |
566 | return Match; |
567 | |
568 | return WInt == PromoArg ? Match : NoMatch; |
569 | } |
570 | |
571 | case CPointerTy: |
572 | if (argTy->isVoidPointerType()) { |
573 | return Match; |
574 | } if (argTy->isPointerType() || argTy->isObjCObjectPointerType() || |
575 | argTy->isBlockPointerType() || argTy->isNullPtrType()) { |
576 | return NoMatchPedantic; |
577 | } else { |
578 | return NoMatch; |
579 | } |
580 | |
581 | case ObjCPointerTy: { |
582 | if (argTy->getAs<ObjCObjectPointerType>() || |
583 | argTy->getAs<BlockPointerType>()) |
584 | return Match; |
585 | |
586 | // Handle implicit toll-free bridging. |
587 | if (const PointerType *PT = argTy->getAs<PointerType>()) { |
588 | // Things such as CFTypeRef are really just opaque pointers |
589 | // to C structs representing CF types that can often be bridged |
590 | // to Objective-C objects. Since the compiler doesn't know which |
591 | // structs can be toll-free bridged, we just accept them all. |
592 | QualType pointee = PT->getPointeeType(); |
593 | if (pointee->getAsStructureType() || pointee->isVoidType()) |
594 | return Match; |
595 | } |
596 | return NoMatch; |
597 | } |
598 | } |
599 | |
600 | llvm_unreachable("Invalid ArgType Kind!" ); |
601 | } |
602 | |
603 | ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const { |
604 | // Check for valid vector element types. |
605 | if (T.isNull()) |
606 | return ArgType::Invalid(); |
607 | |
608 | QualType Vec = C.getExtVectorType(T, NumElts); |
609 | return ArgType(Vec, Name); |
610 | } |
611 | |
612 | QualType ArgType::getRepresentativeType(ASTContext &C) const { |
613 | QualType Res; |
614 | switch (K) { |
615 | case InvalidTy: |
616 | llvm_unreachable("No representative type for Invalid ArgType" ); |
617 | case UnknownTy: |
618 | llvm_unreachable("No representative type for Unknown ArgType" ); |
619 | case AnyCharTy: |
620 | Res = C.CharTy; |
621 | break; |
622 | case SpecificTy: |
623 | Res = T; |
624 | break; |
625 | case CStrTy: |
626 | Res = C.getPointerType(C.CharTy); |
627 | break; |
628 | case WCStrTy: |
629 | Res = C.getPointerType(T: C.getWideCharType()); |
630 | break; |
631 | case ObjCPointerTy: |
632 | Res = C.ObjCBuiltinIdTy; |
633 | break; |
634 | case CPointerTy: |
635 | Res = C.VoidPtrTy; |
636 | break; |
637 | case WIntTy: { |
638 | Res = C.getWIntType(); |
639 | break; |
640 | } |
641 | } |
642 | |
643 | if (Ptr) |
644 | Res = C.getPointerType(T: Res); |
645 | return Res; |
646 | } |
647 | |
648 | std::string ArgType::getRepresentativeTypeName(ASTContext &C) const { |
649 | std::string S = getRepresentativeType(C).getAsString(Policy: C.getPrintingPolicy()); |
650 | |
651 | std::string Alias; |
652 | if (Name) { |
653 | // Use a specific name for this type, e.g. "size_t". |
654 | Alias = Name; |
655 | if (Ptr) { |
656 | // If ArgType is actually a pointer to T, append an asterisk. |
657 | Alias += (Alias[Alias.size()-1] == '*') ? "*" : " *" ; |
658 | } |
659 | // If Alias is the same as the underlying type, e.g. wchar_t, then drop it. |
660 | if (S == Alias) |
661 | Alias.clear(); |
662 | } |
663 | |
664 | if (!Alias.empty()) |
665 | return std::string("'" ) + Alias + "' (aka '" + S + "')" ; |
666 | return std::string("'" ) + S + "'" ; |
667 | } |
668 | |
669 | |
670 | //===----------------------------------------------------------------------===// |
671 | // Methods on OptionalAmount. |
672 | //===----------------------------------------------------------------------===// |
673 | |
674 | ArgType |
675 | analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { |
676 | return Ctx.IntTy; |
677 | } |
678 | |
679 | //===----------------------------------------------------------------------===// |
680 | // Methods on LengthModifier. |
681 | //===----------------------------------------------------------------------===// |
682 | |
683 | const char * |
684 | analyze_format_string::LengthModifier::toString() const { |
685 | switch (kind) { |
686 | case AsChar: |
687 | return "hh" ; |
688 | case AsShort: |
689 | return "h" ; |
690 | case AsShortLong: |
691 | return "hl" ; |
692 | case AsLong: // or AsWideChar |
693 | return "l" ; |
694 | case AsLongLong: |
695 | return "ll" ; |
696 | case AsQuad: |
697 | return "q" ; |
698 | case AsIntMax: |
699 | return "j" ; |
700 | case AsSizeT: |
701 | return "z" ; |
702 | case AsPtrDiff: |
703 | return "t" ; |
704 | case AsInt32: |
705 | return "I32" ; |
706 | case AsInt3264: |
707 | return "I" ; |
708 | case AsInt64: |
709 | return "I64" ; |
710 | case AsLongDouble: |
711 | return "L" ; |
712 | case AsAllocate: |
713 | return "a" ; |
714 | case AsMAllocate: |
715 | return "m" ; |
716 | case AsWide: |
717 | return "w" ; |
718 | case None: |
719 | return "" ; |
720 | } |
721 | return nullptr; |
722 | } |
723 | |
724 | //===----------------------------------------------------------------------===// |
725 | // Methods on ConversionSpecifier. |
726 | //===----------------------------------------------------------------------===// |
727 | |
728 | const char *ConversionSpecifier::toString() const { |
729 | switch (kind) { |
730 | case bArg: return "b" ; |
731 | case BArg: return "B" ; |
732 | case dArg: return "d" ; |
733 | case DArg: return "D" ; |
734 | case iArg: return "i" ; |
735 | case oArg: return "o" ; |
736 | case OArg: return "O" ; |
737 | case uArg: return "u" ; |
738 | case UArg: return "U" ; |
739 | case xArg: return "x" ; |
740 | case XArg: return "X" ; |
741 | case fArg: return "f" ; |
742 | case FArg: return "F" ; |
743 | case eArg: return "e" ; |
744 | case EArg: return "E" ; |
745 | case gArg: return "g" ; |
746 | case GArg: return "G" ; |
747 | case aArg: return "a" ; |
748 | case AArg: return "A" ; |
749 | case cArg: return "c" ; |
750 | case sArg: return "s" ; |
751 | case pArg: return "p" ; |
752 | case PArg: |
753 | return "P" ; |
754 | case nArg: return "n" ; |
755 | case PercentArg: return "%" ; |
756 | case ScanListArg: return "[" ; |
757 | case InvalidSpecifier: return nullptr; |
758 | |
759 | // POSIX unicode extensions. |
760 | case CArg: return "C" ; |
761 | case SArg: return "S" ; |
762 | |
763 | // Objective-C specific specifiers. |
764 | case ObjCObjArg: return "@" ; |
765 | |
766 | // FreeBSD kernel specific specifiers. |
767 | case FreeBSDbArg: return "b" ; |
768 | case FreeBSDDArg: return "D" ; |
769 | case FreeBSDrArg: return "r" ; |
770 | case FreeBSDyArg: return "y" ; |
771 | |
772 | // GlibC specific specifiers. |
773 | case PrintErrno: return "m" ; |
774 | |
775 | // MS specific specifiers. |
776 | case ZArg: return "Z" ; |
777 | |
778 | // ISO/IEC TR 18037 (fixed-point) specific specifiers. |
779 | case rArg: |
780 | return "r" ; |
781 | case RArg: |
782 | return "R" ; |
783 | case kArg: |
784 | return "k" ; |
785 | case KArg: |
786 | return "K" ; |
787 | } |
788 | return nullptr; |
789 | } |
790 | |
791 | std::optional<ConversionSpecifier> |
792 | ConversionSpecifier::getStandardSpecifier() const { |
793 | ConversionSpecifier::Kind NewKind; |
794 | |
795 | switch (getKind()) { |
796 | default: |
797 | return std::nullopt; |
798 | case DArg: |
799 | NewKind = dArg; |
800 | break; |
801 | case UArg: |
802 | NewKind = uArg; |
803 | break; |
804 | case OArg: |
805 | NewKind = oArg; |
806 | break; |
807 | } |
808 | |
809 | ConversionSpecifier FixedCS(*this); |
810 | FixedCS.setKind(NewKind); |
811 | return FixedCS; |
812 | } |
813 | |
814 | //===----------------------------------------------------------------------===// |
815 | // Methods on OptionalAmount. |
816 | //===----------------------------------------------------------------------===// |
817 | |
818 | void OptionalAmount::toString(raw_ostream &os) const { |
819 | switch (hs) { |
820 | case Invalid: |
821 | case NotSpecified: |
822 | return; |
823 | case Arg: |
824 | if (UsesDotPrefix) |
825 | os << "." ; |
826 | if (usesPositionalArg()) |
827 | os << "*" << getPositionalArgIndex() << "$" ; |
828 | else |
829 | os << "*" ; |
830 | break; |
831 | case Constant: |
832 | if (UsesDotPrefix) |
833 | os << "." ; |
834 | os << amt; |
835 | break; |
836 | } |
837 | } |
838 | |
839 | bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target, |
840 | const LangOptions &LO) const { |
841 | switch (LM.getKind()) { |
842 | case LengthModifier::None: |
843 | return true; |
844 | |
845 | // Handle most integer flags |
846 | case LengthModifier::AsShort: |
847 | // Length modifier only applies to FP vectors. |
848 | if (LO.OpenCL && CS.isDoubleArg()) |
849 | return !VectorNumElts.isInvalid(); |
850 | |
851 | if (CS.isFixedPointArg()) |
852 | return true; |
853 | |
854 | if (Target.getTriple().isOSMSVCRT()) { |
855 | switch (CS.getKind()) { |
856 | case ConversionSpecifier::cArg: |
857 | case ConversionSpecifier::CArg: |
858 | case ConversionSpecifier::sArg: |
859 | case ConversionSpecifier::SArg: |
860 | case ConversionSpecifier::ZArg: |
861 | return true; |
862 | default: |
863 | break; |
864 | } |
865 | } |
866 | [[fallthrough]]; |
867 | case LengthModifier::AsChar: |
868 | case LengthModifier::AsLongLong: |
869 | case LengthModifier::AsQuad: |
870 | case LengthModifier::AsIntMax: |
871 | case LengthModifier::AsSizeT: |
872 | case LengthModifier::AsPtrDiff: |
873 | switch (CS.getKind()) { |
874 | case ConversionSpecifier::bArg: |
875 | case ConversionSpecifier::BArg: |
876 | case ConversionSpecifier::dArg: |
877 | case ConversionSpecifier::DArg: |
878 | case ConversionSpecifier::iArg: |
879 | case ConversionSpecifier::oArg: |
880 | case ConversionSpecifier::OArg: |
881 | case ConversionSpecifier::uArg: |
882 | case ConversionSpecifier::UArg: |
883 | case ConversionSpecifier::xArg: |
884 | case ConversionSpecifier::XArg: |
885 | case ConversionSpecifier::nArg: |
886 | return true; |
887 | case ConversionSpecifier::FreeBSDrArg: |
888 | case ConversionSpecifier::FreeBSDyArg: |
889 | return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS(); |
890 | default: |
891 | return false; |
892 | } |
893 | |
894 | case LengthModifier::AsShortLong: |
895 | return LO.OpenCL && !VectorNumElts.isInvalid(); |
896 | |
897 | // Handle 'l' flag |
898 | case LengthModifier::AsLong: // or AsWideChar |
899 | if (CS.isDoubleArg()) { |
900 | // Invalid for OpenCL FP scalars. |
901 | if (LO.OpenCL && VectorNumElts.isInvalid()) |
902 | return false; |
903 | return true; |
904 | } |
905 | |
906 | if (CS.isFixedPointArg()) |
907 | return true; |
908 | |
909 | switch (CS.getKind()) { |
910 | case ConversionSpecifier::bArg: |
911 | case ConversionSpecifier::BArg: |
912 | case ConversionSpecifier::dArg: |
913 | case ConversionSpecifier::DArg: |
914 | case ConversionSpecifier::iArg: |
915 | case ConversionSpecifier::oArg: |
916 | case ConversionSpecifier::OArg: |
917 | case ConversionSpecifier::uArg: |
918 | case ConversionSpecifier::UArg: |
919 | case ConversionSpecifier::xArg: |
920 | case ConversionSpecifier::XArg: |
921 | case ConversionSpecifier::nArg: |
922 | case ConversionSpecifier::cArg: |
923 | case ConversionSpecifier::sArg: |
924 | case ConversionSpecifier::ScanListArg: |
925 | case ConversionSpecifier::ZArg: |
926 | return true; |
927 | case ConversionSpecifier::FreeBSDrArg: |
928 | case ConversionSpecifier::FreeBSDyArg: |
929 | return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS(); |
930 | default: |
931 | return false; |
932 | } |
933 | |
934 | case LengthModifier::AsLongDouble: |
935 | switch (CS.getKind()) { |
936 | case ConversionSpecifier::aArg: |
937 | case ConversionSpecifier::AArg: |
938 | case ConversionSpecifier::fArg: |
939 | case ConversionSpecifier::FArg: |
940 | case ConversionSpecifier::eArg: |
941 | case ConversionSpecifier::EArg: |
942 | case ConversionSpecifier::gArg: |
943 | case ConversionSpecifier::GArg: |
944 | return true; |
945 | // GNU libc extension. |
946 | case ConversionSpecifier::dArg: |
947 | case ConversionSpecifier::iArg: |
948 | case ConversionSpecifier::oArg: |
949 | case ConversionSpecifier::uArg: |
950 | case ConversionSpecifier::xArg: |
951 | case ConversionSpecifier::XArg: |
952 | return !Target.getTriple().isOSDarwin() && |
953 | !Target.getTriple().isOSWindows(); |
954 | default: |
955 | return false; |
956 | } |
957 | |
958 | case LengthModifier::AsAllocate: |
959 | switch (CS.getKind()) { |
960 | case ConversionSpecifier::sArg: |
961 | case ConversionSpecifier::SArg: |
962 | case ConversionSpecifier::ScanListArg: |
963 | return true; |
964 | default: |
965 | return false; |
966 | } |
967 | |
968 | case LengthModifier::AsMAllocate: |
969 | switch (CS.getKind()) { |
970 | case ConversionSpecifier::cArg: |
971 | case ConversionSpecifier::CArg: |
972 | case ConversionSpecifier::sArg: |
973 | case ConversionSpecifier::SArg: |
974 | case ConversionSpecifier::ScanListArg: |
975 | return true; |
976 | default: |
977 | return false; |
978 | } |
979 | case LengthModifier::AsInt32: |
980 | case LengthModifier::AsInt3264: |
981 | case LengthModifier::AsInt64: |
982 | switch (CS.getKind()) { |
983 | case ConversionSpecifier::dArg: |
984 | case ConversionSpecifier::iArg: |
985 | case ConversionSpecifier::oArg: |
986 | case ConversionSpecifier::uArg: |
987 | case ConversionSpecifier::xArg: |
988 | case ConversionSpecifier::XArg: |
989 | return Target.getTriple().isOSMSVCRT(); |
990 | default: |
991 | return false; |
992 | } |
993 | case LengthModifier::AsWide: |
994 | switch (CS.getKind()) { |
995 | case ConversionSpecifier::cArg: |
996 | case ConversionSpecifier::CArg: |
997 | case ConversionSpecifier::sArg: |
998 | case ConversionSpecifier::SArg: |
999 | case ConversionSpecifier::ZArg: |
1000 | return Target.getTriple().isOSMSVCRT(); |
1001 | default: |
1002 | return false; |
1003 | } |
1004 | } |
1005 | llvm_unreachable("Invalid LengthModifier Kind!" ); |
1006 | } |
1007 | |
1008 | bool FormatSpecifier::hasStandardLengthModifier() const { |
1009 | switch (LM.getKind()) { |
1010 | case LengthModifier::None: |
1011 | case LengthModifier::AsChar: |
1012 | case LengthModifier::AsShort: |
1013 | case LengthModifier::AsLong: |
1014 | case LengthModifier::AsLongLong: |
1015 | case LengthModifier::AsIntMax: |
1016 | case LengthModifier::AsSizeT: |
1017 | case LengthModifier::AsPtrDiff: |
1018 | case LengthModifier::AsLongDouble: |
1019 | return true; |
1020 | case LengthModifier::AsAllocate: |
1021 | case LengthModifier::AsMAllocate: |
1022 | case LengthModifier::AsQuad: |
1023 | case LengthModifier::AsInt32: |
1024 | case LengthModifier::AsInt3264: |
1025 | case LengthModifier::AsInt64: |
1026 | case LengthModifier::AsWide: |
1027 | case LengthModifier::AsShortLong: // ??? |
1028 | return false; |
1029 | } |
1030 | llvm_unreachable("Invalid LengthModifier Kind!" ); |
1031 | } |
1032 | |
1033 | bool FormatSpecifier::hasStandardConversionSpecifier( |
1034 | const LangOptions &LangOpt) const { |
1035 | switch (CS.getKind()) { |
1036 | case ConversionSpecifier::bArg: |
1037 | case ConversionSpecifier::BArg: |
1038 | case ConversionSpecifier::cArg: |
1039 | case ConversionSpecifier::dArg: |
1040 | case ConversionSpecifier::iArg: |
1041 | case ConversionSpecifier::oArg: |
1042 | case ConversionSpecifier::uArg: |
1043 | case ConversionSpecifier::xArg: |
1044 | case ConversionSpecifier::XArg: |
1045 | case ConversionSpecifier::fArg: |
1046 | case ConversionSpecifier::FArg: |
1047 | case ConversionSpecifier::eArg: |
1048 | case ConversionSpecifier::EArg: |
1049 | case ConversionSpecifier::gArg: |
1050 | case ConversionSpecifier::GArg: |
1051 | case ConversionSpecifier::aArg: |
1052 | case ConversionSpecifier::AArg: |
1053 | case ConversionSpecifier::sArg: |
1054 | case ConversionSpecifier::pArg: |
1055 | case ConversionSpecifier::nArg: |
1056 | case ConversionSpecifier::ObjCObjArg: |
1057 | case ConversionSpecifier::ScanListArg: |
1058 | case ConversionSpecifier::PercentArg: |
1059 | case ConversionSpecifier::PArg: |
1060 | return true; |
1061 | case ConversionSpecifier::CArg: |
1062 | case ConversionSpecifier::SArg: |
1063 | return LangOpt.ObjC; |
1064 | case ConversionSpecifier::InvalidSpecifier: |
1065 | case ConversionSpecifier::FreeBSDbArg: |
1066 | case ConversionSpecifier::FreeBSDDArg: |
1067 | case ConversionSpecifier::FreeBSDrArg: |
1068 | case ConversionSpecifier::FreeBSDyArg: |
1069 | case ConversionSpecifier::PrintErrno: |
1070 | case ConversionSpecifier::DArg: |
1071 | case ConversionSpecifier::OArg: |
1072 | case ConversionSpecifier::UArg: |
1073 | case ConversionSpecifier::ZArg: |
1074 | return false; |
1075 | case ConversionSpecifier::rArg: |
1076 | case ConversionSpecifier::RArg: |
1077 | case ConversionSpecifier::kArg: |
1078 | case ConversionSpecifier::KArg: |
1079 | return LangOpt.FixedPoint; |
1080 | } |
1081 | llvm_unreachable("Invalid ConversionSpecifier Kind!" ); |
1082 | } |
1083 | |
1084 | bool FormatSpecifier::hasStandardLengthConversionCombination() const { |
1085 | if (LM.getKind() == LengthModifier::AsLongDouble) { |
1086 | switch(CS.getKind()) { |
1087 | case ConversionSpecifier::dArg: |
1088 | case ConversionSpecifier::iArg: |
1089 | case ConversionSpecifier::oArg: |
1090 | case ConversionSpecifier::uArg: |
1091 | case ConversionSpecifier::xArg: |
1092 | case ConversionSpecifier::XArg: |
1093 | return false; |
1094 | default: |
1095 | return true; |
1096 | } |
1097 | } |
1098 | return true; |
1099 | } |
1100 | |
1101 | std::optional<LengthModifier> |
1102 | FormatSpecifier::getCorrectedLengthModifier() const { |
1103 | if (CS.isAnyIntArg() || CS.getKind() == ConversionSpecifier::nArg) { |
1104 | if (LM.getKind() == LengthModifier::AsLongDouble || |
1105 | LM.getKind() == LengthModifier::AsQuad) { |
1106 | LengthModifier FixedLM(LM); |
1107 | FixedLM.setKind(LengthModifier::AsLongLong); |
1108 | return FixedLM; |
1109 | } |
1110 | } |
1111 | |
1112 | return std::nullopt; |
1113 | } |
1114 | |
1115 | bool FormatSpecifier::namedTypeToLengthModifier(QualType QT, |
1116 | LengthModifier &LM) { |
1117 | for (/**/; const auto *TT = QT->getAs<TypedefType>(); |
1118 | QT = TT->getDecl()->getUnderlyingType()) { |
1119 | const TypedefNameDecl *Typedef = TT->getDecl(); |
1120 | const IdentifierInfo *Identifier = Typedef->getIdentifier(); |
1121 | if (Identifier->getName() == "size_t" ) { |
1122 | LM.setKind(LengthModifier::AsSizeT); |
1123 | return true; |
1124 | } else if (Identifier->getName() == "ssize_t" ) { |
1125 | // Not C99, but common in Unix. |
1126 | LM.setKind(LengthModifier::AsSizeT); |
1127 | return true; |
1128 | } else if (Identifier->getName() == "intmax_t" ) { |
1129 | LM.setKind(LengthModifier::AsIntMax); |
1130 | return true; |
1131 | } else if (Identifier->getName() == "uintmax_t" ) { |
1132 | LM.setKind(LengthModifier::AsIntMax); |
1133 | return true; |
1134 | } else if (Identifier->getName() == "ptrdiff_t" ) { |
1135 | LM.setKind(LengthModifier::AsPtrDiff); |
1136 | return true; |
1137 | } |
1138 | } |
1139 | return false; |
1140 | } |
1141 | |