1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Handling of format strings in printf and friends. The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11//
12//===----------------------------------------------------------------------===//
13
14#include "FormatStringParsing.h"
15#include "clang/AST/FormatString.h"
16#include "clang/AST/OSLog.h"
17#include "clang/Basic/TargetInfo.h"
18#include "llvm/Support/Regex.h"
19
20using clang::analyze_format_string::ArgType;
21using clang::analyze_format_string::FormatStringHandler;
22using clang::analyze_format_string::LengthModifier;
23using clang::analyze_format_string::OptionalAmount;
24using clang::analyze_format_string::ConversionSpecifier;
25using clang::analyze_printf::PrintfSpecifier;
26
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
30 PrintfSpecifierResult;
31
32//===----------------------------------------------------------------------===//
33// Methods for parsing format strings.
34//===----------------------------------------------------------------------===//
35
36using analyze_format_string::ParseNonPositionAmount;
37
38static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
39 const char *Start, const char *&Beg, const char *E,
40 unsigned *argIndex) {
41 if (argIndex) {
42 FS.setPrecision(ParseNonPositionAmount(Beg, E, argIndex&: *argIndex));
43 } else {
44 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
45 p: analyze_format_string::PrecisionPos);
46 if (Amt.isInvalid())
47 return true;
48 FS.setPrecision(Amt);
49 }
50 return false;
51}
52
53static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
54 const char *FlagBeg, const char *E, bool Warn) {
55 StringRef Flag(FlagBeg, E - FlagBeg);
56 // Currently there is only one flag.
57 if (Flag == "tt") {
58 FS.setHasObjCTechnicalTerm(FlagBeg);
59 return false;
60 }
61 // Handle either the case of no flag or an invalid flag.
62 if (Warn) {
63 if (Flag == "")
64 H.HandleEmptyObjCModifierFlag(startFlags: FlagBeg, flagsLen: E - FlagBeg);
65 else
66 H.HandleInvalidObjCModifierFlag(startFlag: FlagBeg, flagLen: E - FlagBeg);
67 }
68 return true;
69}
70
71static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
72 const char *&Beg,
73 const char *E,
74 unsigned &argIndex,
75 const LangOptions &LO,
76 const TargetInfo &Target,
77 bool Warn,
78 bool isFreeBSDKPrintf) {
79
80 using namespace clang::analyze_format_string;
81 using namespace clang::analyze_printf;
82
83 const char *I = Beg;
84 const char *Start = nullptr;
85 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I != E ; ++I) {
89 char c = *I;
90 if (c == '\0') {
91 // Detect spurious null characters, which are likely errors.
92 H.HandleNullChar(nullCharacter: I);
93 return true;
94 }
95 if (c == '%') {
96 Start = I++; // Record the start of the format specifier.
97 break;
98 }
99 }
100
101 // No format specifier found?
102 if (!Start)
103 return false;
104
105 if (I == E) {
106 // No more characters left?
107 if (Warn)
108 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
109 return true;
110 }
111
112 PrintfSpecifier FS;
113 if (ParseArgPosition(H, CS&: FS, Start, Beg&: I, E))
114 return true;
115
116 if (I == E) {
117 // No more characters left?
118 if (Warn)
119 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
120 return true;
121 }
122
123 if (*I == '{') {
124 ++I;
125 unsigned char PrivacyFlags = 0;
126 StringRef MatchedStr;
127
128 do {
129 StringRef Str(I, E - I);
130 std::string Match = "^[[:space:]]*"
131 "(private|public|sensitive|mask\\.[^[:space:],}]*)"
132 "[[:space:]]*(,|})";
133 llvm::Regex R(Match);
134 SmallVector<StringRef, 2> Matches;
135
136 if (R.match(String: Str, Matches: &Matches)) {
137 MatchedStr = Matches[1];
138 I += Matches[0].size();
139
140 // Set the privacy flag if the privacy annotation in the
141 // comma-delimited segment is at least as strict as the privacy
142 // annotations in previous comma-delimited segments.
143 if (MatchedStr.starts_with(Prefix: "mask")) {
144 StringRef MaskType = MatchedStr.substr(Start: sizeof("mask.") - 1);
145 unsigned Size = MaskType.size();
146 if (Warn && (Size == 0 || Size > 8))
147 H.handleInvalidMaskType(MaskType);
148 FS.setMaskType(MaskType);
149 } else if (MatchedStr.equals(RHS: "sensitive"))
150 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
151 else if (PrivacyFlags !=
152 clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
153 MatchedStr.equals(RHS: "private"))
154 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
155 else if (PrivacyFlags == 0 && MatchedStr.equals(RHS: "public"))
156 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
157 } else {
158 size_t CommaOrBracePos =
159 Str.find_if(F: [](char c) { return c == ',' || c == '}'; });
160
161 if (CommaOrBracePos == StringRef::npos) {
162 // Neither a comma nor the closing brace was found.
163 if (Warn)
164 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
165 return true;
166 }
167
168 I += CommaOrBracePos + 1;
169 }
170 // Continue until the closing brace is found.
171 } while (*(I - 1) == ',');
172
173 // Set the privacy flag.
174 switch (PrivacyFlags) {
175 case 0:
176 break;
177 case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
178 FS.setIsPrivate(MatchedStr.data());
179 break;
180 case clang::analyze_os_log::OSLogBufferItem::IsPublic:
181 FS.setIsPublic(MatchedStr.data());
182 break;
183 case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
184 FS.setIsSensitive(MatchedStr.data());
185 break;
186 default:
187 llvm_unreachable("Unexpected privacy flag value");
188 }
189 }
190
191 // Look for flags (if any).
192 bool hasMore = true;
193 for ( ; I != E; ++I) {
194 switch (*I) {
195 default: hasMore = false; break;
196 case '\'':
197 // FIXME: POSIX specific. Always accept?
198 FS.setHasThousandsGrouping(I);
199 break;
200 case '-': FS.setIsLeftJustified(I); break;
201 case '+': FS.setHasPlusPrefix(I); break;
202 case ' ': FS.setHasSpacePrefix(I); break;
203 case '#': FS.setHasAlternativeForm(I); break;
204 case '0': FS.setHasLeadingZeros(I); break;
205 }
206 if (!hasMore)
207 break;
208 }
209
210 if (I == E) {
211 // No more characters left?
212 if (Warn)
213 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
214 return true;
215 }
216
217 // Look for the field width (if any).
218 if (ParseFieldWidth(H, CS&: FS, Start, Beg&: I, E,
219 argIndex: FS.usesPositionalArg() ? nullptr : &argIndex))
220 return true;
221
222 if (I == E) {
223 // No more characters left?
224 if (Warn)
225 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
226 return true;
227 }
228
229 // Look for the precision (if any).
230 if (*I == '.') {
231 ++I;
232 if (I == E) {
233 if (Warn)
234 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
235 return true;
236 }
237
238 if (ParsePrecision(H, FS, Start, Beg&: I, E,
239 argIndex: FS.usesPositionalArg() ? nullptr : &argIndex))
240 return true;
241
242 if (I == E) {
243 // No more characters left?
244 if (Warn)
245 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
246 return true;
247 }
248 }
249
250 if (ParseVectorModifier(H, FS, Beg&: I, E, LO))
251 return true;
252
253 // Look for the length modifier.
254 if (ParseLengthModifier(FS, Beg&: I, E, LO) && I == E) {
255 // No more characters left?
256 if (Warn)
257 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
258 return true;
259 }
260
261 // Look for the Objective-C modifier flags, if any.
262 // We parse these here, even if they don't apply to
263 // the conversion specifier, and then emit an error
264 // later if the conversion specifier isn't '@'. This
265 // enables better recovery, and we don't know if
266 // these flags are applicable until later.
267 const char *ObjCModifierFlagsStart = nullptr,
268 *ObjCModifierFlagsEnd = nullptr;
269 if (*I == '[') {
270 ObjCModifierFlagsStart = I;
271 ++I;
272 auto flagStart = I;
273 for (;; ++I) {
274 ObjCModifierFlagsEnd = I;
275 if (I == E) {
276 if (Warn)
277 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
278 return true;
279 }
280 // Did we find the closing ']'?
281 if (*I == ']') {
282 if (ParseObjCFlags(H, FS, FlagBeg: flagStart, E: I, Warn))
283 return true;
284 ++I;
285 break;
286 }
287 // There are no separators defined yet for multiple
288 // Objective-C modifier flags. When those are
289 // defined, this is the place to check.
290 }
291 }
292
293 if (*I == '\0') {
294 // Detect spurious null characters, which are likely errors.
295 H.HandleNullChar(nullCharacter: I);
296 return true;
297 }
298
299 // Finally, look for the conversion specifier.
300 const char *conversionPosition = I++;
301 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
302 switch (*conversionPosition) {
303 default:
304 break;
305 // C99: 7.19.6.1 (section 8).
306 case '%': k = ConversionSpecifier::PercentArg; break;
307 case 'A': k = ConversionSpecifier::AArg; break;
308 case 'E': k = ConversionSpecifier::EArg; break;
309 case 'F': k = ConversionSpecifier::FArg; break;
310 case 'G': k = ConversionSpecifier::GArg; break;
311 case 'X': k = ConversionSpecifier::XArg; break;
312 case 'a': k = ConversionSpecifier::aArg; break;
313 case 'c': k = ConversionSpecifier::cArg; break;
314 case 'd': k = ConversionSpecifier::dArg; break;
315 case 'e': k = ConversionSpecifier::eArg; break;
316 case 'f': k = ConversionSpecifier::fArg; break;
317 case 'g': k = ConversionSpecifier::gArg; break;
318 case 'i': k = ConversionSpecifier::iArg; break;
319 case 'n':
320 // Not handled, but reserved in OpenCL.
321 if (!LO.OpenCL)
322 k = ConversionSpecifier::nArg;
323 break;
324 case 'o': k = ConversionSpecifier::oArg; break;
325 case 'p': k = ConversionSpecifier::pArg; break;
326 case 's': k = ConversionSpecifier::sArg; break;
327 case 'u': k = ConversionSpecifier::uArg; break;
328 case 'x': k = ConversionSpecifier::xArg; break;
329 // C23.
330 case 'b':
331 if (isFreeBSDKPrintf)
332 k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
333 else
334 k = ConversionSpecifier::bArg;
335 break;
336 case 'B': k = ConversionSpecifier::BArg; break;
337 // POSIX specific.
338 case 'C': k = ConversionSpecifier::CArg; break;
339 case 'S': k = ConversionSpecifier::SArg; break;
340 // Apple extension for os_log
341 case 'P':
342 k = ConversionSpecifier::PArg;
343 break;
344 // Objective-C.
345 case '@': k = ConversionSpecifier::ObjCObjArg; break;
346 // Glibc specific.
347 case 'm': k = ConversionSpecifier::PrintErrno; break;
348 case 'r':
349 if (isFreeBSDKPrintf)
350 k = ConversionSpecifier::FreeBSDrArg; // int
351 else if (LO.FixedPoint)
352 k = ConversionSpecifier::rArg;
353 break;
354 case 'y':
355 if (isFreeBSDKPrintf)
356 k = ConversionSpecifier::FreeBSDyArg; // int
357 break;
358 // Apple-specific.
359 case 'D':
360 if (isFreeBSDKPrintf)
361 k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
362 else if (Target.getTriple().isOSDarwin())
363 k = ConversionSpecifier::DArg;
364 break;
365 case 'O':
366 if (Target.getTriple().isOSDarwin())
367 k = ConversionSpecifier::OArg;
368 break;
369 case 'U':
370 if (Target.getTriple().isOSDarwin())
371 k = ConversionSpecifier::UArg;
372 break;
373 // MS specific.
374 case 'Z':
375 if (Target.getTriple().isOSMSVCRT())
376 k = ConversionSpecifier::ZArg;
377 break;
378 // ISO/IEC TR 18037 (fixed-point) specific.
379 // NOTE: 'r' is handled up above since FreeBSD also supports %r.
380 case 'k':
381 if (LO.FixedPoint)
382 k = ConversionSpecifier::kArg;
383 break;
384 case 'K':
385 if (LO.FixedPoint)
386 k = ConversionSpecifier::KArg;
387 break;
388 case 'R':
389 if (LO.FixedPoint)
390 k = ConversionSpecifier::RArg;
391 break;
392 }
393
394 // Check to see if we used the Objective-C modifier flags with
395 // a conversion specifier other than '@'.
396 if (k != ConversionSpecifier::ObjCObjArg &&
397 k != ConversionSpecifier::InvalidSpecifier &&
398 ObjCModifierFlagsStart) {
399 H.HandleObjCFlagsWithNonObjCConversion(flagsStart: ObjCModifierFlagsStart,
400 flagsEnd: ObjCModifierFlagsEnd + 1,
401 conversionPosition);
402 return true;
403 }
404
405 PrintfConversionSpecifier CS(conversionPosition, k);
406 FS.setConversionSpecifier(CS);
407 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
408 FS.setArgIndex(argIndex++);
409 // FreeBSD kernel specific.
410 if (k == ConversionSpecifier::FreeBSDbArg ||
411 k == ConversionSpecifier::FreeBSDDArg)
412 argIndex++;
413
414 if (k == ConversionSpecifier::InvalidSpecifier) {
415 unsigned Len = I - Start;
416 if (ParseUTF8InvalidSpecifier(SpecifierBegin: Start, FmtStrEnd: E, Len)) {
417 CS.setEndScanList(Start + Len);
418 FS.setConversionSpecifier(CS);
419 }
420 // Assume the conversion takes one argument.
421 return !H.HandleInvalidPrintfConversionSpecifier(FS, startSpecifier: Start, specifierLen: Len);
422 }
423 return PrintfSpecifierResult(Start, FS);
424}
425
426bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
427 const char *I,
428 const char *E,
429 const LangOptions &LO,
430 const TargetInfo &Target,
431 bool isFreeBSDKPrintf) {
432
433 unsigned argIndex = 0;
434
435 // Keep looking for a format specifier until we have exhausted the string.
436 while (I != E) {
437 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, Beg&: I, E, argIndex,
438 LO, Target, Warn: true,
439 isFreeBSDKPrintf);
440 // Did a fail-stop error of any kind occur when parsing the specifier?
441 // If so, don't do any more processing.
442 if (FSR.shouldStop())
443 return true;
444 // Did we exhaust the string or encounter an error that
445 // we can recover from?
446 if (!FSR.hasValue())
447 continue;
448 // We have a format specifier. Pass it to the callback.
449 if (!H.HandlePrintfSpecifier(FS: FSR.getValue(), startSpecifier: FSR.getStart(),
450 specifierLen: I - FSR.getStart(), Target))
451 return true;
452 }
453 assert(I == E && "Format string not exhausted");
454 return false;
455}
456
457bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
458 const char *E,
459 const LangOptions &LO,
460 const TargetInfo &Target) {
461
462 unsigned argIndex = 0;
463
464 // Keep looking for a %s format specifier until we have exhausted the string.
465 FormatStringHandler H;
466 while (I != E) {
467 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, Beg&: I, E, argIndex,
468 LO, Target, Warn: false,
469 isFreeBSDKPrintf: false);
470 // Did a fail-stop error of any kind occur when parsing the specifier?
471 // If so, don't do any more processing.
472 if (FSR.shouldStop())
473 return false;
474 // Did we exhaust the string or encounter an error that
475 // we can recover from?
476 if (!FSR.hasValue())
477 continue;
478 const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
479 // Return true if this a %s format specifier.
480 if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
481 return true;
482 }
483 return false;
484}
485
486bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
487 const char *Begin, const char *End, const LangOptions &LO,
488 const TargetInfo &Target) {
489 unsigned ArgIndex = 0;
490 // Keep looking for a formatting specifier until we have exhausted the string.
491 FormatStringHandler H;
492 while (Begin != End) {
493 const PrintfSpecifierResult &FSR =
494 ParsePrintfSpecifier(H, Beg&: Begin, E: End, argIndex&: ArgIndex, LO, Target, Warn: false, isFreeBSDKPrintf: false);
495 if (FSR.shouldStop())
496 break;
497 if (FSR.hasValue())
498 return true;
499 }
500 return false;
501}
502
503//===----------------------------------------------------------------------===//
504// Methods on PrintfSpecifier.
505//===----------------------------------------------------------------------===//
506
507ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
508 bool IsObjCLiteral) const {
509 if (CS.getKind() == ConversionSpecifier::cArg)
510 switch (LM.getKind()) {
511 case LengthModifier::None:
512 return Ctx.IntTy;
513 case LengthModifier::AsLong:
514 case LengthModifier::AsWide:
515 return ArgType(ArgType::WIntTy, "wint_t");
516 case LengthModifier::AsShort:
517 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
518 return Ctx.IntTy;
519 [[fallthrough]];
520 default:
521 return ArgType::Invalid();
522 }
523
524 if (CS.isIntArg())
525 switch (LM.getKind()) {
526 case LengthModifier::AsLongDouble:
527 // GNU extension.
528 return Ctx.LongLongTy;
529 case LengthModifier::None:
530 case LengthModifier::AsShortLong:
531 return Ctx.IntTy;
532 case LengthModifier::AsInt32:
533 return ArgType(Ctx.IntTy, "__int32");
534 case LengthModifier::AsChar:
535 return ArgType::AnyCharTy;
536 case LengthModifier::AsShort: return Ctx.ShortTy;
537 case LengthModifier::AsLong: return Ctx.LongTy;
538 case LengthModifier::AsLongLong:
539 case LengthModifier::AsQuad:
540 return Ctx.LongLongTy;
541 case LengthModifier::AsInt64:
542 return ArgType(Ctx.LongLongTy, "__int64");
543 case LengthModifier::AsIntMax:
544 return ArgType(Ctx.getIntMaxType(), "intmax_t");
545 case LengthModifier::AsSizeT:
546 return ArgType::makeSizeT(A: ArgType(Ctx.getSignedSizeType(), "ssize_t"));
547 case LengthModifier::AsInt3264:
548 return Ctx.getTargetInfo().getTriple().isArch64Bit()
549 ? ArgType(Ctx.LongLongTy, "__int64")
550 : ArgType(Ctx.IntTy, "__int32");
551 case LengthModifier::AsPtrDiff:
552 return ArgType::makePtrdiffT(
553 A: ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
554 case LengthModifier::AsAllocate:
555 case LengthModifier::AsMAllocate:
556 case LengthModifier::AsWide:
557 return ArgType::Invalid();
558 }
559
560 if (CS.isUIntArg())
561 switch (LM.getKind()) {
562 case LengthModifier::AsLongDouble:
563 // GNU extension.
564 return Ctx.UnsignedLongLongTy;
565 case LengthModifier::None:
566 case LengthModifier::AsShortLong:
567 return Ctx.UnsignedIntTy;
568 case LengthModifier::AsInt32:
569 return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
570 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
571 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
572 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
573 case LengthModifier::AsLongLong:
574 case LengthModifier::AsQuad:
575 return Ctx.UnsignedLongLongTy;
576 case LengthModifier::AsInt64:
577 return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
578 case LengthModifier::AsIntMax:
579 return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
580 case LengthModifier::AsSizeT:
581 return ArgType::makeSizeT(A: ArgType(Ctx.getSizeType(), "size_t"));
582 case LengthModifier::AsInt3264:
583 return Ctx.getTargetInfo().getTriple().isArch64Bit()
584 ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
585 : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
586 case LengthModifier::AsPtrDiff:
587 return ArgType::makePtrdiffT(
588 A: ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
589 case LengthModifier::AsAllocate:
590 case LengthModifier::AsMAllocate:
591 case LengthModifier::AsWide:
592 return ArgType::Invalid();
593 }
594
595 if (CS.isDoubleArg()) {
596 if (!VectorNumElts.isInvalid()) {
597 switch (LM.getKind()) {
598 case LengthModifier::AsShort:
599 return Ctx.HalfTy;
600 case LengthModifier::AsShortLong:
601 return Ctx.FloatTy;
602 case LengthModifier::AsLong:
603 default:
604 return Ctx.DoubleTy;
605 }
606 }
607
608 if (LM.getKind() == LengthModifier::AsLongDouble)
609 return Ctx.LongDoubleTy;
610 return Ctx.DoubleTy;
611 }
612
613 if (CS.getKind() == ConversionSpecifier::nArg) {
614 switch (LM.getKind()) {
615 case LengthModifier::None:
616 return ArgType::PtrTo(A: Ctx.IntTy);
617 case LengthModifier::AsChar:
618 return ArgType::PtrTo(A: Ctx.SignedCharTy);
619 case LengthModifier::AsShort:
620 return ArgType::PtrTo(A: Ctx.ShortTy);
621 case LengthModifier::AsLong:
622 return ArgType::PtrTo(A: Ctx.LongTy);
623 case LengthModifier::AsLongLong:
624 case LengthModifier::AsQuad:
625 return ArgType::PtrTo(A: Ctx.LongLongTy);
626 case LengthModifier::AsIntMax:
627 return ArgType::PtrTo(A: ArgType(Ctx.getIntMaxType(), "intmax_t"));
628 case LengthModifier::AsSizeT:
629 return ArgType::PtrTo(A: ArgType(Ctx.getSignedSizeType(), "ssize_t"));
630 case LengthModifier::AsPtrDiff:
631 return ArgType::PtrTo(A: ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
632 case LengthModifier::AsLongDouble:
633 return ArgType(); // FIXME: Is this a known extension?
634 case LengthModifier::AsAllocate:
635 case LengthModifier::AsMAllocate:
636 case LengthModifier::AsInt32:
637 case LengthModifier::AsInt3264:
638 case LengthModifier::AsInt64:
639 case LengthModifier::AsWide:
640 return ArgType::Invalid();
641 case LengthModifier::AsShortLong:
642 llvm_unreachable("only used for OpenCL which doesn not handle nArg");
643 }
644 }
645
646 if (CS.isFixedPointArg() && !Ctx.getLangOpts().FixedPoint)
647 return ArgType::Invalid();
648
649 switch (CS.getKind()) {
650 case ConversionSpecifier::sArg:
651 if (LM.getKind() == LengthModifier::AsWideChar) {
652 if (IsObjCLiteral)
653 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
654 "const unichar *");
655 return ArgType(ArgType::WCStrTy, "wchar_t *");
656 }
657 if (LM.getKind() == LengthModifier::AsWide)
658 return ArgType(ArgType::WCStrTy, "wchar_t *");
659 return ArgType::CStrTy;
660 case ConversionSpecifier::SArg:
661 if (IsObjCLiteral)
662 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
663 "const unichar *");
664 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
665 LM.getKind() == LengthModifier::AsShort)
666 return ArgType::CStrTy;
667 return ArgType(ArgType::WCStrTy, "wchar_t *");
668 case ConversionSpecifier::CArg:
669 if (IsObjCLiteral)
670 return ArgType(Ctx.UnsignedShortTy, "unichar");
671 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
672 LM.getKind() == LengthModifier::AsShort)
673 return Ctx.IntTy;
674 return ArgType(Ctx.WideCharTy, "wchar_t");
675 case ConversionSpecifier::pArg:
676 case ConversionSpecifier::PArg:
677 return ArgType::CPointerTy;
678 case ConversionSpecifier::ObjCObjArg:
679 return ArgType::ObjCPointerTy;
680 case ConversionSpecifier::kArg:
681 switch (LM.getKind()) {
682 case LengthModifier::None:
683 return Ctx.AccumTy;
684 case LengthModifier::AsShort:
685 return Ctx.ShortAccumTy;
686 case LengthModifier::AsLong:
687 return Ctx.LongAccumTy;
688 default:
689 return ArgType::Invalid();
690 }
691 case ConversionSpecifier::KArg:
692 switch (LM.getKind()) {
693 case LengthModifier::None:
694 return Ctx.UnsignedAccumTy;
695 case LengthModifier::AsShort:
696 return Ctx.UnsignedShortAccumTy;
697 case LengthModifier::AsLong:
698 return Ctx.UnsignedLongAccumTy;
699 default:
700 return ArgType::Invalid();
701 }
702 case ConversionSpecifier::rArg:
703 switch (LM.getKind()) {
704 case LengthModifier::None:
705 return Ctx.FractTy;
706 case LengthModifier::AsShort:
707 return Ctx.ShortFractTy;
708 case LengthModifier::AsLong:
709 return Ctx.LongFractTy;
710 default:
711 return ArgType::Invalid();
712 }
713 case ConversionSpecifier::RArg:
714 switch (LM.getKind()) {
715 case LengthModifier::None:
716 return Ctx.UnsignedFractTy;
717 case LengthModifier::AsShort:
718 return Ctx.UnsignedShortFractTy;
719 case LengthModifier::AsLong:
720 return Ctx.UnsignedLongFractTy;
721 default:
722 return ArgType::Invalid();
723 }
724 default:
725 break;
726 }
727
728 // FIXME: Handle other cases.
729 return ArgType();
730}
731
732
733ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
734 bool IsObjCLiteral) const {
735 const PrintfConversionSpecifier &CS = getConversionSpecifier();
736
737 if (!CS.consumesDataArgument())
738 return ArgType::Invalid();
739
740 ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
741 if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
742 return ScalarTy;
743
744 return ScalarTy.makeVectorType(C&: Ctx, NumElts: VectorNumElts.getConstantAmount());
745}
746
747bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
748 ASTContext &Ctx, bool IsObjCLiteral) {
749 // %n is different from other conversion specifiers; don't try to fix it.
750 if (CS.getKind() == ConversionSpecifier::nArg)
751 return false;
752
753 // Handle Objective-C objects first. Note that while the '%@' specifier will
754 // not warn for structure pointer or void pointer arguments (because that's
755 // how CoreFoundation objects are implemented), we only show a fixit for '%@'
756 // if we know it's an object (block, id, class, or __attribute__((NSObject))).
757 if (QT->isObjCRetainableType()) {
758 if (!IsObjCLiteral)
759 return false;
760
761 CS.setKind(ConversionSpecifier::ObjCObjArg);
762
763 // Disable irrelevant flags
764 HasThousandsGrouping = false;
765 HasPlusPrefix = false;
766 HasSpacePrefix = false;
767 HasAlternativeForm = false;
768 HasLeadingZeroes = false;
769 Precision.setHowSpecified(OptionalAmount::NotSpecified);
770 LM.setKind(LengthModifier::None);
771
772 return true;
773 }
774
775 // Handle strings next (char *, wchar_t *)
776 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
777 CS.setKind(ConversionSpecifier::sArg);
778
779 // Disable irrelevant flags
780 HasAlternativeForm = false;
781 HasLeadingZeroes = false;
782
783 // Set the long length modifier for wide characters
784 if (QT->getPointeeType()->isWideCharType())
785 LM.setKind(LengthModifier::AsWideChar);
786 else
787 LM.setKind(LengthModifier::None);
788
789 return true;
790 }
791
792 // If it's an enum, get its underlying type.
793 if (const EnumType *ETy = QT->getAs<EnumType>())
794 QT = ETy->getDecl()->getIntegerType();
795
796 const BuiltinType *BT = QT->getAs<BuiltinType>();
797 if (!BT) {
798 const VectorType *VT = QT->getAs<VectorType>();
799 if (VT) {
800 QT = VT->getElementType();
801 BT = QT->getAs<BuiltinType>();
802 VectorNumElts = OptionalAmount(VT->getNumElements());
803 }
804 }
805
806 // We can only work with builtin types.
807 if (!BT)
808 return false;
809
810 // Set length modifier
811 switch (BT->getKind()) {
812 case BuiltinType::Bool:
813 case BuiltinType::WChar_U:
814 case BuiltinType::WChar_S:
815 case BuiltinType::Char8: // FIXME: Treat like 'char'?
816 case BuiltinType::Char16:
817 case BuiltinType::Char32:
818 case BuiltinType::UInt128:
819 case BuiltinType::Int128:
820 case BuiltinType::Half:
821 case BuiltinType::BFloat16:
822 case BuiltinType::Float16:
823 case BuiltinType::Float128:
824 case BuiltinType::Ibm128:
825 case BuiltinType::ShortAccum:
826 case BuiltinType::Accum:
827 case BuiltinType::LongAccum:
828 case BuiltinType::UShortAccum:
829 case BuiltinType::UAccum:
830 case BuiltinType::ULongAccum:
831 case BuiltinType::ShortFract:
832 case BuiltinType::Fract:
833 case BuiltinType::LongFract:
834 case BuiltinType::UShortFract:
835 case BuiltinType::UFract:
836 case BuiltinType::ULongFract:
837 case BuiltinType::SatShortAccum:
838 case BuiltinType::SatAccum:
839 case BuiltinType::SatLongAccum:
840 case BuiltinType::SatUShortAccum:
841 case BuiltinType::SatUAccum:
842 case BuiltinType::SatULongAccum:
843 case BuiltinType::SatShortFract:
844 case BuiltinType::SatFract:
845 case BuiltinType::SatLongFract:
846 case BuiltinType::SatUShortFract:
847 case BuiltinType::SatUFract:
848 case BuiltinType::SatULongFract:
849 // Various types which are non-trivial to correct.
850 return false;
851
852#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
853 case BuiltinType::Id:
854#include "clang/Basic/OpenCLImageTypes.def"
855#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
856 case BuiltinType::Id:
857#include "clang/Basic/OpenCLExtensionTypes.def"
858#define SVE_TYPE(Name, Id, SingletonId) \
859 case BuiltinType::Id:
860#include "clang/Basic/AArch64SVEACLETypes.def"
861#define PPC_VECTOR_TYPE(Name, Id, Size) \
862 case BuiltinType::Id:
863#include "clang/Basic/PPCTypes.def"
864#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
865#include "clang/Basic/RISCVVTypes.def"
866#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
867#include "clang/Basic/WebAssemblyReferenceTypes.def"
868#define SIGNED_TYPE(Id, SingletonId)
869#define UNSIGNED_TYPE(Id, SingletonId)
870#define FLOATING_TYPE(Id, SingletonId)
871#define BUILTIN_TYPE(Id, SingletonId) \
872 case BuiltinType::Id:
873#include "clang/AST/BuiltinTypes.def"
874 // Misc other stuff which doesn't make sense here.
875 return false;
876
877 case BuiltinType::UInt:
878 case BuiltinType::Int:
879 case BuiltinType::Float:
880 LM.setKind(VectorNumElts.isInvalid() ?
881 LengthModifier::None : LengthModifier::AsShortLong);
882 break;
883 case BuiltinType::Double:
884 LM.setKind(VectorNumElts.isInvalid() ?
885 LengthModifier::None : LengthModifier::AsLong);
886 break;
887 case BuiltinType::Char_U:
888 case BuiltinType::UChar:
889 case BuiltinType::Char_S:
890 case BuiltinType::SChar:
891 LM.setKind(LengthModifier::AsChar);
892 break;
893
894 case BuiltinType::Short:
895 case BuiltinType::UShort:
896 LM.setKind(LengthModifier::AsShort);
897 break;
898
899 case BuiltinType::Long:
900 case BuiltinType::ULong:
901 LM.setKind(LengthModifier::AsLong);
902 break;
903
904 case BuiltinType::LongLong:
905 case BuiltinType::ULongLong:
906 LM.setKind(LengthModifier::AsLongLong);
907 break;
908
909 case BuiltinType::LongDouble:
910 LM.setKind(LengthModifier::AsLongDouble);
911 break;
912 }
913
914 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
915 if (LangOpt.C99 || LangOpt.CPlusPlus11)
916 namedTypeToLengthModifier(QT, LM);
917
918 // If fixing the length modifier was enough, we might be done.
919 if (hasValidLengthModifier(Target: Ctx.getTargetInfo(), LO: LangOpt)) {
920 // If we're going to offer a fix anyway, make sure the sign matches.
921 switch (CS.getKind()) {
922 case ConversionSpecifier::uArg:
923 case ConversionSpecifier::UArg:
924 if (QT->isSignedIntegerType())
925 CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
926 break;
927 case ConversionSpecifier::dArg:
928 case ConversionSpecifier::DArg:
929 case ConversionSpecifier::iArg:
930 if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
931 CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
932 break;
933 default:
934 // Other specifiers do not have signed/unsigned variants.
935 break;
936 }
937
938 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
939 if (ATR.isValid() && ATR.matchesType(C&: Ctx, argTy: QT))
940 return true;
941 }
942
943 // Set conversion specifier and disable any flags which do not apply to it.
944 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
945 if (!QT->getAs<TypedefType>() && QT->isCharType()) {
946 CS.setKind(ConversionSpecifier::cArg);
947 LM.setKind(LengthModifier::None);
948 Precision.setHowSpecified(OptionalAmount::NotSpecified);
949 HasAlternativeForm = false;
950 HasLeadingZeroes = false;
951 HasPlusPrefix = false;
952 }
953 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
954 else if (QT->isRealFloatingType()) {
955 CS.setKind(ConversionSpecifier::fArg);
956 } else if (QT->isSignedIntegerType()) {
957 CS.setKind(ConversionSpecifier::dArg);
958 HasAlternativeForm = false;
959 } else if (QT->isUnsignedIntegerType()) {
960 CS.setKind(ConversionSpecifier::uArg);
961 HasAlternativeForm = false;
962 HasPlusPrefix = false;
963 } else {
964 llvm_unreachable("Unexpected type");
965 }
966
967 return true;
968}
969
970void PrintfSpecifier::toString(raw_ostream &os) const {
971 // Whilst some features have no defined order, we are using the order
972 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
973 os << "%";
974
975 // Positional args
976 if (usesPositionalArg()) {
977 os << getPositionalArgIndex() << "$";
978 }
979
980 // Conversion flags
981 if (IsLeftJustified) os << "-";
982 if (HasPlusPrefix) os << "+";
983 if (HasSpacePrefix) os << " ";
984 if (HasAlternativeForm) os << "#";
985 if (HasLeadingZeroes) os << "0";
986
987 // Minimum field width
988 FieldWidth.toString(os);
989 // Precision
990 Precision.toString(os);
991
992 // Vector modifier
993 if (!VectorNumElts.isInvalid())
994 os << 'v' << VectorNumElts.getConstantAmount();
995
996 // Length modifier
997 os << LM.toString();
998 // Conversion specifier
999 os << CS.toString();
1000}
1001
1002bool PrintfSpecifier::hasValidPlusPrefix() const {
1003 if (!HasPlusPrefix)
1004 return true;
1005
1006 // The plus prefix only makes sense for signed conversions
1007 switch (CS.getKind()) {
1008 case ConversionSpecifier::dArg:
1009 case ConversionSpecifier::DArg:
1010 case ConversionSpecifier::iArg:
1011 case ConversionSpecifier::fArg:
1012 case ConversionSpecifier::FArg:
1013 case ConversionSpecifier::eArg:
1014 case ConversionSpecifier::EArg:
1015 case ConversionSpecifier::gArg:
1016 case ConversionSpecifier::GArg:
1017 case ConversionSpecifier::aArg:
1018 case ConversionSpecifier::AArg:
1019 case ConversionSpecifier::FreeBSDrArg:
1020 case ConversionSpecifier::FreeBSDyArg:
1021 case ConversionSpecifier::rArg:
1022 case ConversionSpecifier::kArg:
1023 return true;
1024
1025 default:
1026 return false;
1027 }
1028}
1029
1030bool PrintfSpecifier::hasValidAlternativeForm() const {
1031 if (!HasAlternativeForm)
1032 return true;
1033
1034 // Alternate form flag only valid with the bBoxXaAeEfFgGrRkK conversions
1035 switch (CS.getKind()) {
1036 case ConversionSpecifier::bArg:
1037 case ConversionSpecifier::BArg:
1038 case ConversionSpecifier::oArg:
1039 case ConversionSpecifier::OArg:
1040 case ConversionSpecifier::xArg:
1041 case ConversionSpecifier::XArg:
1042 case ConversionSpecifier::aArg:
1043 case ConversionSpecifier::AArg:
1044 case ConversionSpecifier::eArg:
1045 case ConversionSpecifier::EArg:
1046 case ConversionSpecifier::fArg:
1047 case ConversionSpecifier::FArg:
1048 case ConversionSpecifier::gArg:
1049 case ConversionSpecifier::GArg:
1050 case ConversionSpecifier::FreeBSDrArg:
1051 case ConversionSpecifier::FreeBSDyArg:
1052 case ConversionSpecifier::rArg:
1053 case ConversionSpecifier::RArg:
1054 case ConversionSpecifier::kArg:
1055 case ConversionSpecifier::KArg:
1056 return true;
1057
1058 default:
1059 return false;
1060 }
1061}
1062
1063bool PrintfSpecifier::hasValidLeadingZeros() const {
1064 if (!HasLeadingZeroes)
1065 return true;
1066
1067 // Leading zeroes flag only valid with the bBdiouxXaAeEfFgGrRkK conversions
1068 switch (CS.getKind()) {
1069 case ConversionSpecifier::bArg:
1070 case ConversionSpecifier::BArg:
1071 case ConversionSpecifier::dArg:
1072 case ConversionSpecifier::DArg:
1073 case ConversionSpecifier::iArg:
1074 case ConversionSpecifier::oArg:
1075 case ConversionSpecifier::OArg:
1076 case ConversionSpecifier::uArg:
1077 case ConversionSpecifier::UArg:
1078 case ConversionSpecifier::xArg:
1079 case ConversionSpecifier::XArg:
1080 case ConversionSpecifier::aArg:
1081 case ConversionSpecifier::AArg:
1082 case ConversionSpecifier::eArg:
1083 case ConversionSpecifier::EArg:
1084 case ConversionSpecifier::fArg:
1085 case ConversionSpecifier::FArg:
1086 case ConversionSpecifier::gArg:
1087 case ConversionSpecifier::GArg:
1088 case ConversionSpecifier::FreeBSDrArg:
1089 case ConversionSpecifier::FreeBSDyArg:
1090 case ConversionSpecifier::rArg:
1091 case ConversionSpecifier::RArg:
1092 case ConversionSpecifier::kArg:
1093 case ConversionSpecifier::KArg:
1094 return true;
1095
1096 default:
1097 return false;
1098 }
1099}
1100
1101bool PrintfSpecifier::hasValidSpacePrefix() const {
1102 if (!HasSpacePrefix)
1103 return true;
1104
1105 // The space prefix only makes sense for signed conversions
1106 switch (CS.getKind()) {
1107 case ConversionSpecifier::dArg:
1108 case ConversionSpecifier::DArg:
1109 case ConversionSpecifier::iArg:
1110 case ConversionSpecifier::fArg:
1111 case ConversionSpecifier::FArg:
1112 case ConversionSpecifier::eArg:
1113 case ConversionSpecifier::EArg:
1114 case ConversionSpecifier::gArg:
1115 case ConversionSpecifier::GArg:
1116 case ConversionSpecifier::aArg:
1117 case ConversionSpecifier::AArg:
1118 case ConversionSpecifier::FreeBSDrArg:
1119 case ConversionSpecifier::FreeBSDyArg:
1120 case ConversionSpecifier::rArg:
1121 case ConversionSpecifier::kArg:
1122 return true;
1123
1124 default:
1125 return false;
1126 }
1127}
1128
1129bool PrintfSpecifier::hasValidLeftJustified() const {
1130 if (!IsLeftJustified)
1131 return true;
1132
1133 // The left justified flag is valid for all conversions except n
1134 switch (CS.getKind()) {
1135 case ConversionSpecifier::nArg:
1136 return false;
1137
1138 default:
1139 return true;
1140 }
1141}
1142
1143bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1144 if (!HasThousandsGrouping)
1145 return true;
1146
1147 switch (CS.getKind()) {
1148 case ConversionSpecifier::dArg:
1149 case ConversionSpecifier::DArg:
1150 case ConversionSpecifier::iArg:
1151 case ConversionSpecifier::uArg:
1152 case ConversionSpecifier::UArg:
1153 case ConversionSpecifier::fArg:
1154 case ConversionSpecifier::FArg:
1155 case ConversionSpecifier::gArg:
1156 case ConversionSpecifier::GArg:
1157 return true;
1158 default:
1159 return false;
1160 }
1161}
1162
1163bool PrintfSpecifier::hasValidPrecision() const {
1164 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1165 return true;
1166
1167 // Precision is only valid with the bBdiouxXaAeEfFgGsPrRkK conversions
1168 switch (CS.getKind()) {
1169 case ConversionSpecifier::bArg:
1170 case ConversionSpecifier::BArg:
1171 case ConversionSpecifier::dArg:
1172 case ConversionSpecifier::DArg:
1173 case ConversionSpecifier::iArg:
1174 case ConversionSpecifier::oArg:
1175 case ConversionSpecifier::OArg:
1176 case ConversionSpecifier::uArg:
1177 case ConversionSpecifier::UArg:
1178 case ConversionSpecifier::xArg:
1179 case ConversionSpecifier::XArg:
1180 case ConversionSpecifier::aArg:
1181 case ConversionSpecifier::AArg:
1182 case ConversionSpecifier::eArg:
1183 case ConversionSpecifier::EArg:
1184 case ConversionSpecifier::fArg:
1185 case ConversionSpecifier::FArg:
1186 case ConversionSpecifier::gArg:
1187 case ConversionSpecifier::GArg:
1188 case ConversionSpecifier::sArg:
1189 case ConversionSpecifier::FreeBSDrArg:
1190 case ConversionSpecifier::FreeBSDyArg:
1191 case ConversionSpecifier::PArg:
1192 case ConversionSpecifier::rArg:
1193 case ConversionSpecifier::RArg:
1194 case ConversionSpecifier::kArg:
1195 case ConversionSpecifier::KArg:
1196 return true;
1197
1198 default:
1199 return false;
1200 }
1201}
1202bool PrintfSpecifier::hasValidFieldWidth() const {
1203 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1204 return true;
1205
1206 // The field width is valid for all conversions except n
1207 switch (CS.getKind()) {
1208 case ConversionSpecifier::nArg:
1209 return false;
1210
1211 default:
1212 return true;
1213 }
1214}
1215

source code of clang/lib/AST/PrintfFormatString.cpp