1 | //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Scanf/printf implementation for use in *Sanitizer interceptors. |
10 | // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html |
11 | // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html |
12 | // with a few common GNU extensions. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include <stdarg.h> |
17 | |
18 | static const char *parse_number(const char *p, int *out) { |
19 | *out = internal_atoll(nptr: p); |
20 | while (*p >= '0' && *p <= '9') |
21 | ++p; |
22 | return p; |
23 | } |
24 | |
25 | static const char *maybe_parse_param_index(const char *p, int *out) { |
26 | // n$ |
27 | if (*p >= '0' && *p <= '9') { |
28 | int number; |
29 | const char *q = parse_number(p, out: &number); |
30 | CHECK(q); |
31 | if (*q == '$') { |
32 | *out = number; |
33 | p = q + 1; |
34 | } |
35 | } |
36 | |
37 | // Otherwise, do not change p. This will be re-parsed later as the field |
38 | // width. |
39 | return p; |
40 | } |
41 | |
42 | static bool char_is_one_of(char c, const char *s) { |
43 | return !!internal_strchr(s, c); |
44 | } |
45 | |
46 | static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { |
47 | if (char_is_one_of(c: *p, s: "jztLq" )) { |
48 | ll[0] = *p; |
49 | ++p; |
50 | } else if (*p == 'h') { |
51 | ll[0] = 'h'; |
52 | ++p; |
53 | if (*p == 'h') { |
54 | ll[1] = 'h'; |
55 | ++p; |
56 | } |
57 | } else if (*p == 'l') { |
58 | ll[0] = 'l'; |
59 | ++p; |
60 | if (*p == 'l') { |
61 | ll[1] = 'l'; |
62 | ++p; |
63 | } |
64 | } |
65 | return p; |
66 | } |
67 | |
68 | // Returns true if the character is an integer conversion specifier. |
69 | static bool format_is_integer_conv(char c) { |
70 | return char_is_one_of(c, s: "diouxXn" ); |
71 | } |
72 | |
73 | // Returns true if the character is an floating point conversion specifier. |
74 | static bool format_is_float_conv(char c) { |
75 | return char_is_one_of(c, s: "aAeEfFgG" ); |
76 | } |
77 | |
78 | // Returns string output character size for string-like conversions, |
79 | // or 0 if the conversion is invalid. |
80 | static int format_get_char_size(char convSpecifier, |
81 | const char lengthModifier[2]) { |
82 | if (char_is_one_of(c: convSpecifier, s: "CS" )) { |
83 | return sizeof(wchar_t); |
84 | } |
85 | |
86 | if (char_is_one_of(c: convSpecifier, s: "cs[" )) { |
87 | if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') |
88 | return sizeof(wchar_t); |
89 | else if (lengthModifier[0] == '\0') |
90 | return sizeof(char); |
91 | } |
92 | |
93 | return 0; |
94 | } |
95 | |
// Special (non-positive) results of the *_get_value_size() helpers. Positive
// results are plain byte counts.
enum FormatStoreSize {
  // The size is only known after the fact: it is the wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2,
  // The size is only known after the fact: it is the strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // The conversion specifier is not valid.
  FSS_INVALID = 0
};
106 | |
107 | // Returns the memory size of a format directive (if >0), or a value of |
108 | // FormatStoreSize. |
109 | static int format_get_value_size(char convSpecifier, |
110 | const char lengthModifier[2], |
111 | bool promote_float) { |
112 | if (format_is_integer_conv(c: convSpecifier)) { |
113 | switch (lengthModifier[0]) { |
114 | case 'h': |
115 | return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); |
116 | case 'l': |
117 | return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); |
118 | case 'q': |
119 | return sizeof(long long); |
120 | case 'L': |
121 | return sizeof(long long); |
122 | case 'j': |
123 | return sizeof(INTMAX_T); |
124 | case 'z': |
125 | return sizeof(SIZE_T); |
126 | case 't': |
127 | return sizeof(PTRDIFF_T); |
128 | case 0: |
129 | return sizeof(int); |
130 | default: |
131 | return FSS_INVALID; |
132 | } |
133 | } |
134 | |
135 | if (format_is_float_conv(c: convSpecifier)) { |
136 | switch (lengthModifier[0]) { |
137 | case 'L': |
138 | case 'q': |
139 | return sizeof(long double); |
140 | case 'l': |
141 | return lengthModifier[1] == 'l' ? sizeof(long double) |
142 | : sizeof(double); |
143 | case 0: |
144 | // Printf promotes floats to doubles but scanf does not |
145 | return promote_float ? sizeof(double) : sizeof(float); |
146 | default: |
147 | return FSS_INVALID; |
148 | } |
149 | } |
150 | |
151 | if (convSpecifier == 'p') { |
152 | if (lengthModifier[0] != 0) |
153 | return FSS_INVALID; |
154 | return sizeof(void *); |
155 | } |
156 | |
157 | return FSS_INVALID; |
158 | } |
159 | |
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  int argIdx;              // Positional index from "%n$", or -1 if absent.
  int fieldWidth;          // Maximum field width; 0 when none was given.
  const char *begin;       // Points at the '%' that starts the directive.
  const char *end;         // Points just past the last consumed character.
  bool suppressed;         // "*": assignment is suppressed.
  bool allocate;           // "m": the callee allocates the buffer.
  char lengthModifier[2];  // E.g. {'l','l'}; unused entries are 0.
  char convSpecifier;      // Conversion character; 0 if none was parsed.
  bool maybeGnuMalloc;     // Could be the old GNU %as / %aS / %a[...] form.
};
171 | |
172 | // Parse scanf format string. If a valid directive in encountered, it is |
173 | // returned in dir. This function returns the pointer to the first |
174 | // unprocessed character, or 0 in case of error. |
175 | // In case of the end-of-string, a pointer to the closing \0 is returned. |
176 | static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, |
177 | ScanfDirective *dir) { |
178 | internal_memset(s: dir, c: 0, n: sizeof(*dir)); |
179 | dir->argIdx = -1; |
180 | |
181 | while (*p) { |
182 | if (*p != '%') { |
183 | ++p; |
184 | continue; |
185 | } |
186 | dir->begin = p; |
187 | ++p; |
188 | // %% |
189 | if (*p == '%') { |
190 | ++p; |
191 | continue; |
192 | } |
193 | if (*p == '\0') { |
194 | return nullptr; |
195 | } |
196 | // %n$ |
197 | p = maybe_parse_param_index(p, out: &dir->argIdx); |
198 | CHECK(p); |
199 | // * |
200 | if (*p == '*') { |
201 | dir->suppressed = true; |
202 | ++p; |
203 | } |
204 | // Field width |
205 | if (*p >= '0' && *p <= '9') { |
206 | p = parse_number(p, out: &dir->fieldWidth); |
207 | CHECK(p); |
208 | if (dir->fieldWidth <= 0) // Width if at all must be non-zero |
209 | return nullptr; |
210 | } |
211 | // m |
212 | if (*p == 'm') { |
213 | dir->allocate = true; |
214 | ++p; |
215 | } |
216 | // Length modifier. |
217 | p = maybe_parse_length_modifier(p, ll: dir->lengthModifier); |
218 | // Conversion specifier. |
219 | dir->convSpecifier = *p++; |
220 | // Consume %[...] expression. |
221 | if (dir->convSpecifier == '[') { |
222 | if (*p == '^') |
223 | ++p; |
224 | if (*p == ']') |
225 | ++p; |
226 | while (*p && *p != ']') |
227 | ++p; |
228 | if (*p == 0) |
229 | return nullptr; // unexpected end of string |
230 | // Consume the closing ']'. |
231 | ++p; |
232 | } |
233 | // This is unfortunately ambiguous between old GNU extension |
234 | // of %as, %aS and %a[...] and newer POSIX %a followed by |
235 | // letters s, S or [. |
236 | if (allowGnuMalloc && dir->convSpecifier == 'a' && |
237 | !dir->lengthModifier[0]) { |
238 | if (*p == 's' || *p == 'S') { |
239 | dir->maybeGnuMalloc = true; |
240 | ++p; |
241 | } else if (*p == '[') { |
242 | // Watch for %a[h-j%d], if % appears in the |
243 | // [...] range, then we need to give up, we don't know |
244 | // if scanf will parse it as POSIX %a [h-j %d ] or |
245 | // GNU allocation of string with range dh-j plus %. |
246 | const char *q = p + 1; |
247 | if (*q == '^') |
248 | ++q; |
249 | if (*q == ']') |
250 | ++q; |
251 | while (*q && *q != ']' && *q != '%') |
252 | ++q; |
253 | if (*q == 0 || *q == '%') |
254 | return nullptr; |
255 | p = q + 1; // Consume the closing ']'. |
256 | dir->maybeGnuMalloc = true; |
257 | } |
258 | } |
259 | dir->end = p; |
260 | break; |
261 | } |
262 | return p; |
263 | } |
264 | |
265 | static int scanf_get_value_size(ScanfDirective *dir) { |
266 | if (dir->allocate) { |
267 | if (!char_is_one_of(c: dir->convSpecifier, s: "cCsS[" )) |
268 | return FSS_INVALID; |
269 | return sizeof(char *); |
270 | } |
271 | |
272 | if (dir->maybeGnuMalloc) { |
273 | if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) |
274 | return FSS_INVALID; |
275 | // This is ambiguous, so check the smaller size of char * (if it is |
276 | // a GNU extension of %as, %aS or %a[...]) and float (if it is |
277 | // POSIX %a followed by s, S or [ letters). |
278 | return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); |
279 | } |
280 | |
281 | if (char_is_one_of(c: dir->convSpecifier, s: "cCsS[" )) { |
282 | bool needsTerminator = char_is_one_of(c: dir->convSpecifier, s: "sS[" ); |
283 | unsigned charSize = |
284 | format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier); |
285 | if (charSize == 0) |
286 | return FSS_INVALID; |
287 | if (dir->fieldWidth == 0) { |
288 | if (!needsTerminator) |
289 | return charSize; |
290 | return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; |
291 | } |
292 | return (dir->fieldWidth + needsTerminator) * charSize; |
293 | } |
294 | |
295 | return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: false); |
296 | } |
297 | |
298 | // Common part of *scanf interceptors. |
299 | // Process format string and va_list, and report all store ranges. |
300 | // Stops when "consuming" n_inputs input items. |
301 | static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, |
302 | const char *format, va_list aq) { |
303 | CHECK_GT(n_inputs, 0); |
304 | const char *p = format; |
305 | |
306 | COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); |
307 | |
308 | while (*p) { |
309 | ScanfDirective dir; |
310 | p = scanf_parse_next(p, allowGnuMalloc, dir: &dir); |
311 | if (!p) |
312 | break; |
313 | if (dir.convSpecifier == 0) { |
314 | // This can only happen at the end of the format string. |
315 | CHECK_EQ(*p, 0); |
316 | break; |
317 | } |
318 | // Here the directive is valid. Do what it says. |
319 | if (dir.argIdx != -1) { |
320 | // Unsupported. |
321 | break; |
322 | } |
323 | if (dir.suppressed) |
324 | continue; |
325 | int size = scanf_get_value_size(dir: &dir); |
326 | if (size == FSS_INVALID) { |
327 | Report(format: "%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n" , |
328 | SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); |
329 | break; |
330 | } |
331 | void *argp = va_arg(aq, void *); |
332 | if (dir.convSpecifier != 'n') |
333 | --n_inputs; |
334 | if (n_inputs < 0) |
335 | break; |
336 | if (size == FSS_STRLEN) { |
337 | size = internal_strlen(s: (const char *)argp) + 1; |
338 | } else if (size == FSS_WCSLEN) { |
339 | // FIXME: actually use wcslen() to calculate it. |
340 | size = 0; |
341 | } |
342 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); |
343 | // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well. |
344 | if (dir.allocate) { |
345 | if (char *buf = *(char **)argp) { |
346 | if (dir.convSpecifier == 'c') |
347 | size = 1; |
348 | else if (dir.convSpecifier == 'C') |
349 | size = sizeof(wchar_t); |
350 | else if (dir.convSpecifier == 'S') |
351 | size = (internal_wcslen(s: (wchar_t *)buf) + 1) * sizeof(wchar_t); |
352 | else // 's' or '[' |
353 | size = internal_strlen(s: buf) + 1; |
354 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); |
355 | } |
356 | } |
357 | } |
358 | } |
359 | |
360 | #if SANITIZER_INTERCEPT_PRINTF |
361 | |
// One parsed printf conversion directive, as filled in by printf_parse_next().
// printf_common() gives up on a format as soon as either positional index
// below differs from -1.
struct PrintfDirective {
  int fieldWidth;          // Literal field width; 0 when none was given.
  int fieldPrecision;      // Literal precision; 0 when none was given.
  int argIdx;              // Positional ("n$") index, or -1 if not specified.
  int precisionIdx;        // Positional index of a starred precision
                           // (".*n$"), or -1 if not specified.
  const char *begin;       // Points at the '%' that starts the directive.
  const char *end;         // Points just past the conversion specifier.
  bool starredWidth;       // Width is "*": taken from an int argument.
  bool starredPrecision;   // Precision is ".*": taken from an int argument.
  char lengthModifier[2];  // E.g. {'l','l'}; unused entries are 0.
  char convSpecifier;      // Conversion character; 0 if none was parsed.
};
374 | |
375 | static const char *maybe_parse_number(const char *p, int *out) { |
376 | if (*p >= '0' && *p <= '9') |
377 | p = parse_number(p, out); |
378 | return p; |
379 | } |
380 | |
381 | static const char *maybe_parse_number_or_star(const char *p, int *out, |
382 | bool *star) { |
383 | if (*p == '*') { |
384 | *star = true; |
385 | ++p; |
386 | } else { |
387 | *star = false; |
388 | p = maybe_parse_number(p, out); |
389 | } |
390 | return p; |
391 | } |
392 | |
393 | // Parse printf format string. Same as scanf_parse_next. |
394 | static const char *printf_parse_next(const char *p, PrintfDirective *dir) { |
395 | internal_memset(s: dir, c: 0, n: sizeof(*dir)); |
396 | dir->argIdx = -1; |
397 | dir->precisionIdx = -1; |
398 | |
399 | while (*p) { |
400 | if (*p != '%') { |
401 | ++p; |
402 | continue; |
403 | } |
404 | dir->begin = p; |
405 | ++p; |
406 | // %% |
407 | if (*p == '%') { |
408 | ++p; |
409 | continue; |
410 | } |
411 | if (*p == '\0') { |
412 | return nullptr; |
413 | } |
414 | // %n$ |
415 | p = maybe_parse_param_index(p, out: &dir->precisionIdx); |
416 | CHECK(p); |
417 | // Flags |
418 | while (char_is_one_of(c: *p, s: "'-+ #0" )) { |
419 | ++p; |
420 | } |
421 | // Field width |
422 | p = maybe_parse_number_or_star(p, out: &dir->fieldWidth, |
423 | star: &dir->starredWidth); |
424 | if (!p) |
425 | return nullptr; |
426 | // Precision |
427 | if (*p == '.') { |
428 | ++p; |
429 | // Actual precision is optional (surprise!) |
430 | p = maybe_parse_number_or_star(p, out: &dir->fieldPrecision, |
431 | star: &dir->starredPrecision); |
432 | if (!p) |
433 | return nullptr; |
434 | // m$ |
435 | if (dir->starredPrecision) { |
436 | p = maybe_parse_param_index(p, out: &dir->precisionIdx); |
437 | CHECK(p); |
438 | } |
439 | } |
440 | // Length modifier. |
441 | p = maybe_parse_length_modifier(p, ll: dir->lengthModifier); |
442 | // Conversion specifier. |
443 | dir->convSpecifier = *p++; |
444 | dir->end = p; |
445 | break; |
446 | } |
447 | return p; |
448 | } |
449 | |
450 | static int printf_get_value_size(PrintfDirective *dir) { |
451 | if (char_is_one_of(c: dir->convSpecifier, s: "cCsS" )) { |
452 | unsigned charSize = |
453 | format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier); |
454 | if (charSize == 0) |
455 | return FSS_INVALID; |
456 | if (char_is_one_of(c: dir->convSpecifier, s: "sS" )) { |
457 | return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; |
458 | } |
459 | return charSize; |
460 | } |
461 | |
462 | return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: true); |
463 | } |
464 | |
// Consumes one by-value argument of the given size from the va_list that aq
// points to. Integer-class arguments of 1, 2 or 4 bytes are fetched as u32
// and 8-byte ones as u64; floating-point arguments are fetched as double
// (8 bytes) or long double (12 or 16 bytes). For any other size a warning is
// printed and the *enclosing function* returns, so this can only be used in
// functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                        \
  do {                                                                  \
    if (!format_is_float_conv(convSpecifier)) {                         \
      switch (size) {                                                   \
      case 1:                                                           \
      case 2:                                                           \
      case 4:                                                           \
        va_arg(*aq, u32);                                               \
        break;                                                          \
      case 8:                                                           \
        va_arg(*aq, u64);                                               \
        break;                                                          \
      default:                                                          \
        Report("WARNING: unexpected arg size"                           \
               " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
        return;                                                         \
      }                                                                 \
    } else {                                                            \
      switch (size) {                                                   \
      case 8:                                                           \
        va_arg(*aq, double);                                            \
        break;                                                          \
      case 12:                                                          \
      case 16:                                                          \
        va_arg(*aq, long double);                                       \
        break;                                                          \
      default:                                                          \
        Report("WARNING: unexpected floating-point arg size"            \
               " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
        return;                                                         \
      }                                                                 \
    }                                                                   \
  } while (0)
500 | |
501 | // Common part of *printf interceptors. |
502 | // Process format string and va_list, and report all load ranges. |
503 | static void printf_common(void *ctx, const char *format, va_list aq) { |
504 | COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); |
505 | |
506 | const char *p = format; |
507 | |
508 | while (*p) { |
509 | PrintfDirective dir; |
510 | p = printf_parse_next(p, dir: &dir); |
511 | if (!p) |
512 | break; |
513 | if (dir.convSpecifier == 0) { |
514 | // This can only happen at the end of the format string. |
515 | CHECK_EQ(*p, 0); |
516 | break; |
517 | } |
518 | // Here the directive is valid. Do what it says. |
519 | if (dir.argIdx != -1 || dir.precisionIdx != -1) { |
520 | // Unsupported. |
521 | break; |
522 | } |
523 | if (dir.starredWidth) { |
524 | // Dynamic width |
525 | SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); |
526 | } |
527 | if (dir.starredPrecision) { |
528 | // Dynamic precision |
529 | SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); |
530 | } |
531 | // %m does not require an argument: strlen(errno). |
532 | if (dir.convSpecifier == 'm') |
533 | continue; |
534 | int size = printf_get_value_size(dir: &dir); |
535 | if (size == FSS_INVALID) { |
536 | static int ReportedOnce; |
537 | if (!ReportedOnce++) |
538 | Report( |
539 | format: "%s: WARNING: unexpected format specifier in printf " |
540 | "interceptor: %.*s (reported once per process)\n" , |
541 | SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); |
542 | break; |
543 | } |
544 | if (dir.convSpecifier == 'n') { |
545 | void *argp = va_arg(aq, void *); |
546 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); |
547 | continue; |
548 | } else if (size == FSS_STRLEN) { |
549 | if (void *argp = va_arg(aq, void *)) { |
550 | uptr len; |
551 | if (dir.starredPrecision) { |
552 | // FIXME: properly support starred precision for strings. |
553 | len = 0; |
554 | } else if (dir.fieldPrecision > 0) { |
555 | // Won't read more than "precision" symbols. |
556 | len = internal_strnlen(s: (const char *)argp, maxlen: dir.fieldPrecision); |
557 | if (len < (uptr)dir.fieldPrecision) |
558 | len++; |
559 | } else { |
560 | // Whole string will be accessed. |
561 | len = internal_strlen(s: (const char *)argp) + 1; |
562 | } |
563 | COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len); |
564 | } |
565 | } else if (size == FSS_WCSLEN) { |
566 | if (void *argp = va_arg(aq, void *)) { |
567 | // FIXME: Properly support wide-character strings (via wcsrtombs). |
568 | COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0); |
569 | } |
570 | } else { |
571 | // Skip non-pointer args |
572 | SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); |
573 | } |
574 | } |
575 | } |
576 | |
577 | #endif // SANITIZER_INTERCEPT_PRINTF |
578 | |