blob: 92318cda35fde3f9b09f2070507064ed2d7d8e8e [file] [log] [blame]
Stephen Hines2d1fdb22014-05-28 23:58:16 -07001//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Scanf/printf implementation for use in *Sanitizer interceptors.
11// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
12// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
13// with a few common GNU extensions.
14//
15//===----------------------------------------------------------------------===//
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -080016
Stephen Hines2d1fdb22014-05-28 23:58:16 -070017#include <stdarg.h>
18
19static const char *parse_number(const char *p, int *out) {
20 *out = internal_atoll(p);
21 while (*p >= '0' && *p <= '9')
22 ++p;
23 return p;
24}
25
26static const char *maybe_parse_param_index(const char *p, int *out) {
27 // n$
28 if (*p >= '0' && *p <= '9') {
29 int number;
30 const char *q = parse_number(p, &number);
31 CHECK(q);
32 if (*q == '$') {
33 *out = number;
34 p = q + 1;
35 }
36 }
37
38 // Otherwise, do not change p. This will be re-parsed later as the field
39 // width.
40 return p;
41}
42
43static bool char_is_one_of(char c, const char *s) {
44 return !!internal_strchr(s, c);
45}
46
// Parses an optional length modifier at |p|: one of "j", "z", "t", "L", "q",
// "h", "hh", "l" or "ll".
// The modifier characters are written to |ll| (ll[1] is only written for the
// doubled forms; the caller is expected to have zeroed |ll| beforehand).
// Returns a pointer to the first character after the modifier, or |p| itself
// when no modifier is present. In particular the pointer is never advanced
// when |*p| is the string terminator.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
    case 'j':
    case 'z':
    case 't':
    case 'L':
    case 'q':
      ll[0] = *p++;
      break;
    case 'h':
    case 'l': {
      // "h"/"l" may be doubled ("hh"/"ll").
      const char c = *p++;
      ll[0] = c;
      if (*p == c) {
        ll[1] = c;
        ++p;
      }
      break;
    }
    default:
      // No length modifier (this includes the end of the string).
      break;
  }
  return p;
}
68
69// Returns true if the character is an integer conversion specifier.
70static bool format_is_integer_conv(char c) {
71 return char_is_one_of(c, "diouxXn");
72}
73
74// Returns true if the character is an floating point conversion specifier.
75static bool format_is_float_conv(char c) {
76 return char_is_one_of(c, "aAeEfFgG");
77}
78
79// Returns string output character size for string-like conversions,
80// or 0 if the conversion is invalid.
81static int format_get_char_size(char convSpecifier,
82 const char lengthModifier[2]) {
83 if (char_is_one_of(convSpecifier, "CS")) {
84 return sizeof(wchar_t);
85 }
86
87 if (char_is_one_of(convSpecifier, "cs[")) {
88 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
89 return sizeof(wchar_t);
90 else if (lengthModifier[0] == '\0')
91 return sizeof(char);
92 }
93
94 return 0;
95}
96
// Special (non-positive) results of the *_get_value_size() helpers.
enum FormatStoreSize {
  // The conversion specifier is not valid.
  FSS_INVALID = 0,
  // The size is not known up front; it is the strlen() of the destination
  // buffer.
  FSS_STRLEN = -1,
  // The size is not known up front; it is the wcslen() of the destination
  // buffer.
  FSS_WCSLEN = -2
};
107
108// Returns the memory size of a format directive (if >0), or a value of
109// FormatStoreSize.
110static int format_get_value_size(char convSpecifier,
111 const char lengthModifier[2],
112 bool promote_float) {
113 if (format_is_integer_conv(convSpecifier)) {
114 switch (lengthModifier[0]) {
115 case 'h':
116 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
117 case 'l':
118 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
119 case 'q':
120 return sizeof(long long);
121 case 'L':
122 return sizeof(long long);
123 case 'j':
124 return sizeof(INTMAX_T);
125 case 'z':
126 return sizeof(SIZE_T);
127 case 't':
128 return sizeof(PTRDIFF_T);
129 case 0:
130 return sizeof(int);
131 default:
132 return FSS_INVALID;
133 }
134 }
135
136 if (format_is_float_conv(convSpecifier)) {
137 switch (lengthModifier[0]) {
138 case 'L':
139 case 'q':
140 return sizeof(long double);
141 case 'l':
142 return lengthModifier[1] == 'l' ? sizeof(long double)
143 : sizeof(double);
144 case 0:
145 // Printf promotes floats to doubles but scanf does not
146 return promote_float ? sizeof(double) : sizeof(float);
147 default:
148 return FSS_INVALID;
149 }
150 }
151
152 if (convSpecifier == 'p') {
153 if (lengthModifier[0] != 0)
154 return FSS_INVALID;
155 return sizeof(void *);
156 }
157
158 return FSS_INVALID;
159}
160
// One parsed scanf conversion directive, filled in by scanf_parse_next().
struct ScanfDirective {
  // Positional argument index ("%n$"); -1 when none was given.
  int argIdx;
  // Explicit maximum field width; 0 when none was given.
  int fieldWidth;
  // First character of the directive (the '%').
  const char *begin;
  // One past the last character of the directive.
  const char *end;
  // Assignment is suppressed ("*").
  bool suppressed;
  // The destination is allocated by scanf ("m").
  bool allocate;
  // Length modifier characters; unused positions are zero.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
  // The directive may be the old GNU "%as" / "%aS" / "%a[...]" allocation
  // extension instead of POSIX "%a" followed by literal text.
  bool maybeGnuMalloc;
};
172
173// Parse scanf format string. If a valid directive in encountered, it is
174// returned in dir. This function returns the pointer to the first
175// unprocessed character, or 0 in case of error.
176// In case of the end-of-string, a pointer to the closing \0 is returned.
177static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
178 ScanfDirective *dir) {
179 internal_memset(dir, 0, sizeof(*dir));
180 dir->argIdx = -1;
181
182 while (*p) {
183 if (*p != '%') {
184 ++p;
185 continue;
186 }
187 dir->begin = p;
188 ++p;
189 // %%
190 if (*p == '%') {
191 ++p;
192 continue;
193 }
194 if (*p == '\0') {
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800195 return nullptr;
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700196 }
197 // %n$
198 p = maybe_parse_param_index(p, &dir->argIdx);
199 CHECK(p);
200 // *
201 if (*p == '*') {
202 dir->suppressed = true;
203 ++p;
204 }
205 // Field width
206 if (*p >= '0' && *p <= '9') {
207 p = parse_number(p, &dir->fieldWidth);
208 CHECK(p);
209 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800210 return nullptr;
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700211 }
212 // m
213 if (*p == 'm') {
214 dir->allocate = true;
215 ++p;
216 }
217 // Length modifier.
218 p = maybe_parse_length_modifier(p, dir->lengthModifier);
219 // Conversion specifier.
220 dir->convSpecifier = *p++;
221 // Consume %[...] expression.
222 if (dir->convSpecifier == '[') {
223 if (*p == '^')
224 ++p;
225 if (*p == ']')
226 ++p;
227 while (*p && *p != ']')
228 ++p;
229 if (*p == 0)
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800230 return nullptr; // unexpected end of string
231 // Consume the closing ']'.
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700232 ++p;
233 }
234 // This is unfortunately ambiguous between old GNU extension
235 // of %as, %aS and %a[...] and newer POSIX %a followed by
236 // letters s, S or [.
237 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
238 !dir->lengthModifier[0]) {
239 if (*p == 's' || *p == 'S') {
240 dir->maybeGnuMalloc = true;
241 ++p;
242 } else if (*p == '[') {
243 // Watch for %a[h-j%d], if % appears in the
244 // [...] range, then we need to give up, we don't know
245 // if scanf will parse it as POSIX %a [h-j %d ] or
246 // GNU allocation of string with range dh-j plus %.
247 const char *q = p + 1;
248 if (*q == '^')
249 ++q;
250 if (*q == ']')
251 ++q;
252 while (*q && *q != ']' && *q != '%')
253 ++q;
254 if (*q == 0 || *q == '%')
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800255 return nullptr;
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700256 p = q + 1; // Consume the closing ']'.
257 dir->maybeGnuMalloc = true;
258 }
259 }
260 dir->end = p;
261 break;
262 }
263 return p;
264}
265
266static int scanf_get_value_size(ScanfDirective *dir) {
267 if (dir->allocate) {
268 if (!char_is_one_of(dir->convSpecifier, "cCsS["))
269 return FSS_INVALID;
270 return sizeof(char *);
271 }
272
273 if (dir->maybeGnuMalloc) {
274 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
275 return FSS_INVALID;
276 // This is ambiguous, so check the smaller size of char * (if it is
277 // a GNU extension of %as, %aS or %a[...]) and float (if it is
278 // POSIX %a followed by s, S or [ letters).
279 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
280 }
281
282 if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
283 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
284 unsigned charSize =
285 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
286 if (charSize == 0)
287 return FSS_INVALID;
288 if (dir->fieldWidth == 0) {
289 if (!needsTerminator)
290 return charSize;
291 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
292 }
293 return (dir->fieldWidth + needsTerminator) * charSize;
294 }
295
296 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
297}
298
299// Common part of *scanf interceptors.
300// Process format string and va_list, and report all store ranges.
301// Stops when "consuming" n_inputs input items.
302static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
303 const char *format, va_list aq) {
304 CHECK_GT(n_inputs, 0);
305 const char *p = format;
306
307 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
308
309 while (*p) {
310 ScanfDirective dir;
311 p = scanf_parse_next(p, allowGnuMalloc, &dir);
312 if (!p)
313 break;
314 if (dir.convSpecifier == 0) {
315 // This can only happen at the end of the format string.
316 CHECK_EQ(*p, 0);
317 break;
318 }
319 // Here the directive is valid. Do what it says.
320 if (dir.argIdx != -1) {
321 // Unsupported.
322 break;
323 }
324 if (dir.suppressed)
325 continue;
326 int size = scanf_get_value_size(&dir);
327 if (size == FSS_INVALID) {
328 Report("WARNING: unexpected format specifier in scanf interceptor: "
329 "%.*s\n", dir.end - dir.begin, dir.begin);
330 break;
331 }
332 void *argp = va_arg(aq, void *);
333 if (dir.convSpecifier != 'n')
334 --n_inputs;
335 if (n_inputs < 0)
336 break;
337 if (size == FSS_STRLEN) {
338 size = internal_strlen((const char *)argp) + 1;
339 } else if (size == FSS_WCSLEN) {
340 // FIXME: actually use wcslen() to calculate it.
341 size = 0;
342 }
343 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
344 }
345}
346
347#if SANITIZER_INTERCEPT_PRINTF
348
// One parsed printf conversion directive, filled in by printf_parse_next().
struct PrintfDirective {
  // Literal field width; 0 when absent or given as "*".
  int fieldWidth;
  // Literal precision; 0 when absent or given as "*".
  int fieldPrecision;
  // Width argument index ("%*n$"); -1 when not specified.
  int argIdx;
  // Precision argument index (".*n$"); -1 when not specified.
  int precisionIdx;
  // First character of the directive (the '%').
  const char *begin;
  // One past the last character of the directive.
  const char *end;
  // The width is taken from the argument list ("*").
  bool starredWidth;
  // The precision is taken from the argument list (".*").
  bool starredPrecision;
  // Length modifier characters; unused positions are zero.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
};
361
362static const char *maybe_parse_number(const char *p, int *out) {
363 if (*p >= '0' && *p <= '9')
364 p = parse_number(p, out);
365 return p;
366}
367
368static const char *maybe_parse_number_or_star(const char *p, int *out,
369 bool *star) {
370 if (*p == '*') {
371 *star = true;
372 ++p;
373 } else {
374 *star = false;
375 p = maybe_parse_number(p, out);
376 }
377 return p;
378}
379
380// Parse printf format string. Same as scanf_parse_next.
381static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
382 internal_memset(dir, 0, sizeof(*dir));
383 dir->argIdx = -1;
384 dir->precisionIdx = -1;
385
386 while (*p) {
387 if (*p != '%') {
388 ++p;
389 continue;
390 }
391 dir->begin = p;
392 ++p;
393 // %%
394 if (*p == '%') {
395 ++p;
396 continue;
397 }
398 if (*p == '\0') {
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800399 return nullptr;
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700400 }
401 // %n$
402 p = maybe_parse_param_index(p, &dir->precisionIdx);
403 CHECK(p);
404 // Flags
405 while (char_is_one_of(*p, "'-+ #0")) {
406 ++p;
407 }
408 // Field width
409 p = maybe_parse_number_or_star(p, &dir->fieldWidth,
410 &dir->starredWidth);
411 if (!p)
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800412 return nullptr;
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700413 // Precision
414 if (*p == '.') {
415 ++p;
416 // Actual precision is optional (surprise!)
417 p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
418 &dir->starredPrecision);
419 if (!p)
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800420 return nullptr;
Stephen Hines2d1fdb22014-05-28 23:58:16 -0700421 // m$
422 if (dir->starredPrecision) {
423 p = maybe_parse_param_index(p, &dir->precisionIdx);
424 CHECK(p);
425 }
426 }
427 // Length modifier.
428 p = maybe_parse_length_modifier(p, dir->lengthModifier);
429 // Conversion specifier.
430 dir->convSpecifier = *p++;
431 dir->end = p;
432 break;
433 }
434 return p;
435}
436
437static int printf_get_value_size(PrintfDirective *dir) {
438 if (dir->convSpecifier == 'm') {
439 return sizeof(char *);
440 }
441
442 if (char_is_one_of(dir->convSpecifier, "cCsS")) {
443 unsigned charSize =
444 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
445 if (charSize == 0)
446 return FSS_INVALID;
447 if (char_is_one_of(dir->convSpecifier, "sS")) {
448 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
449 }
450 return charSize;
451 }
452
453 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
454}
455
// Consumes one by-value (non-pointer) argument of |size| bytes from the
// va_list that |aq| points to. |convSpecifier| decides whether the argument
// is fetched as a floating point or as an integer value.
// On an unexpected size a warning is reported and the macro returns from the
// enclosing function, so it can only be used in functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (size) {                                              \
        case 8:                                                    \
          va_arg(*(aq), double);                                   \
          break;                                                   \
        case 12:                                                   \
        case 16:                                                   \
          va_arg(*(aq), long double);                              \
          break;                                                   \
        default:                                                   \
          Report("WARNING: unexpected floating-point arg size"     \
                 " in printf interceptor: %d\n", size);            \
          return;                                                  \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
        case 1:                                                    \
        case 2:                                                    \
        case 4:                                                    \
          va_arg(*(aq), u32);                                      \
          break;                                                   \
        case 8:                                                    \
          va_arg(*(aq), u64);                                      \
          break;                                                   \
        default:                                                   \
          Report("WARNING: unexpected arg size"                    \
                 " in printf interceptor: %d\n", size);            \
          return;                                                  \
      }                                                            \
    }                                                              \
  } while (0)
491
492// Common part of *printf interceptors.
493// Process format string and va_list, and report all load ranges.
494static void printf_common(void *ctx, const char *format, va_list aq) {
495 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
496
497 const char *p = format;
498
499 while (*p) {
500 PrintfDirective dir;
501 p = printf_parse_next(p, &dir);
502 if (!p)
503 break;
504 if (dir.convSpecifier == 0) {
505 // This can only happen at the end of the format string.
506 CHECK_EQ(*p, 0);
507 break;
508 }
509 // Here the directive is valid. Do what it says.
510 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
511 // Unsupported.
512 break;
513 }
514 if (dir.starredWidth) {
515 // Dynamic width
516 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
517 }
518 if (dir.starredPrecision) {
519 // Dynamic precision
520 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
521 }
522 int size = printf_get_value_size(&dir);
523 if (size == FSS_INVALID) {
524 Report("WARNING: unexpected format specifier in printf "
525 "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
526 break;
527 }
528 if (dir.convSpecifier == 'n') {
529 void *argp = va_arg(aq, void *);
530 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
531 continue;
532 } else if (size == FSS_STRLEN) {
533 if (void *argp = va_arg(aq, void *)) {
534 if (dir.starredPrecision) {
535 // FIXME: properly support starred precision for strings.
536 size = 0;
537 } else if (dir.fieldPrecision > 0) {
538 // Won't read more than "precision" symbols.
539 size = internal_strnlen((const char *)argp, dir.fieldPrecision);
540 if (size < dir.fieldPrecision) size++;
541 } else {
542 // Whole string will be accessed.
543 size = internal_strlen((const char *)argp) + 1;
544 }
545 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
546 }
547 } else if (size == FSS_WCSLEN) {
548 if (void *argp = va_arg(aq, void *)) {
549 // FIXME: Properly support wide-character strings (via wcsrtombs).
550 size = 0;
551 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
552 }
553 } else {
554 // Skip non-pointer args
555 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
556 }
557 }
558}
559
Pirama Arumuga Nainar799172d2016-03-03 15:50:30 -0800560#endif // SANITIZER_INTERCEPT_PRINTF