blob: 066d5d6fa13e62dd158cf68c62b0ab3b61973837 [file] [log] [blame]
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
Ted Kremenek1e51c202010-07-20 20:04:47 +000022using clang::analyze_format_string::ConversionSpecifier;
Hans Wennborg6fcd9322011-12-10 13:20:11 +000023using clang::analyze_scanf::ScanfArgTypeResult;
Ted Kremenek6ecb9502010-07-20 20:04:27 +000024using clang::analyze_scanf::ScanfConversionSpecifier;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000025using clang::analyze_scanf::ScanfSpecifier;
Dan Gohman3c46e8d2010-07-26 21:25:24 +000026using clang::UpdateOnReturn;
Hans Wennborg6fcd9322011-12-10 13:20:11 +000027using namespace clang;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000028
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30 ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
Ted Kremenek6ecb9502010-07-20 20:04:27 +000033 ScanfConversionSpecifier &CS,
Ted Kremenekd9c904d2010-07-16 02:11:31 +000034 const char *&Beg, const char *E) {
35 const char *I = Beg;
36 const char *start = I - 1;
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39 // No more characters?
40 if (I == E) {
41 H.HandleIncompleteScanList(start, I);
42 return true;
43 }
44
45 // Special case: ']' is the first character.
46 if (*I == ']') {
47 if (++I == E) {
Ted Kremenekb7c21012010-07-16 18:28:03 +000048 H.HandleIncompleteScanList(start, I - 1);
Ted Kremenekd9c904d2010-07-16 02:11:31 +000049 return true;
50 }
51 }
52
53 // Look for a ']' character which denotes the end of the scan list.
54 while (*I != ']') {
55 if (++I == E) {
Ted Kremenekb7c21012010-07-16 18:28:03 +000056 H.HandleIncompleteScanList(start, I - 1);
Ted Kremenekd9c904d2010-07-16 02:11:31 +000057 return true;
58 }
59 }
60
61 CS.setEndScanList(I);
62 return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68 const char *&Beg,
69 const char *E,
Hans Wennborgd02deeb2011-12-15 10:25:47 +000070 unsigned &argIndex,
71 const LangOptions &LO) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +000072
73 using namespace clang::analyze_scanf;
74 const char *I = Beg;
75 const char *Start = 0;
76 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
77
78 // Look for a '%' character that indicates the start of a format specifier.
79 for ( ; I != E ; ++I) {
80 char c = *I;
81 if (c == '\0') {
82 // Detect spurious null characters, which are likely errors.
83 H.HandleNullChar(I);
84 return true;
85 }
86 if (c == '%') {
87 Start = I++; // Record the start of the format specifier.
88 break;
89 }
90 }
91
92 // No format specifier found?
93 if (!Start)
94 return false;
95
96 if (I == E) {
97 // No more characters left?
98 H.HandleIncompleteSpecifier(Start, E - Start);
99 return true;
100 }
101
102 ScanfSpecifier FS;
103 if (ParseArgPosition(H, FS, Start, I, E))
104 return true;
105
106 if (I == E) {
107 // No more characters left?
108 H.HandleIncompleteSpecifier(Start, E - Start);
109 return true;
110 }
111
112 // Look for '*' flag if it is present.
113 if (*I == '*') {
114 FS.setSuppressAssignment(I);
115 if (++I == E) {
116 H.HandleIncompleteSpecifier(Start, E - Start);
117 return true;
118 }
119 }
120
121 // Look for the field width (if any). Unlike printf, this is either
122 // a fixed integer or isn't present.
123 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
124 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
125 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
126 FS.setFieldWidth(Amt);
127
128 if (I == E) {
129 // No more characters left?
130 H.HandleIncompleteSpecifier(Start, E - Start);
131 return true;
132 }
133 }
134
135 // Look for the length modifier.
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000136 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000137 // No more characters left?
138 H.HandleIncompleteSpecifier(Start, E - Start);
139 return true;
140 }
141
142 // Detect spurious null characters, which are likely errors.
143 if (*I == '\0') {
144 H.HandleNullChar(I);
145 return true;
146 }
147
148 // Finally, look for the conversion specifier.
149 const char *conversionPosition = I++;
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000150 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000151 switch (*conversionPosition) {
152 default:
153 break;
Ted Kremenek1e51c202010-07-20 20:04:47 +0000154 case '%': k = ConversionSpecifier::PercentArg; break;
155 case 'A': k = ConversionSpecifier::AArg; break;
156 case 'E': k = ConversionSpecifier::EArg; break;
157 case 'F': k = ConversionSpecifier::FArg; break;
158 case 'G': k = ConversionSpecifier::GArg; break;
159 case 'X': k = ConversionSpecifier::XArg; break;
160 case 'a': k = ConversionSpecifier::aArg; break;
161 case 'd': k = ConversionSpecifier::dArg; break;
162 case 'e': k = ConversionSpecifier::eArg; break;
163 case 'f': k = ConversionSpecifier::fArg; break;
164 case 'g': k = ConversionSpecifier::gArg; break;
165 case 'i': k = ConversionSpecifier::iArg; break;
166 case 'n': k = ConversionSpecifier::nArg; break;
167 case 'c': k = ConversionSpecifier::cArg; break;
168 case 'C': k = ConversionSpecifier::CArg; break;
169 case 'S': k = ConversionSpecifier::SArg; break;
170 case '[': k = ConversionSpecifier::ScanListArg; break;
171 case 'u': k = ConversionSpecifier::uArg; break;
172 case 'x': k = ConversionSpecifier::xArg; break;
173 case 'o': k = ConversionSpecifier::oArg; break;
174 case 's': k = ConversionSpecifier::sArg; break;
175 case 'p': k = ConversionSpecifier::pArg; break;
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000176 }
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000177 ScanfConversionSpecifier CS(conversionPosition, k);
178 if (k == ScanfConversionSpecifier::ScanListArg) {
Hans Wennborg6de0b482012-01-12 14:44:54 +0000179 if (ParseScanList(H, CS, I, E))
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000180 return true;
181 }
182 FS.setConversionSpecifier(CS);
183 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
184 && !FS.usesPositionalArg())
185 FS.setArgIndex(argIndex++);
186
187 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
188 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
189
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000190 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000191 // Assume the conversion takes one argument.
192 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
193 }
194 return ScanfSpecifierResult(Start, FS);
195}
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000196
197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
198 const ScanfConversionSpecifier &CS = getConversionSpecifier();
199
200 if (!CS.consumesDataArgument())
201 return ScanfArgTypeResult::Invalid();
202
203 switch(CS.getKind()) {
204 // Signed int.
205 case ConversionSpecifier::dArg:
206 case ConversionSpecifier::iArg:
207 switch (LM.getKind()) {
208 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
209 case LengthModifier::AsChar:
210 return ArgTypeResult(ArgTypeResult::AnyCharTy);
211 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
212 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
Hans Wennborg32addd52012-02-16 16:34:54 +0000213 case LengthModifier::AsLongLong:
214 case LengthModifier::AsQuad:
215 return ArgTypeResult(Ctx.LongLongTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000216 case LengthModifier::AsIntMax:
217 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
218 case LengthModifier::AsSizeT:
219 // FIXME: ssize_t.
220 return ScanfArgTypeResult();
221 case LengthModifier::AsPtrDiff:
222 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
Ted Kremenek9d24c2c2012-01-24 21:29:54 +0000223 case LengthModifier::AsLongDouble:
224 // GNU extension.
225 return ArgTypeResult(Ctx.LongLongTy);
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000226 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
Hans Wennborg37969b72012-01-12 17:11:12 +0000227 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000228 }
229
230 // Unsigned int.
231 case ConversionSpecifier::oArg:
232 case ConversionSpecifier::uArg:
233 case ConversionSpecifier::xArg:
234 case ConversionSpecifier::XArg:
235 switch (LM.getKind()) {
236 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
237 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
238 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
239 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
240 case LengthModifier::AsLongLong:
Hans Wennborg32addd52012-02-16 16:34:54 +0000241 case LengthModifier::AsQuad:
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000242 return ArgTypeResult(Ctx.UnsignedLongLongTy);
243 case LengthModifier::AsIntMax:
244 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
245 case LengthModifier::AsSizeT:
246 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
247 case LengthModifier::AsPtrDiff:
248 // FIXME: Unsigned version of ptrdiff_t?
249 return ScanfArgTypeResult();
Ted Kremenek9d24c2c2012-01-24 21:29:54 +0000250 case LengthModifier::AsLongDouble:
251 // GNU extension.
252 return ArgTypeResult(Ctx.UnsignedLongLongTy);
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000253 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
Hans Wennborg37969b72012-01-12 17:11:12 +0000254 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000255 }
256
257 // Float.
258 case ConversionSpecifier::aArg:
259 case ConversionSpecifier::AArg:
260 case ConversionSpecifier::eArg:
261 case ConversionSpecifier::EArg:
262 case ConversionSpecifier::fArg:
263 case ConversionSpecifier::FArg:
264 case ConversionSpecifier::gArg:
265 case ConversionSpecifier::GArg:
266 switch (LM.getKind()) {
267 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
268 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
269 case LengthModifier::AsLongDouble:
270 return ArgTypeResult(Ctx.LongDoubleTy);
271 default:
272 return ScanfArgTypeResult::Invalid();
273 }
274
275 // Char, string and scanlist.
276 case ConversionSpecifier::cArg:
277 case ConversionSpecifier::sArg:
278 case ConversionSpecifier::ScanListArg:
279 switch (LM.getKind()) {
280 case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
281 case LengthModifier::AsLong:
282 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
Hans Wennborg37969b72012-01-12 17:11:12 +0000283 case LengthModifier::AsAllocate:
284 case LengthModifier::AsMAllocate:
285 return ScanfArgTypeResult(ArgTypeResult::CStrTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000286 default:
287 return ScanfArgTypeResult::Invalid();
288 }
289 case ConversionSpecifier::CArg:
290 case ConversionSpecifier::SArg:
291 // FIXME: Mac OS X specific?
Hans Wennborg37969b72012-01-12 17:11:12 +0000292 switch (LM.getKind()) {
293 case LengthModifier::None:
294 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
295 case LengthModifier::AsAllocate:
296 case LengthModifier::AsMAllocate:
297 return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
298 default:
299 return ScanfArgTypeResult::Invalid();
300 }
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000301
302 // Pointer.
303 case ConversionSpecifier::pArg:
304 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
305
306 default:
307 break;
308 }
309
310 return ScanfArgTypeResult();
311}
312
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000313bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
314 ASTContext &Ctx) {
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000315 if (!QT->isPointerType())
316 return false;
317
318 QualType PT = QT->getPointeeType();
Jordan Rose033a9c02012-06-04 22:49:02 +0000319
320 // If it's an enum, get its underlying type.
321 if (const EnumType *ETy = QT->getAs<EnumType>())
322 QT = ETy->getDecl()->getIntegerType();
323
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000324 const BuiltinType *BT = PT->getAs<BuiltinType>();
325 if (!BT)
326 return false;
327
328 // Pointer to a character.
329 if (PT->isAnyCharacterType()) {
330 CS.setKind(ConversionSpecifier::sArg);
331 if (PT->isWideCharType())
332 LM.setKind(LengthModifier::AsWideChar);
333 else
334 LM.setKind(LengthModifier::None);
335 return true;
336 }
337
338 // Figure out the length modifier.
339 switch (BT->getKind()) {
340 // no modifier
341 case BuiltinType::UInt:
342 case BuiltinType::Int:
343 case BuiltinType::Float:
344 LM.setKind(LengthModifier::None);
345 break;
346
347 // hh
348 case BuiltinType::Char_U:
349 case BuiltinType::UChar:
350 case BuiltinType::Char_S:
351 case BuiltinType::SChar:
352 LM.setKind(LengthModifier::AsChar);
353 break;
354
355 // h
356 case BuiltinType::Short:
357 case BuiltinType::UShort:
358 LM.setKind(LengthModifier::AsShort);
359 break;
360
361 // l
362 case BuiltinType::Long:
363 case BuiltinType::ULong:
364 case BuiltinType::Double:
365 LM.setKind(LengthModifier::AsLong);
366 break;
367
368 // ll
369 case BuiltinType::LongLong:
370 case BuiltinType::ULongLong:
371 LM.setKind(LengthModifier::AsLongLong);
372 break;
373
374 // L
375 case BuiltinType::LongDouble:
376 LM.setKind(LengthModifier::AsLongDouble);
377 break;
378
379 // Don't know.
380 default:
381 return false;
382 }
383
384 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
385 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
386 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
387 if (Identifier->getName() == "size_t") {
388 LM.setKind(LengthModifier::AsSizeT);
389 } else if (Identifier->getName() == "ssize_t") {
390 // Not C99, but common in Unix.
391 LM.setKind(LengthModifier::AsSizeT);
392 } else if (Identifier->getName() == "intmax_t") {
393 LM.setKind(LengthModifier::AsIntMax);
394 } else if (Identifier->getName() == "uintmax_t") {
395 LM.setKind(LengthModifier::AsIntMax);
396 } else if (Identifier->getName() == "ptrdiff_t") {
397 LM.setKind(LengthModifier::AsPtrDiff);
398 }
399 }
400
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000401 // If fixing the length modifier was enough, we are done.
402 const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
403 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
404 return true;
405
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000406 // Figure out the conversion specifier.
407 if (PT->isRealFloatingType())
408 CS.setKind(ConversionSpecifier::fArg);
409 else if (PT->isSignedIntegerType())
410 CS.setKind(ConversionSpecifier::dArg);
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000411 else if (PT->isUnsignedIntegerType())
412 CS.setKind(ConversionSpecifier::uArg);
413 else
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000414 llvm_unreachable("Unexpected type");
415
416 return true;
417}
418
419void ScanfSpecifier::toString(raw_ostream &os) const {
420 os << "%";
421
422 if (usesPositionalArg())
423 os << getPositionalArgIndex() << "$";
424 if (SuppressAssignment)
425 os << "*";
426
427 FieldWidth.toString(os);
428 os << LM.toString();
429 os << CS.toString();
430}
431
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000432bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
433 const char *I,
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000434 const char *E,
435 const LangOptions &LO) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000436
437 unsigned argIndex = 0;
438
439 // Keep looking for a format specifier until we have exhausted the string.
440 while (I != E) {
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000441 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
442 LO);
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000443 // Did a fail-stop error of any kind occur when parsing the specifier?
444 // If so, don't do any more processing.
445 if (FSR.shouldStop())
446 return true;;
447 // Did we exhaust the string or encounter an error that
448 // we can recover from?
449 if (!FSR.hasValue())
450 continue;
451 // We have a format specifier. Pass it to the callback.
452 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
453 I - FSR.getStart())) {
454 return true;
455 }
456 }
457 assert(I == E && "Format string not exhausted");
458 return false;
459}
460
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000461bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
462 switch (K) {
463 case InvalidTy:
464 llvm_unreachable("ArgTypeResult must be valid");
465 case UnknownTy:
466 return true;
467 case CStrTy:
468 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
469 case WCStrTy:
470 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
471 case PtrToArgTypeResultTy: {
472 const PointerType *PT = argTy->getAs<PointerType>();
473 if (!PT)
474 return false;
475 return A.matchesType(C, PT->getPointeeType());
476 }
477 }
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000478
David Blaikie30263482012-01-20 21:50:17 +0000479 llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000480}
481
482QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
483 switch (K) {
484 case InvalidTy:
485 llvm_unreachable("No representative type for Invalid ArgTypeResult");
486 case UnknownTy:
487 return QualType();
488 case CStrTy:
489 return C.getPointerType(C.CharTy);
490 case WCStrTy:
491 return C.getPointerType(C.getWCharType());
492 case PtrToArgTypeResultTy:
493 return C.getPointerType(A.getRepresentativeType(C));
494 }
495
David Blaikie30263482012-01-20 21:50:17 +0000496 llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000497}
498
499std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
500 std::string S = getRepresentativeType(C).getAsString();
501 if (!Name)
502 return std::string("'") + S + "'";
503 return std::string("'") + Name + "' (aka '" + S + "')";
504}