blob: 77d9c9658d374071176c921530899d136bd3ffcd [file] [log] [blame]
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends. The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
Ted Kremenek1e51c202010-07-20 20:04:47 +000022using clang::analyze_format_string::ConversionSpecifier;
Hans Wennborg6fcd9322011-12-10 13:20:11 +000023using clang::analyze_scanf::ScanfArgTypeResult;
Ted Kremenek6ecb9502010-07-20 20:04:27 +000024using clang::analyze_scanf::ScanfConversionSpecifier;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000025using clang::analyze_scanf::ScanfSpecifier;
Dan Gohman3c46e8d2010-07-26 21:25:24 +000026using clang::UpdateOnReturn;
Hans Wennborg6fcd9322011-12-10 13:20:11 +000027using namespace clang;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000028
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30 ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
Ted Kremenek6ecb9502010-07-20 20:04:27 +000033 ScanfConversionSpecifier &CS,
Ted Kremenekd9c904d2010-07-16 02:11:31 +000034 const char *&Beg, const char *E) {
35 const char *I = Beg;
36 const char *start = I - 1;
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39 // No more characters?
40 if (I == E) {
41 H.HandleIncompleteScanList(start, I);
42 return true;
43 }
44
45 // Special case: ']' is the first character.
46 if (*I == ']') {
47 if (++I == E) {
Ted Kremenekb7c21012010-07-16 18:28:03 +000048 H.HandleIncompleteScanList(start, I - 1);
Ted Kremenekd9c904d2010-07-16 02:11:31 +000049 return true;
50 }
51 }
52
53 // Look for a ']' character which denotes the end of the scan list.
54 while (*I != ']') {
55 if (++I == E) {
Ted Kremenekb7c21012010-07-16 18:28:03 +000056 H.HandleIncompleteScanList(start, I - 1);
Ted Kremenekd9c904d2010-07-16 02:11:31 +000057 return true;
58 }
59 }
60
61 CS.setEndScanList(I);
62 return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68 const char *&Beg,
69 const char *E,
70 unsigned &argIndex) {
71
72 using namespace clang::analyze_scanf;
73 const char *I = Beg;
74 const char *Start = 0;
75 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
76
77 // Look for a '%' character that indicates the start of a format specifier.
78 for ( ; I != E ; ++I) {
79 char c = *I;
80 if (c == '\0') {
81 // Detect spurious null characters, which are likely errors.
82 H.HandleNullChar(I);
83 return true;
84 }
85 if (c == '%') {
86 Start = I++; // Record the start of the format specifier.
87 break;
88 }
89 }
90
91 // No format specifier found?
92 if (!Start)
93 return false;
94
95 if (I == E) {
96 // No more characters left?
97 H.HandleIncompleteSpecifier(Start, E - Start);
98 return true;
99 }
100
101 ScanfSpecifier FS;
102 if (ParseArgPosition(H, FS, Start, I, E))
103 return true;
104
105 if (I == E) {
106 // No more characters left?
107 H.HandleIncompleteSpecifier(Start, E - Start);
108 return true;
109 }
110
111 // Look for '*' flag if it is present.
112 if (*I == '*') {
113 FS.setSuppressAssignment(I);
114 if (++I == E) {
115 H.HandleIncompleteSpecifier(Start, E - Start);
116 return true;
117 }
118 }
119
120 // Look for the field width (if any). Unlike printf, this is either
121 // a fixed integer or isn't present.
122 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
123 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
124 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
125 FS.setFieldWidth(Amt);
126
127 if (I == E) {
128 // No more characters left?
129 H.HandleIncompleteSpecifier(Start, E - Start);
130 return true;
131 }
132 }
133
134 // Look for the length modifier.
135 if (ParseLengthModifier(FS, I, E) && I == E) {
136 // No more characters left?
137 H.HandleIncompleteSpecifier(Start, E - Start);
138 return true;
139 }
140
141 // Detect spurious null characters, which are likely errors.
142 if (*I == '\0') {
143 H.HandleNullChar(I);
144 return true;
145 }
146
147 // Finally, look for the conversion specifier.
148 const char *conversionPosition = I++;
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000149 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000150 switch (*conversionPosition) {
151 default:
152 break;
Ted Kremenek1e51c202010-07-20 20:04:47 +0000153 case '%': k = ConversionSpecifier::PercentArg; break;
154 case 'A': k = ConversionSpecifier::AArg; break;
155 case 'E': k = ConversionSpecifier::EArg; break;
156 case 'F': k = ConversionSpecifier::FArg; break;
157 case 'G': k = ConversionSpecifier::GArg; break;
158 case 'X': k = ConversionSpecifier::XArg; break;
159 case 'a': k = ConversionSpecifier::aArg; break;
160 case 'd': k = ConversionSpecifier::dArg; break;
161 case 'e': k = ConversionSpecifier::eArg; break;
162 case 'f': k = ConversionSpecifier::fArg; break;
163 case 'g': k = ConversionSpecifier::gArg; break;
164 case 'i': k = ConversionSpecifier::iArg; break;
165 case 'n': k = ConversionSpecifier::nArg; break;
166 case 'c': k = ConversionSpecifier::cArg; break;
167 case 'C': k = ConversionSpecifier::CArg; break;
168 case 'S': k = ConversionSpecifier::SArg; break;
169 case '[': k = ConversionSpecifier::ScanListArg; break;
170 case 'u': k = ConversionSpecifier::uArg; break;
171 case 'x': k = ConversionSpecifier::xArg; break;
172 case 'o': k = ConversionSpecifier::oArg; break;
173 case 's': k = ConversionSpecifier::sArg; break;
174 case 'p': k = ConversionSpecifier::pArg; break;
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000175 }
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000176 ScanfConversionSpecifier CS(conversionPosition, k);
177 if (k == ScanfConversionSpecifier::ScanListArg) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000178 if (!ParseScanList(H, CS, I, E))
179 return true;
180 }
181 FS.setConversionSpecifier(CS);
182 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
183 && !FS.usesPositionalArg())
184 FS.setArgIndex(argIndex++);
185
186 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
187 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
188
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000189 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000190 // Assume the conversion takes one argument.
191 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
192 }
193 return ScanfSpecifierResult(Start, FS);
194}
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000195
196ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
197 const ScanfConversionSpecifier &CS = getConversionSpecifier();
198
199 if (!CS.consumesDataArgument())
200 return ScanfArgTypeResult::Invalid();
201
202 switch(CS.getKind()) {
203 // Signed int.
204 case ConversionSpecifier::dArg:
205 case ConversionSpecifier::iArg:
206 switch (LM.getKind()) {
207 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
208 case LengthModifier::AsChar:
209 return ArgTypeResult(ArgTypeResult::AnyCharTy);
210 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
211 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
212 case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy);
213 case LengthModifier::AsIntMax:
214 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
215 case LengthModifier::AsSizeT:
216 // FIXME: ssize_t.
217 return ScanfArgTypeResult();
218 case LengthModifier::AsPtrDiff:
219 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
220 case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
221 }
222
223 // Unsigned int.
224 case ConversionSpecifier::oArg:
225 case ConversionSpecifier::uArg:
226 case ConversionSpecifier::xArg:
227 case ConversionSpecifier::XArg:
228 switch (LM.getKind()) {
229 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
230 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
231 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
232 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
233 case LengthModifier::AsLongLong:
234 return ArgTypeResult(Ctx.UnsignedLongLongTy);
235 case LengthModifier::AsIntMax:
236 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
237 case LengthModifier::AsSizeT:
238 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
239 case LengthModifier::AsPtrDiff:
240 // FIXME: Unsigned version of ptrdiff_t?
241 return ScanfArgTypeResult();
242 case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
243 }
244
245 // Float.
246 case ConversionSpecifier::aArg:
247 case ConversionSpecifier::AArg:
248 case ConversionSpecifier::eArg:
249 case ConversionSpecifier::EArg:
250 case ConversionSpecifier::fArg:
251 case ConversionSpecifier::FArg:
252 case ConversionSpecifier::gArg:
253 case ConversionSpecifier::GArg:
254 switch (LM.getKind()) {
255 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
256 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
257 case LengthModifier::AsLongDouble:
258 return ArgTypeResult(Ctx.LongDoubleTy);
259 default:
260 return ScanfArgTypeResult::Invalid();
261 }
262
263 // Char, string and scanlist.
264 case ConversionSpecifier::cArg:
265 case ConversionSpecifier::sArg:
266 case ConversionSpecifier::ScanListArg:
267 switch (LM.getKind()) {
268 case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
269 case LengthModifier::AsLong:
270 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
271 default:
272 return ScanfArgTypeResult::Invalid();
273 }
274 case ConversionSpecifier::CArg:
275 case ConversionSpecifier::SArg:
276 // FIXME: Mac OS X specific?
277 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
278
279 // Pointer.
280 case ConversionSpecifier::pArg:
281 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
282
283 default:
284 break;
285 }
286
287 return ScanfArgTypeResult();
288}
289
290bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt)
291{
292 if (!QT->isPointerType())
293 return false;
294
295 QualType PT = QT->getPointeeType();
296 const BuiltinType *BT = PT->getAs<BuiltinType>();
297 if (!BT)
298 return false;
299
300 // Pointer to a character.
301 if (PT->isAnyCharacterType()) {
302 CS.setKind(ConversionSpecifier::sArg);
303 if (PT->isWideCharType())
304 LM.setKind(LengthModifier::AsWideChar);
305 else
306 LM.setKind(LengthModifier::None);
307 return true;
308 }
309
310 // Figure out the length modifier.
311 switch (BT->getKind()) {
312 // no modifier
313 case BuiltinType::UInt:
314 case BuiltinType::Int:
315 case BuiltinType::Float:
316 LM.setKind(LengthModifier::None);
317 break;
318
319 // hh
320 case BuiltinType::Char_U:
321 case BuiltinType::UChar:
322 case BuiltinType::Char_S:
323 case BuiltinType::SChar:
324 LM.setKind(LengthModifier::AsChar);
325 break;
326
327 // h
328 case BuiltinType::Short:
329 case BuiltinType::UShort:
330 LM.setKind(LengthModifier::AsShort);
331 break;
332
333 // l
334 case BuiltinType::Long:
335 case BuiltinType::ULong:
336 case BuiltinType::Double:
337 LM.setKind(LengthModifier::AsLong);
338 break;
339
340 // ll
341 case BuiltinType::LongLong:
342 case BuiltinType::ULongLong:
343 LM.setKind(LengthModifier::AsLongLong);
344 break;
345
346 // L
347 case BuiltinType::LongDouble:
348 LM.setKind(LengthModifier::AsLongDouble);
349 break;
350
351 // Don't know.
352 default:
353 return false;
354 }
355
356 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
357 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
358 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
359 if (Identifier->getName() == "size_t") {
360 LM.setKind(LengthModifier::AsSizeT);
361 } else if (Identifier->getName() == "ssize_t") {
362 // Not C99, but common in Unix.
363 LM.setKind(LengthModifier::AsSizeT);
364 } else if (Identifier->getName() == "intmax_t") {
365 LM.setKind(LengthModifier::AsIntMax);
366 } else if (Identifier->getName() == "uintmax_t") {
367 LM.setKind(LengthModifier::AsIntMax);
368 } else if (Identifier->getName() == "ptrdiff_t") {
369 LM.setKind(LengthModifier::AsPtrDiff);
370 }
371 }
372
373 // Figure out the conversion specifier.
374 if (PT->isRealFloatingType())
375 CS.setKind(ConversionSpecifier::fArg);
376 else if (PT->isSignedIntegerType())
377 CS.setKind(ConversionSpecifier::dArg);
378 else if (PT->isUnsignedIntegerType()) {
379 // Preserve the original formatting, e.g. 'X', 'o'.
380 if (!CS.isUIntArg()) {
381 CS.setKind(ConversionSpecifier::uArg);
382 }
383 } else
384 llvm_unreachable("Unexpected type");
385
386 return true;
387}
388
389void ScanfSpecifier::toString(raw_ostream &os) const {
390 os << "%";
391
392 if (usesPositionalArg())
393 os << getPositionalArgIndex() << "$";
394 if (SuppressAssignment)
395 os << "*";
396
397 FieldWidth.toString(os);
398 os << LM.toString();
399 os << CS.toString();
400}
401
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000402bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
403 const char *I,
404 const char *E) {
405
406 unsigned argIndex = 0;
407
408 // Keep looking for a format specifier until we have exhausted the string.
409 while (I != E) {
410 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex);
411 // Did a fail-stop error of any kind occur when parsing the specifier?
412 // If so, don't do any more processing.
413 if (FSR.shouldStop())
414 return true;;
415 // Did we exhaust the string or encounter an error that
416 // we can recover from?
417 if (!FSR.hasValue())
418 continue;
419 // We have a format specifier. Pass it to the callback.
420 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
421 I - FSR.getStart())) {
422 return true;
423 }
424 }
425 assert(I == E && "Format string not exhausted");
426 return false;
427}
428
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000429bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
430 switch (K) {
431 case InvalidTy:
432 llvm_unreachable("ArgTypeResult must be valid");
433 case UnknownTy:
434 return true;
435 case CStrTy:
436 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
437 case WCStrTy:
438 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
439 case PtrToArgTypeResultTy: {
440 const PointerType *PT = argTy->getAs<PointerType>();
441 if (!PT)
442 return false;
443 return A.matchesType(C, PT->getPointeeType());
444 }
445 }
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000446
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000447 return false; // Unreachable, but we still get a warning.
448}
449
450QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
451 switch (K) {
452 case InvalidTy:
453 llvm_unreachable("No representative type for Invalid ArgTypeResult");
454 case UnknownTy:
455 return QualType();
456 case CStrTy:
457 return C.getPointerType(C.CharTy);
458 case WCStrTy:
459 return C.getPointerType(C.getWCharType());
460 case PtrToArgTypeResultTy:
461 return C.getPointerType(A.getRepresentativeType(C));
462 }
463
464 return QualType(); // Not reachable.
465}
466
467std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
468 std::string S = getRepresentativeType(C).getAsString();
469 if (!Name)
470 return std::string("'") + S + "'";
471 return std::string("'") + Name + "' (aka '" + S + "')";
472}