blob: 80deb37fad24e6b87f40c57dc86001f798cb8938 [file] [log] [blame]
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends. The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
Hans Wennborgf3749f42012-08-07 08:11:26 +000018using clang::analyze_format_string::ArgType;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000019using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
Ted Kremenek1e51c202010-07-20 20:04:47 +000022using clang::analyze_format_string::ConversionSpecifier;
Ted Kremenek6ecb9502010-07-20 20:04:27 +000023using clang::analyze_scanf::ScanfConversionSpecifier;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000024using clang::analyze_scanf::ScanfSpecifier;
Dan Gohman3c46e8d2010-07-26 21:25:24 +000025using clang::UpdateOnReturn;
Hans Wennborg6fcd9322011-12-10 13:20:11 +000026using namespace clang;
Ted Kremenekd9c904d2010-07-16 02:11:31 +000027
28typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29 ScanfSpecifierResult;
30
31static bool ParseScanList(FormatStringHandler &H,
Ted Kremenek6ecb9502010-07-20 20:04:27 +000032 ScanfConversionSpecifier &CS,
Ted Kremenekd9c904d2010-07-16 02:11:31 +000033 const char *&Beg, const char *E) {
34 const char *I = Beg;
35 const char *start = I - 1;
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38 // No more characters?
39 if (I == E) {
40 H.HandleIncompleteScanList(start, I);
41 return true;
42 }
43
44 // Special case: ']' is the first character.
45 if (*I == ']') {
46 if (++I == E) {
Ted Kremenekb7c21012010-07-16 18:28:03 +000047 H.HandleIncompleteScanList(start, I - 1);
Ted Kremenekd9c904d2010-07-16 02:11:31 +000048 return true;
49 }
50 }
51
52 // Look for a ']' character which denotes the end of the scan list.
53 while (*I != ']') {
54 if (++I == E) {
Ted Kremenekb7c21012010-07-16 18:28:03 +000055 H.HandleIncompleteScanList(start, I - 1);
Ted Kremenekd9c904d2010-07-16 02:11:31 +000056 return true;
57 }
58 }
59
60 CS.setEndScanList(I);
61 return false;
62}
63
64// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
65// We can possibly refactor.
66static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
67 const char *&Beg,
68 const char *E,
Hans Wennborgd02deeb2011-12-15 10:25:47 +000069 unsigned &argIndex,
70 const LangOptions &LO) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +000071
72 using namespace clang::analyze_scanf;
73 const char *I = Beg;
74 const char *Start = 0;
75 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
76
77 // Look for a '%' character that indicates the start of a format specifier.
78 for ( ; I != E ; ++I) {
79 char c = *I;
80 if (c == '\0') {
81 // Detect spurious null characters, which are likely errors.
82 H.HandleNullChar(I);
83 return true;
84 }
85 if (c == '%') {
86 Start = I++; // Record the start of the format specifier.
87 break;
88 }
89 }
90
91 // No format specifier found?
92 if (!Start)
93 return false;
94
95 if (I == E) {
96 // No more characters left?
97 H.HandleIncompleteSpecifier(Start, E - Start);
98 return true;
99 }
100
101 ScanfSpecifier FS;
102 if (ParseArgPosition(H, FS, Start, I, E))
103 return true;
104
105 if (I == E) {
106 // No more characters left?
107 H.HandleIncompleteSpecifier(Start, E - Start);
108 return true;
109 }
110
111 // Look for '*' flag if it is present.
112 if (*I == '*') {
113 FS.setSuppressAssignment(I);
114 if (++I == E) {
115 H.HandleIncompleteSpecifier(Start, E - Start);
116 return true;
117 }
118 }
119
120 // Look for the field width (if any). Unlike printf, this is either
121 // a fixed integer or isn't present.
122 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
123 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
124 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
125 FS.setFieldWidth(Amt);
126
127 if (I == E) {
128 // No more characters left?
129 H.HandleIncompleteSpecifier(Start, E - Start);
130 return true;
131 }
132 }
133
134 // Look for the length modifier.
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000135 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000136 // No more characters left?
137 H.HandleIncompleteSpecifier(Start, E - Start);
138 return true;
139 }
140
141 // Detect spurious null characters, which are likely errors.
142 if (*I == '\0') {
143 H.HandleNullChar(I);
144 return true;
145 }
146
147 // Finally, look for the conversion specifier.
148 const char *conversionPosition = I++;
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000149 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000150 switch (*conversionPosition) {
151 default:
152 break;
Ted Kremenek1e51c202010-07-20 20:04:47 +0000153 case '%': k = ConversionSpecifier::PercentArg; break;
154 case 'A': k = ConversionSpecifier::AArg; break;
155 case 'E': k = ConversionSpecifier::EArg; break;
156 case 'F': k = ConversionSpecifier::FArg; break;
157 case 'G': k = ConversionSpecifier::GArg; break;
158 case 'X': k = ConversionSpecifier::XArg; break;
159 case 'a': k = ConversionSpecifier::aArg; break;
160 case 'd': k = ConversionSpecifier::dArg; break;
161 case 'e': k = ConversionSpecifier::eArg; break;
162 case 'f': k = ConversionSpecifier::fArg; break;
163 case 'g': k = ConversionSpecifier::gArg; break;
164 case 'i': k = ConversionSpecifier::iArg; break;
165 case 'n': k = ConversionSpecifier::nArg; break;
166 case 'c': k = ConversionSpecifier::cArg; break;
167 case 'C': k = ConversionSpecifier::CArg; break;
168 case 'S': k = ConversionSpecifier::SArg; break;
169 case '[': k = ConversionSpecifier::ScanListArg; break;
170 case 'u': k = ConversionSpecifier::uArg; break;
171 case 'x': k = ConversionSpecifier::xArg; break;
172 case 'o': k = ConversionSpecifier::oArg; break;
173 case 's': k = ConversionSpecifier::sArg; break;
174 case 'p': k = ConversionSpecifier::pArg; break;
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000175 }
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000176 ScanfConversionSpecifier CS(conversionPosition, k);
177 if (k == ScanfConversionSpecifier::ScanListArg) {
Hans Wennborg6de0b482012-01-12 14:44:54 +0000178 if (ParseScanList(H, CS, I, E))
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000179 return true;
180 }
181 FS.setConversionSpecifier(CS);
182 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
183 && !FS.usesPositionalArg())
184 FS.setArgIndex(argIndex++);
185
186 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
187 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
188
Ted Kremenek6ecb9502010-07-20 20:04:27 +0000189 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000190 // Assume the conversion takes one argument.
191 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
192 }
193 return ScanfSpecifierResult(Start, FS);
194}
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000195
Hans Wennborg58e1e542012-08-07 08:59:46 +0000196ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000197 const ScanfConversionSpecifier &CS = getConversionSpecifier();
198
199 if (!CS.consumesDataArgument())
Hans Wennborg58e1e542012-08-07 08:59:46 +0000200 return ArgType::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000201
202 switch(CS.getKind()) {
203 // Signed int.
204 case ConversionSpecifier::dArg:
205 case ConversionSpecifier::iArg:
206 switch (LM.getKind()) {
Hans Wennborg58e1e542012-08-07 08:59:46 +0000207 case LengthModifier::None:
208 return ArgType::PtrTo(Ctx.IntTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000209 case LengthModifier::AsChar:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000210 return ArgType::PtrTo(ArgType::AnyCharTy);
211 case LengthModifier::AsShort:
212 return ArgType::PtrTo(Ctx.ShortTy);
213 case LengthModifier::AsLong:
214 return ArgType::PtrTo(Ctx.LongTy);
Hans Wennborg32addd52012-02-16 16:34:54 +0000215 case LengthModifier::AsLongLong:
216 case LengthModifier::AsQuad:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000217 return ArgType::PtrTo(Ctx.LongLongTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000218 case LengthModifier::AsIntMax:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000219 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000220 case LengthModifier::AsSizeT:
221 // FIXME: ssize_t.
Hans Wennborg58e1e542012-08-07 08:59:46 +0000222 return ArgType();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000223 case LengthModifier::AsPtrDiff:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000224 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
Ted Kremenek9d24c2c2012-01-24 21:29:54 +0000225 case LengthModifier::AsLongDouble:
226 // GNU extension.
Hans Wennborg58e1e542012-08-07 08:59:46 +0000227 return ArgType::PtrTo(Ctx.LongLongTy);
228 case LengthModifier::AsAllocate:
229 return ArgType::Invalid();
230 case LengthModifier::AsMAllocate:
231 return ArgType::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000232 }
233
234 // Unsigned int.
235 case ConversionSpecifier::oArg:
236 case ConversionSpecifier::uArg:
237 case ConversionSpecifier::xArg:
238 case ConversionSpecifier::XArg:
239 switch (LM.getKind()) {
Hans Wennborg58e1e542012-08-07 08:59:46 +0000240 case LengthModifier::None:
241 return ArgType::PtrTo(Ctx.UnsignedIntTy);
242 case LengthModifier::AsChar:
243 return ArgType::PtrTo(Ctx.UnsignedCharTy);
244 case LengthModifier::AsShort:
245 return ArgType::PtrTo(Ctx.UnsignedShortTy);
246 case LengthModifier::AsLong:
247 return ArgType::PtrTo(Ctx.UnsignedLongTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000248 case LengthModifier::AsLongLong:
Hans Wennborg32addd52012-02-16 16:34:54 +0000249 case LengthModifier::AsQuad:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000250 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000251 case LengthModifier::AsIntMax:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000252 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000253 case LengthModifier::AsSizeT:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000254 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000255 case LengthModifier::AsPtrDiff:
256 // FIXME: Unsigned version of ptrdiff_t?
Hans Wennborg58e1e542012-08-07 08:59:46 +0000257 return ArgType();
Ted Kremenek9d24c2c2012-01-24 21:29:54 +0000258 case LengthModifier::AsLongDouble:
259 // GNU extension.
Hans Wennborg58e1e542012-08-07 08:59:46 +0000260 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
261 case LengthModifier::AsAllocate:
262 return ArgType::Invalid();
263 case LengthModifier::AsMAllocate:
264 return ArgType::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000265 }
266
267 // Float.
268 case ConversionSpecifier::aArg:
269 case ConversionSpecifier::AArg:
270 case ConversionSpecifier::eArg:
271 case ConversionSpecifier::EArg:
272 case ConversionSpecifier::fArg:
273 case ConversionSpecifier::FArg:
274 case ConversionSpecifier::gArg:
275 case ConversionSpecifier::GArg:
276 switch (LM.getKind()) {
Hans Wennborg58e1e542012-08-07 08:59:46 +0000277 case LengthModifier::None:
278 return ArgType::PtrTo(Ctx.FloatTy);
279 case LengthModifier::AsLong:
280 return ArgType::PtrTo(Ctx.DoubleTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000281 case LengthModifier::AsLongDouble:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000282 return ArgType::PtrTo(Ctx.LongDoubleTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000283 default:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000284 return ArgType::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000285 }
286
287 // Char, string and scanlist.
288 case ConversionSpecifier::cArg:
289 case ConversionSpecifier::sArg:
290 case ConversionSpecifier::ScanListArg:
291 switch (LM.getKind()) {
Hans Wennborg58e1e542012-08-07 08:59:46 +0000292 case LengthModifier::None:
293 return ArgType::PtrTo(ArgType::AnyCharTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000294 case LengthModifier::AsLong:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000295 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
Hans Wennborg37969b72012-01-12 17:11:12 +0000296 case LengthModifier::AsAllocate:
297 case LengthModifier::AsMAllocate:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000298 return ArgType::PtrTo(ArgType::CStrTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000299 default:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000300 return ArgType::Invalid();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000301 }
302 case ConversionSpecifier::CArg:
303 case ConversionSpecifier::SArg:
304 // FIXME: Mac OS X specific?
Hans Wennborg37969b72012-01-12 17:11:12 +0000305 switch (LM.getKind()) {
306 case LengthModifier::None:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000307 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
Hans Wennborg37969b72012-01-12 17:11:12 +0000308 case LengthModifier::AsAllocate:
309 case LengthModifier::AsMAllocate:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000310 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
Hans Wennborg37969b72012-01-12 17:11:12 +0000311 default:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000312 return ArgType::Invalid();
Hans Wennborg37969b72012-01-12 17:11:12 +0000313 }
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000314
315 // Pointer.
316 case ConversionSpecifier::pArg:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000317 return ArgType::PtrTo(ArgType::CPointerTy);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000318
Hans Wennborg58e1e542012-08-07 08:59:46 +0000319 // Write-back.
Hans Wennborgcec9ce42012-07-30 17:11:32 +0000320 case ConversionSpecifier::nArg:
Hans Wennborg58e1e542012-08-07 08:59:46 +0000321 return ArgType::PtrTo(Ctx.IntTy);
Hans Wennborgcec9ce42012-07-30 17:11:32 +0000322
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000323 default:
324 break;
325 }
326
Hans Wennborg58e1e542012-08-07 08:59:46 +0000327 return ArgType();
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000328}
329
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000330bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
331 ASTContext &Ctx) {
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000332 if (!QT->isPointerType())
333 return false;
334
Hans Wennborgcec9ce42012-07-30 17:11:32 +0000335 // %n is different from other conversion specifiers; don't try to fix it.
336 if (CS.getKind() == ConversionSpecifier::nArg)
337 return false;
338
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000339 QualType PT = QT->getPointeeType();
Jordan Rose033a9c02012-06-04 22:49:02 +0000340
341 // If it's an enum, get its underlying type.
342 if (const EnumType *ETy = QT->getAs<EnumType>())
343 QT = ETy->getDecl()->getIntegerType();
344
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000345 const BuiltinType *BT = PT->getAs<BuiltinType>();
346 if (!BT)
347 return false;
348
349 // Pointer to a character.
350 if (PT->isAnyCharacterType()) {
351 CS.setKind(ConversionSpecifier::sArg);
352 if (PT->isWideCharType())
353 LM.setKind(LengthModifier::AsWideChar);
354 else
355 LM.setKind(LengthModifier::None);
356 return true;
357 }
358
359 // Figure out the length modifier.
360 switch (BT->getKind()) {
361 // no modifier
362 case BuiltinType::UInt:
363 case BuiltinType::Int:
364 case BuiltinType::Float:
365 LM.setKind(LengthModifier::None);
366 break;
367
368 // hh
369 case BuiltinType::Char_U:
370 case BuiltinType::UChar:
371 case BuiltinType::Char_S:
372 case BuiltinType::SChar:
373 LM.setKind(LengthModifier::AsChar);
374 break;
375
376 // h
377 case BuiltinType::Short:
378 case BuiltinType::UShort:
379 LM.setKind(LengthModifier::AsShort);
380 break;
381
382 // l
383 case BuiltinType::Long:
384 case BuiltinType::ULong:
385 case BuiltinType::Double:
386 LM.setKind(LengthModifier::AsLong);
387 break;
388
389 // ll
390 case BuiltinType::LongLong:
391 case BuiltinType::ULongLong:
392 LM.setKind(LengthModifier::AsLongLong);
393 break;
394
395 // L
396 case BuiltinType::LongDouble:
397 LM.setKind(LengthModifier::AsLongDouble);
398 break;
399
400 // Don't know.
401 default:
402 return false;
403 }
404
405 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
Hans Wennborg46847782012-07-27 19:17:46 +0000406 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
407 namedTypeToLengthModifier(PT, LM);
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000408
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000409 // If fixing the length modifier was enough, we are done.
Hans Wennborg58e1e542012-08-07 08:59:46 +0000410 const analyze_scanf::ArgType &AT = getArgType(Ctx);
411 if (hasValidLengthModifier() && AT.isValid() && AT.matchesType(Ctx, QT))
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000412 return true;
413
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000414 // Figure out the conversion specifier.
415 if (PT->isRealFloatingType())
416 CS.setKind(ConversionSpecifier::fArg);
417 else if (PT->isSignedIntegerType())
418 CS.setKind(ConversionSpecifier::dArg);
Hans Wennborgbe6126a2012-02-15 09:59:46 +0000419 else if (PT->isUnsignedIntegerType())
420 CS.setKind(ConversionSpecifier::uArg);
421 else
Hans Wennborg6fcd9322011-12-10 13:20:11 +0000422 llvm_unreachable("Unexpected type");
423
424 return true;
425}
426
427void ScanfSpecifier::toString(raw_ostream &os) const {
428 os << "%";
429
430 if (usesPositionalArg())
431 os << getPositionalArgIndex() << "$";
432 if (SuppressAssignment)
433 os << "*";
434
435 FieldWidth.toString(os);
436 os << LM.toString();
437 os << CS.toString();
438}
439
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000440bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
441 const char *I,
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000442 const char *E,
443 const LangOptions &LO) {
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000444
445 unsigned argIndex = 0;
446
447 // Keep looking for a format specifier until we have exhausted the string.
448 while (I != E) {
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000449 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
450 LO);
Ted Kremenekd9c904d2010-07-16 02:11:31 +0000451 // Did a fail-stop error of any kind occur when parsing the specifier?
452 // If so, don't do any more processing.
453 if (FSR.shouldStop())
454 return true;;
455 // Did we exhaust the string or encounter an error that
456 // we can recover from?
457 if (!FSR.hasValue())
458 continue;
459 // We have a format specifier. Pass it to the callback.
460 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
461 I - FSR.getStart())) {
462 return true;
463 }
464 }
465 assert(I == E && "Format string not exhausted");
466 return false;
467}