blob: fb52742bb0a5d88add4bf2758e6bd4f8b16fcd06 [file] [log] [blame]
Ted Kremenek826a3452010-07-16 02:11:22 +00001// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Shared details for processing format strings of printf and scanf
11// (and friends).
12//
13//===----------------------------------------------------------------------===//
14
15#include "FormatStringParsing.h"
Hans Wennborgd02deeb2011-12-15 10:25:47 +000016#include "clang/Basic/LangOptions.h"
Ted Kremenek826a3452010-07-16 02:11:22 +000017
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::FormatSpecifier;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::PositionContext;
Ted Kremeneka412a492010-07-20 20:04:42 +000024using clang::analyze_format_string::ConversionSpecifier;
Ted Kremenek826a3452010-07-16 02:11:22 +000025using namespace clang;
26
27// Key function to FormatStringHandler.
28FormatStringHandler::~FormatStringHandler() {}
29
30//===----------------------------------------------------------------------===//
// Functions for parsing format string components in both printf and
32// scanf format strings.
33//===----------------------------------------------------------------------===//
34
35OptionalAmount
36clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
37 const char *I = Beg;
38 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39
40 unsigned accumulator = 0;
41 bool hasDigits = false;
42
43 for ( ; I != E; ++I) {
44 char c = *I;
45 if (c >= '0' && c <= '9') {
46 hasDigits = true;
47 accumulator = (accumulator * 10) + (c - '0');
48 continue;
49 }
50
51 if (hasDigits)
52 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
53 false);
54
55 break;
56 }
57
58 return OptionalAmount();
59}
60
61OptionalAmount
62clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
63 const char *E,
64 unsigned &argIndex) {
65 if (*Beg == '*') {
66 ++Beg;
67 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
68 }
69
70 return ParseAmount(Beg, E);
71}
72
73OptionalAmount
74clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
75 const char *Start,
76 const char *&Beg,
77 const char *E,
78 PositionContext p) {
79 if (*Beg == '*') {
80 const char *I = Beg + 1;
81 const OptionalAmount &Amt = ParseAmount(I, E);
82
83 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
84 H.HandleInvalidPosition(Beg, I - Beg, p);
85 return OptionalAmount(false);
86 }
87
88 if (I == E) {
89 // No more characters left?
90 H.HandleIncompleteSpecifier(Start, E - Start);
91 return OptionalAmount(false);
92 }
93
94 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
95
96 if (*I == '$') {
97 // Handle positional arguments
98
99 // Special case: '*0$', since this is an easy mistake.
100 if (Amt.getConstantAmount() == 0) {
101 H.HandleZeroPosition(Beg, I - Beg + 1);
102 return OptionalAmount(false);
103 }
104
105 const char *Tmp = Beg;
106 Beg = ++I;
107
108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
109 Tmp, 0, true);
110 }
111
112 H.HandleInvalidPosition(Beg, I - Beg, p);
113 return OptionalAmount(false);
114 }
115
116 return ParseAmount(Beg, E);
117}
118
119
120bool
121clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
122 FormatSpecifier &CS,
123 const char *Start,
124 const char *&Beg, const char *E,
125 unsigned *argIndex) {
126 // FIXME: Support negative field widths.
127 if (argIndex) {
128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
129 }
130 else {
131 const OptionalAmount Amt =
132 ParsePositionAmount(H, Start, Beg, E,
133 analyze_format_string::FieldWidthPos);
134
135 if (Amt.isInvalid())
136 return true;
137 CS.setFieldWidth(Amt);
138 }
139 return false;
140}
141
142bool
143clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
144 FormatSpecifier &FS,
145 const char *Start,
146 const char *&Beg,
147 const char *E) {
148 const char *I = Beg;
149
150 const OptionalAmount &Amt = ParseAmount(I, E);
151
152 if (I == E) {
153 // No more characters left?
154 H.HandleIncompleteSpecifier(Start, E - Start);
155 return true;
156 }
157
158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
159 // Special case: '%0$', since this is an easy mistake.
160 if (Amt.getConstantAmount() == 0) {
161 H.HandleZeroPosition(Start, I - Start);
162 return true;
163 }
164
165 FS.setArgIndex(Amt.getConstantAmount() - 1);
166 FS.setUsesPositionalArg();
167 // Update the caller's pointer if we decided to consume
168 // these characters.
169 Beg = I;
170 return false;
171 }
172
173 return false;
174}
175
176bool
177clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
178 const char *&I,
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000179 const char *E,
180 const LangOptions &LO,
181 bool IsScanf) {
Ted Kremenek826a3452010-07-16 02:11:22 +0000182 LengthModifier::Kind lmKind = LengthModifier::None;
183 const char *lmPosition = I;
184 switch (*I) {
185 default:
186 return false;
187 case 'h':
188 ++I;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000189 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
190 : LengthModifier::AsShort;
Ted Kremenek826a3452010-07-16 02:11:22 +0000191 break;
192 case 'l':
193 ++I;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000194 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
195 : LengthModifier::AsLong;
Ted Kremenek826a3452010-07-16 02:11:22 +0000196 break;
197 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
198 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
199 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
200 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
201 case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000202 case 'a':
Hans Wennborg5294c792011-12-28 13:10:50 +0000203 if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) {
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000204 // For scanf in C90, look at the next character to see if this should
205 // be parsed as the GNU extension 'a' length modifier. If not, this
206 // will be parsed as a conversion specifier.
207 ++I;
208 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
209 lmKind = LengthModifier::AsAllocate;
210 break;
211 }
212 --I;
213 }
214 return false;
Hans Wennborg37969b72012-01-12 17:11:12 +0000215 case 'm':
216 if (IsScanf) {
217 lmKind = LengthModifier::AsMAllocate;
218 ++I;
219 break;
220 }
221 return false;
Ted Kremenek826a3452010-07-16 02:11:22 +0000222 }
223 LengthModifier lm(lmPosition, lmKind);
224 FS.setLengthModifier(lm);
225 return true;
226}
227
228//===----------------------------------------------------------------------===//
229// Methods on ArgTypeResult.
230//===----------------------------------------------------------------------===//
231
232bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
233 switch (K) {
234 case InvalidTy:
David Blaikieb219cfc2011-09-23 05:06:16 +0000235 llvm_unreachable("ArgTypeResult must be valid");
Ted Kremenek826a3452010-07-16 02:11:22 +0000236
237 case UnknownTy:
238 return true;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000239
240 case AnyCharTy: {
241 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
242 switch (BT->getKind()) {
243 default:
244 break;
245 case BuiltinType::Char_S:
246 case BuiltinType::SChar:
247 case BuiltinType::UChar:
248 case BuiltinType::Char_U:
249 return true;
250 }
251 return false;
252 }
253
Ted Kremenek826a3452010-07-16 02:11:22 +0000254 case SpecificTy: {
255 argTy = C.getCanonicalType(argTy).getUnqualifiedType();
Nick Lewycky687b5df2011-12-02 23:21:43 +0000256 if (T == argTy)
Ted Kremenek826a3452010-07-16 02:11:22 +0000257 return true;
Ted Kremenekdc00d812011-07-13 17:35:14 +0000258 // Check for "compatible types".
Ted Kremenek1ad35be2011-07-14 17:05:32 +0000259 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
Ted Kremenek826a3452010-07-16 02:11:22 +0000260 switch (BT->getKind()) {
261 default:
262 break;
263 case BuiltinType::Char_S:
264 case BuiltinType::SChar:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000265 return T == C.UnsignedCharTy;
Ted Kremenekdc00d812011-07-13 17:35:14 +0000266 case BuiltinType::Char_U:
Ted Kremenek1ad35be2011-07-14 17:05:32 +0000267 case BuiltinType::UChar:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000268 return T == C.SignedCharTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000269 case BuiltinType::Short:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000270 return T == C.UnsignedShortTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000271 case BuiltinType::UShort:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000272 return T == C.ShortTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000273 case BuiltinType::Int:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000274 return T == C.UnsignedIntTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000275 case BuiltinType::UInt:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000276 return T == C.IntTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000277 case BuiltinType::Long:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000278 return T == C.UnsignedLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000279 case BuiltinType::ULong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000280 return T == C.LongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000281 case BuiltinType::LongLong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000282 return T == C.UnsignedLongLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000283 case BuiltinType::ULongLong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000284 return T == C.LongLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000285 }
286 return false;
287 }
288
289 case CStrTy: {
290 const PointerType *PT = argTy->getAs<PointerType>();
291 if (!PT)
292 return false;
293 QualType pointeeTy = PT->getPointeeType();
294 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
295 switch (BT->getKind()) {
296 case BuiltinType::Void:
297 case BuiltinType::Char_U:
298 case BuiltinType::UChar:
299 case BuiltinType::Char_S:
300 case BuiltinType::SChar:
301 return true;
302 default:
303 break;
304 }
305
306 return false;
307 }
308
309 case WCStrTy: {
310 const PointerType *PT = argTy->getAs<PointerType>();
311 if (!PT)
312 return false;
313 QualType pointeeTy =
314 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
315 return pointeeTy == C.getWCharType();
316 }
Ted Kremenek9325eaf2010-08-24 22:24:51 +0000317
318 case WIntTy: {
319 // Instead of doing a lookup for the definition of 'wint_t' (which
320 // is defined by the system headers) instead see if wchar_t and
321 // the argument type promote to the same type.
322 QualType PromoWChar =
323 C.getWCharType()->isPromotableIntegerType()
324 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
325 QualType PromoArg =
326 argTy->isPromotableIntegerType()
327 ? C.getPromotedIntegerType(argTy) : argTy;
328
329 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
330 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
331
332 return PromoWChar == PromoArg;
333 }
Ted Kremenek826a3452010-07-16 02:11:22 +0000334
335 case CPointerTy:
Anders Carlsson62425992010-11-06 14:58:53 +0000336 return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
337 argTy->isNullPtrType();
Ted Kremenek826a3452010-07-16 02:11:22 +0000338
339 case ObjCPointerTy:
Daniel Dunbard6a4d182011-06-28 23:33:55 +0000340 return argTy->getAs<ObjCObjectPointerType>() != NULL;
Ted Kremenek826a3452010-07-16 02:11:22 +0000341 }
342
David Blaikie30263482012-01-20 21:50:17 +0000343 llvm_unreachable("Invalid ArgTypeResult Kind!");
Ted Kremenek826a3452010-07-16 02:11:22 +0000344}
345
346QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
347 switch (K) {
348 case InvalidTy:
David Blaikieb219cfc2011-09-23 05:06:16 +0000349 llvm_unreachable("No representative type for Invalid ArgTypeResult");
Ted Kremenek826a3452010-07-16 02:11:22 +0000350 case UnknownTy:
351 return QualType();
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000352 case AnyCharTy:
353 return C.CharTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000354 case SpecificTy:
355 return T;
356 case CStrTy:
357 return C.getPointerType(C.CharTy);
358 case WCStrTy:
359 return C.getPointerType(C.getWCharType());
360 case ObjCPointerTy:
361 return C.ObjCBuiltinIdTy;
362 case CPointerTy:
363 return C.VoidPtrTy;
Ted Kremenek9325eaf2010-08-24 22:24:51 +0000364 case WIntTy: {
365 QualType WC = C.getWCharType();
366 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
367 }
Ted Kremenek826a3452010-07-16 02:11:22 +0000368 }
369
David Blaikie30263482012-01-20 21:50:17 +0000370 llvm_unreachable("Invalid ArgTypeResult Kind!");
Ted Kremenek826a3452010-07-16 02:11:22 +0000371}
372
Hans Wennborga792aff2011-12-07 10:33:11 +0000373std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
Hans Wennborgf4f0c602011-12-09 12:22:12 +0000374 std::string S = getRepresentativeType(C).getAsString();
375 if (Name)
376 return std::string("'") + Name + "' (aka '" + S + "')";
377 return std::string("'") + S + "'";
Hans Wennborga792aff2011-12-07 10:33:11 +0000378}
379
380
Ted Kremenek826a3452010-07-16 02:11:22 +0000381//===----------------------------------------------------------------------===//
382// Methods on OptionalAmount.
383//===----------------------------------------------------------------------===//
384
385ArgTypeResult
386analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
387 return Ctx.IntTy;
388}
389
390//===----------------------------------------------------------------------===//
391// Methods on LengthModifier.
392//===----------------------------------------------------------------------===//
393
394const char *
395analyze_format_string::LengthModifier::toString() const {
396 switch (kind) {
397 case AsChar:
398 return "hh";
399 case AsShort:
400 return "h";
401 case AsLong: // or AsWideChar
402 return "l";
403 case AsLongLong:
404 return "ll";
405 case AsIntMax:
406 return "j";
407 case AsSizeT:
408 return "z";
409 case AsPtrDiff:
410 return "t";
411 case AsLongDouble:
412 return "L";
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000413 case AsAllocate:
414 return "a";
Hans Wennborg37969b72012-01-12 17:11:12 +0000415 case AsMAllocate:
416 return "m";
Ted Kremenek826a3452010-07-16 02:11:22 +0000417 case None:
418 return "";
419 }
420 return NULL;
421}
422
423//===----------------------------------------------------------------------===//
Hans Wennborgb8ec3e32011-12-09 11:11:07 +0000424// Methods on ConversionSpecifier.
425//===----------------------------------------------------------------------===//
426
427const char *ConversionSpecifier::toString() const {
428 switch (kind) {
429 case dArg: return "d";
430 case iArg: return "i";
431 case oArg: return "o";
432 case uArg: return "u";
433 case xArg: return "x";
434 case XArg: return "X";
435 case fArg: return "f";
436 case FArg: return "F";
437 case eArg: return "e";
438 case EArg: return "E";
439 case gArg: return "g";
440 case GArg: return "G";
441 case aArg: return "a";
442 case AArg: return "A";
443 case cArg: return "c";
444 case sArg: return "s";
445 case pArg: return "p";
446 case nArg: return "n";
447 case PercentArg: return "%";
448 case ScanListArg: return "[";
449 case InvalidSpecifier: return NULL;
450
451 // MacOS X unicode extensions.
452 case CArg: return "C";
453 case SArg: return "S";
454
455 // Objective-C specific specifiers.
456 case ObjCObjArg: return "@";
457
458 // GlibC specific specifiers.
459 case PrintErrno: return "m";
460 }
461 return NULL;
462}
463
464//===----------------------------------------------------------------------===//
Ted Kremenek826a3452010-07-16 02:11:22 +0000465// Methods on OptionalAmount.
466//===----------------------------------------------------------------------===//
467
Chris Lattner5f9e2722011-07-23 10:55:15 +0000468void OptionalAmount::toString(raw_ostream &os) const {
Ted Kremenek826a3452010-07-16 02:11:22 +0000469 switch (hs) {
470 case Invalid:
471 case NotSpecified:
472 return;
473 case Arg:
474 if (UsesDotPrefix)
475 os << ".";
476 if (usesPositionalArg())
477 os << "*" << getPositionalArgIndex() << "$";
478 else
479 os << "*";
480 break;
481 case Constant:
482 if (UsesDotPrefix)
483 os << ".";
484 os << amt;
485 break;
486 }
487}
488
Ted Kremeneka412a492010-07-20 20:04:42 +0000489bool FormatSpecifier::hasValidLengthModifier() const {
490 switch (LM.getKind()) {
491 case LengthModifier::None:
492 return true;
493
494 // Handle most integer flags
495 case LengthModifier::AsChar:
496 case LengthModifier::AsShort:
497 case LengthModifier::AsLongLong:
498 case LengthModifier::AsIntMax:
499 case LengthModifier::AsSizeT:
500 case LengthModifier::AsPtrDiff:
501 switch (CS.getKind()) {
502 case ConversionSpecifier::dArg:
503 case ConversionSpecifier::iArg:
504 case ConversionSpecifier::oArg:
505 case ConversionSpecifier::uArg:
506 case ConversionSpecifier::xArg:
507 case ConversionSpecifier::XArg:
508 case ConversionSpecifier::nArg:
509 return true;
510 default:
511 return false;
512 }
513
514 // Handle 'l' flag
515 case LengthModifier::AsLong:
516 switch (CS.getKind()) {
517 case ConversionSpecifier::dArg:
518 case ConversionSpecifier::iArg:
519 case ConversionSpecifier::oArg:
520 case ConversionSpecifier::uArg:
521 case ConversionSpecifier::xArg:
522 case ConversionSpecifier::XArg:
523 case ConversionSpecifier::aArg:
524 case ConversionSpecifier::AArg:
525 case ConversionSpecifier::fArg:
526 case ConversionSpecifier::FArg:
527 case ConversionSpecifier::eArg:
528 case ConversionSpecifier::EArg:
529 case ConversionSpecifier::gArg:
530 case ConversionSpecifier::GArg:
531 case ConversionSpecifier::nArg:
532 case ConversionSpecifier::cArg:
533 case ConversionSpecifier::sArg:
Ted Kremenekef1440b2012-01-20 22:11:52 +0000534 case ConversionSpecifier::ScanListArg:
Ted Kremeneka412a492010-07-20 20:04:42 +0000535 return true;
536 default:
537 return false;
538 }
539
540 case LengthModifier::AsLongDouble:
541 switch (CS.getKind()) {
542 case ConversionSpecifier::aArg:
543 case ConversionSpecifier::AArg:
544 case ConversionSpecifier::fArg:
545 case ConversionSpecifier::FArg:
546 case ConversionSpecifier::eArg:
547 case ConversionSpecifier::EArg:
548 case ConversionSpecifier::gArg:
549 case ConversionSpecifier::GArg:
550 return true;
551 default:
552 return false;
553 }
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000554
555 case LengthModifier::AsAllocate:
556 switch (CS.getKind()) {
557 case ConversionSpecifier::sArg:
558 case ConversionSpecifier::SArg:
Hans Wennborg28058d12012-01-12 15:07:16 +0000559 case ConversionSpecifier::ScanListArg:
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000560 return true;
561 default:
562 return false;
563 }
Hans Wennborg37969b72012-01-12 17:11:12 +0000564
565 case LengthModifier::AsMAllocate:
566 switch (CS.getKind()) {
567 case ConversionSpecifier::cArg:
568 case ConversionSpecifier::CArg:
569 case ConversionSpecifier::sArg:
570 case ConversionSpecifier::SArg:
571 case ConversionSpecifier::ScanListArg:
572 return true;
573 default:
574 return false;
575 }
Ted Kremeneka412a492010-07-20 20:04:42 +0000576 }
David Blaikie30263482012-01-20 21:50:17 +0000577 llvm_unreachable("Invalid LengthModifier Kind!");
Ted Kremeneka412a492010-07-20 20:04:42 +0000578}