blob: bbc6f07b36dfa5bab52e9c50c168899cdeceeda4 [file] [log] [blame]
Ted Kremenek826a3452010-07-16 02:11:22 +00001// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Shared details for processing format strings of printf and scanf
11// (and friends).
12//
13//===----------------------------------------------------------------------===//
14
15#include "FormatStringParsing.h"
Hans Wennborgd02deeb2011-12-15 10:25:47 +000016#include "clang/Basic/LangOptions.h"
Ted Kremenek826a3452010-07-16 02:11:22 +000017
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::FormatSpecifier;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::PositionContext;
Ted Kremeneka412a492010-07-20 20:04:42 +000024using clang::analyze_format_string::ConversionSpecifier;
Ted Kremenek826a3452010-07-16 02:11:22 +000025using namespace clang;
26
27// Key function to FormatStringHandler.
28FormatStringHandler::~FormatStringHandler() {}
29
30//===----------------------------------------------------------------------===//
// Functions for parsing format string components in both printf and
// scanf format strings.
33//===----------------------------------------------------------------------===//
34
35OptionalAmount
36clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
37 const char *I = Beg;
38 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39
40 unsigned accumulator = 0;
41 bool hasDigits = false;
42
43 for ( ; I != E; ++I) {
44 char c = *I;
45 if (c >= '0' && c <= '9') {
46 hasDigits = true;
47 accumulator = (accumulator * 10) + (c - '0');
48 continue;
49 }
50
51 if (hasDigits)
52 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
53 false);
54
55 break;
56 }
57
58 return OptionalAmount();
59}
60
61OptionalAmount
62clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
63 const char *E,
64 unsigned &argIndex) {
65 if (*Beg == '*') {
66 ++Beg;
67 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
68 }
69
70 return ParseAmount(Beg, E);
71}
72
73OptionalAmount
74clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
75 const char *Start,
76 const char *&Beg,
77 const char *E,
78 PositionContext p) {
79 if (*Beg == '*') {
80 const char *I = Beg + 1;
81 const OptionalAmount &Amt = ParseAmount(I, E);
82
83 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
84 H.HandleInvalidPosition(Beg, I - Beg, p);
85 return OptionalAmount(false);
86 }
87
88 if (I == E) {
89 // No more characters left?
90 H.HandleIncompleteSpecifier(Start, E - Start);
91 return OptionalAmount(false);
92 }
93
94 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
95
96 if (*I == '$') {
97 // Handle positional arguments
98
99 // Special case: '*0$', since this is an easy mistake.
100 if (Amt.getConstantAmount() == 0) {
101 H.HandleZeroPosition(Beg, I - Beg + 1);
102 return OptionalAmount(false);
103 }
104
105 const char *Tmp = Beg;
106 Beg = ++I;
107
108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
109 Tmp, 0, true);
110 }
111
112 H.HandleInvalidPosition(Beg, I - Beg, p);
113 return OptionalAmount(false);
114 }
115
116 return ParseAmount(Beg, E);
117}
118
119
120bool
121clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
122 FormatSpecifier &CS,
123 const char *Start,
124 const char *&Beg, const char *E,
125 unsigned *argIndex) {
126 // FIXME: Support negative field widths.
127 if (argIndex) {
128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
129 }
130 else {
131 const OptionalAmount Amt =
132 ParsePositionAmount(H, Start, Beg, E,
133 analyze_format_string::FieldWidthPos);
134
135 if (Amt.isInvalid())
136 return true;
137 CS.setFieldWidth(Amt);
138 }
139 return false;
140}
141
142bool
143clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
144 FormatSpecifier &FS,
145 const char *Start,
146 const char *&Beg,
147 const char *E) {
148 const char *I = Beg;
149
150 const OptionalAmount &Amt = ParseAmount(I, E);
151
152 if (I == E) {
153 // No more characters left?
154 H.HandleIncompleteSpecifier(Start, E - Start);
155 return true;
156 }
157
158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
159 // Special case: '%0$', since this is an easy mistake.
160 if (Amt.getConstantAmount() == 0) {
161 H.HandleZeroPosition(Start, I - Start);
162 return true;
163 }
164
165 FS.setArgIndex(Amt.getConstantAmount() - 1);
166 FS.setUsesPositionalArg();
167 // Update the caller's pointer if we decided to consume
168 // these characters.
169 Beg = I;
170 return false;
171 }
172
173 return false;
174}
175
176bool
177clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
178 const char *&I,
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000179 const char *E,
180 const LangOptions &LO,
181 bool IsScanf) {
Ted Kremenek826a3452010-07-16 02:11:22 +0000182 LengthModifier::Kind lmKind = LengthModifier::None;
183 const char *lmPosition = I;
184 switch (*I) {
185 default:
186 return false;
187 case 'h':
188 ++I;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000189 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
190 : LengthModifier::AsShort;
Ted Kremenek826a3452010-07-16 02:11:22 +0000191 break;
192 case 'l':
193 ++I;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000194 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
195 : LengthModifier::AsLong;
Ted Kremenek826a3452010-07-16 02:11:22 +0000196 break;
197 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
198 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
199 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
200 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
201 case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000202 case 'a':
203 if (IsScanf && !LO.C99 && !LO.CPlusPlus) {
204 // For scanf in C90, look at the next character to see if this should
205 // be parsed as the GNU extension 'a' length modifier. If not, this
206 // will be parsed as a conversion specifier.
207 ++I;
208 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
209 lmKind = LengthModifier::AsAllocate;
210 break;
211 }
212 --I;
213 }
214 return false;
Ted Kremenek826a3452010-07-16 02:11:22 +0000215 }
216 LengthModifier lm(lmPosition, lmKind);
217 FS.setLengthModifier(lm);
218 return true;
219}
220
221//===----------------------------------------------------------------------===//
222// Methods on ArgTypeResult.
223//===----------------------------------------------------------------------===//
224
225bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
226 switch (K) {
227 case InvalidTy:
David Blaikieb219cfc2011-09-23 05:06:16 +0000228 llvm_unreachable("ArgTypeResult must be valid");
Ted Kremenek826a3452010-07-16 02:11:22 +0000229
230 case UnknownTy:
231 return true;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000232
233 case AnyCharTy: {
234 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
235 switch (BT->getKind()) {
236 default:
237 break;
238 case BuiltinType::Char_S:
239 case BuiltinType::SChar:
240 case BuiltinType::UChar:
241 case BuiltinType::Char_U:
242 return true;
243 }
244 return false;
245 }
246
Ted Kremenek826a3452010-07-16 02:11:22 +0000247 case SpecificTy: {
248 argTy = C.getCanonicalType(argTy).getUnqualifiedType();
Nick Lewycky687b5df2011-12-02 23:21:43 +0000249 if (T == argTy)
Ted Kremenek826a3452010-07-16 02:11:22 +0000250 return true;
Ted Kremenekdc00d812011-07-13 17:35:14 +0000251 // Check for "compatible types".
Ted Kremenek1ad35be2011-07-14 17:05:32 +0000252 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
Ted Kremenek826a3452010-07-16 02:11:22 +0000253 switch (BT->getKind()) {
254 default:
255 break;
256 case BuiltinType::Char_S:
257 case BuiltinType::SChar:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000258 return T == C.UnsignedCharTy;
Ted Kremenekdc00d812011-07-13 17:35:14 +0000259 case BuiltinType::Char_U:
Ted Kremenek1ad35be2011-07-14 17:05:32 +0000260 case BuiltinType::UChar:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000261 return T == C.SignedCharTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000262 case BuiltinType::Short:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000263 return T == C.UnsignedShortTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000264 case BuiltinType::UShort:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000265 return T == C.ShortTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000266 case BuiltinType::Int:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000267 return T == C.UnsignedIntTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000268 case BuiltinType::UInt:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000269 return T == C.IntTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000270 case BuiltinType::Long:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000271 return T == C.UnsignedLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000272 case BuiltinType::ULong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000273 return T == C.LongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000274 case BuiltinType::LongLong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000275 return T == C.UnsignedLongLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000276 case BuiltinType::ULongLong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000277 return T == C.LongLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000278 }
279 return false;
280 }
281
282 case CStrTy: {
283 const PointerType *PT = argTy->getAs<PointerType>();
284 if (!PT)
285 return false;
286 QualType pointeeTy = PT->getPointeeType();
287 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
288 switch (BT->getKind()) {
289 case BuiltinType::Void:
290 case BuiltinType::Char_U:
291 case BuiltinType::UChar:
292 case BuiltinType::Char_S:
293 case BuiltinType::SChar:
294 return true;
295 default:
296 break;
297 }
298
299 return false;
300 }
301
302 case WCStrTy: {
303 const PointerType *PT = argTy->getAs<PointerType>();
304 if (!PT)
305 return false;
306 QualType pointeeTy =
307 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
308 return pointeeTy == C.getWCharType();
309 }
Ted Kremenek9325eaf2010-08-24 22:24:51 +0000310
311 case WIntTy: {
312 // Instead of doing a lookup for the definition of 'wint_t' (which
313 // is defined by the system headers) instead see if wchar_t and
314 // the argument type promote to the same type.
315 QualType PromoWChar =
316 C.getWCharType()->isPromotableIntegerType()
317 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
318 QualType PromoArg =
319 argTy->isPromotableIntegerType()
320 ? C.getPromotedIntegerType(argTy) : argTy;
321
322 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
323 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
324
325 return PromoWChar == PromoArg;
326 }
Ted Kremenek826a3452010-07-16 02:11:22 +0000327
328 case CPointerTy:
Anders Carlsson62425992010-11-06 14:58:53 +0000329 return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
330 argTy->isNullPtrType();
Ted Kremenek826a3452010-07-16 02:11:22 +0000331
332 case ObjCPointerTy:
Daniel Dunbard6a4d182011-06-28 23:33:55 +0000333 return argTy->getAs<ObjCObjectPointerType>() != NULL;
Ted Kremenek826a3452010-07-16 02:11:22 +0000334 }
335
336 // FIXME: Should be unreachable, but Clang is currently emitting
337 // a warning.
338 return false;
339}
340
341QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
342 switch (K) {
343 case InvalidTy:
David Blaikieb219cfc2011-09-23 05:06:16 +0000344 llvm_unreachable("No representative type for Invalid ArgTypeResult");
Ted Kremenek826a3452010-07-16 02:11:22 +0000345 case UnknownTy:
346 return QualType();
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000347 case AnyCharTy:
348 return C.CharTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000349 case SpecificTy:
350 return T;
351 case CStrTy:
352 return C.getPointerType(C.CharTy);
353 case WCStrTy:
354 return C.getPointerType(C.getWCharType());
355 case ObjCPointerTy:
356 return C.ObjCBuiltinIdTy;
357 case CPointerTy:
358 return C.VoidPtrTy;
Ted Kremenek9325eaf2010-08-24 22:24:51 +0000359 case WIntTy: {
360 QualType WC = C.getWCharType();
361 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
362 }
Ted Kremenek826a3452010-07-16 02:11:22 +0000363 }
364
365 // FIXME: Should be unreachable, but Clang is currently emitting
366 // a warning.
367 return QualType();
368}
369
Hans Wennborga792aff2011-12-07 10:33:11 +0000370std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
Hans Wennborgf4f0c602011-12-09 12:22:12 +0000371 std::string S = getRepresentativeType(C).getAsString();
372 if (Name)
373 return std::string("'") + Name + "' (aka '" + S + "')";
374 return std::string("'") + S + "'";
Hans Wennborga792aff2011-12-07 10:33:11 +0000375}
376
377
Ted Kremenek826a3452010-07-16 02:11:22 +0000378//===----------------------------------------------------------------------===//
379// Methods on OptionalAmount.
380//===----------------------------------------------------------------------===//
381
382ArgTypeResult
383analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
384 return Ctx.IntTy;
385}
386
387//===----------------------------------------------------------------------===//
388// Methods on LengthModifier.
389//===----------------------------------------------------------------------===//
390
391const char *
392analyze_format_string::LengthModifier::toString() const {
393 switch (kind) {
394 case AsChar:
395 return "hh";
396 case AsShort:
397 return "h";
398 case AsLong: // or AsWideChar
399 return "l";
400 case AsLongLong:
401 return "ll";
402 case AsIntMax:
403 return "j";
404 case AsSizeT:
405 return "z";
406 case AsPtrDiff:
407 return "t";
408 case AsLongDouble:
409 return "L";
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000410 case AsAllocate:
411 return "a";
Ted Kremenek826a3452010-07-16 02:11:22 +0000412 case None:
413 return "";
414 }
415 return NULL;
416}
417
418//===----------------------------------------------------------------------===//
Hans Wennborgb8ec3e32011-12-09 11:11:07 +0000419// Methods on ConversionSpecifier.
420//===----------------------------------------------------------------------===//
421
422const char *ConversionSpecifier::toString() const {
423 switch (kind) {
424 case dArg: return "d";
425 case iArg: return "i";
426 case oArg: return "o";
427 case uArg: return "u";
428 case xArg: return "x";
429 case XArg: return "X";
430 case fArg: return "f";
431 case FArg: return "F";
432 case eArg: return "e";
433 case EArg: return "E";
434 case gArg: return "g";
435 case GArg: return "G";
436 case aArg: return "a";
437 case AArg: return "A";
438 case cArg: return "c";
439 case sArg: return "s";
440 case pArg: return "p";
441 case nArg: return "n";
442 case PercentArg: return "%";
443 case ScanListArg: return "[";
444 case InvalidSpecifier: return NULL;
445
446 // MacOS X unicode extensions.
447 case CArg: return "C";
448 case SArg: return "S";
449
450 // Objective-C specific specifiers.
451 case ObjCObjArg: return "@";
452
453 // GlibC specific specifiers.
454 case PrintErrno: return "m";
455 }
456 return NULL;
457}
458
459//===----------------------------------------------------------------------===//
Ted Kremenek826a3452010-07-16 02:11:22 +0000460// Methods on OptionalAmount.
461//===----------------------------------------------------------------------===//
462
Chris Lattner5f9e2722011-07-23 10:55:15 +0000463void OptionalAmount::toString(raw_ostream &os) const {
Ted Kremenek826a3452010-07-16 02:11:22 +0000464 switch (hs) {
465 case Invalid:
466 case NotSpecified:
467 return;
468 case Arg:
469 if (UsesDotPrefix)
470 os << ".";
471 if (usesPositionalArg())
472 os << "*" << getPositionalArgIndex() << "$";
473 else
474 os << "*";
475 break;
476 case Constant:
477 if (UsesDotPrefix)
478 os << ".";
479 os << amt;
480 break;
481 }
482}
483
Ted Kremeneka412a492010-07-20 20:04:42 +0000484bool FormatSpecifier::hasValidLengthModifier() const {
485 switch (LM.getKind()) {
486 case LengthModifier::None:
487 return true;
488
489 // Handle most integer flags
490 case LengthModifier::AsChar:
491 case LengthModifier::AsShort:
492 case LengthModifier::AsLongLong:
493 case LengthModifier::AsIntMax:
494 case LengthModifier::AsSizeT:
495 case LengthModifier::AsPtrDiff:
496 switch (CS.getKind()) {
497 case ConversionSpecifier::dArg:
498 case ConversionSpecifier::iArg:
499 case ConversionSpecifier::oArg:
500 case ConversionSpecifier::uArg:
501 case ConversionSpecifier::xArg:
502 case ConversionSpecifier::XArg:
503 case ConversionSpecifier::nArg:
504 return true;
505 default:
506 return false;
507 }
508
509 // Handle 'l' flag
510 case LengthModifier::AsLong:
511 switch (CS.getKind()) {
512 case ConversionSpecifier::dArg:
513 case ConversionSpecifier::iArg:
514 case ConversionSpecifier::oArg:
515 case ConversionSpecifier::uArg:
516 case ConversionSpecifier::xArg:
517 case ConversionSpecifier::XArg:
518 case ConversionSpecifier::aArg:
519 case ConversionSpecifier::AArg:
520 case ConversionSpecifier::fArg:
521 case ConversionSpecifier::FArg:
522 case ConversionSpecifier::eArg:
523 case ConversionSpecifier::EArg:
524 case ConversionSpecifier::gArg:
525 case ConversionSpecifier::GArg:
526 case ConversionSpecifier::nArg:
527 case ConversionSpecifier::cArg:
528 case ConversionSpecifier::sArg:
529 return true;
530 default:
531 return false;
532 }
533
534 case LengthModifier::AsLongDouble:
535 switch (CS.getKind()) {
536 case ConversionSpecifier::aArg:
537 case ConversionSpecifier::AArg:
538 case ConversionSpecifier::fArg:
539 case ConversionSpecifier::FArg:
540 case ConversionSpecifier::eArg:
541 case ConversionSpecifier::EArg:
542 case ConversionSpecifier::gArg:
543 case ConversionSpecifier::GArg:
544 return true;
545 default:
546 return false;
547 }
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000548
549 case LengthModifier::AsAllocate:
550 switch (CS.getKind()) {
551 case ConversionSpecifier::sArg:
552 case ConversionSpecifier::SArg:
553 return true;
554 default:
555 return false;
556 }
Ted Kremeneka412a492010-07-20 20:04:42 +0000557 }
558 return false;
559}