// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Shared details for processing format strings of printf and scanf
// (and friends).
//
//===----------------------------------------------------------------------===//
14
15#include "FormatStringParsing.h"
Hans Wennborgd02deeb2011-12-15 10:25:47 +000016#include "clang/Basic/LangOptions.h"
Ted Kremenek826a3452010-07-16 02:11:22 +000017
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::FormatSpecifier;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::PositionContext;
Ted Kremeneka412a492010-07-20 20:04:42 +000024using clang::analyze_format_string::ConversionSpecifier;
Ted Kremenek826a3452010-07-16 02:11:22 +000025using namespace clang;
26
27// Key function to FormatStringHandler.
28FormatStringHandler::~FormatStringHandler() {}
29
//===----------------------------------------------------------------------===//
// Functions for parsing format string components in both printf and
// scanf format strings.
//===----------------------------------------------------------------------===//
34
35OptionalAmount
36clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
37 const char *I = Beg;
38 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39
40 unsigned accumulator = 0;
41 bool hasDigits = false;
42
43 for ( ; I != E; ++I) {
44 char c = *I;
45 if (c >= '0' && c <= '9') {
46 hasDigits = true;
47 accumulator = (accumulator * 10) + (c - '0');
48 continue;
49 }
50
51 if (hasDigits)
52 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
53 false);
54
55 break;
56 }
57
58 return OptionalAmount();
59}
60
61OptionalAmount
62clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
63 const char *E,
64 unsigned &argIndex) {
65 if (*Beg == '*') {
66 ++Beg;
67 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
68 }
69
70 return ParseAmount(Beg, E);
71}
72
73OptionalAmount
74clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
75 const char *Start,
76 const char *&Beg,
77 const char *E,
78 PositionContext p) {
79 if (*Beg == '*') {
80 const char *I = Beg + 1;
81 const OptionalAmount &Amt = ParseAmount(I, E);
82
83 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
84 H.HandleInvalidPosition(Beg, I - Beg, p);
85 return OptionalAmount(false);
86 }
87
88 if (I == E) {
89 // No more characters left?
90 H.HandleIncompleteSpecifier(Start, E - Start);
91 return OptionalAmount(false);
92 }
93
94 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
95
96 if (*I == '$') {
97 // Handle positional arguments
98
99 // Special case: '*0$', since this is an easy mistake.
100 if (Amt.getConstantAmount() == 0) {
101 H.HandleZeroPosition(Beg, I - Beg + 1);
102 return OptionalAmount(false);
103 }
104
105 const char *Tmp = Beg;
106 Beg = ++I;
107
108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
109 Tmp, 0, true);
110 }
111
112 H.HandleInvalidPosition(Beg, I - Beg, p);
113 return OptionalAmount(false);
114 }
115
116 return ParseAmount(Beg, E);
117}
118
119
120bool
121clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
122 FormatSpecifier &CS,
123 const char *Start,
124 const char *&Beg, const char *E,
125 unsigned *argIndex) {
126 // FIXME: Support negative field widths.
127 if (argIndex) {
128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
129 }
130 else {
131 const OptionalAmount Amt =
132 ParsePositionAmount(H, Start, Beg, E,
133 analyze_format_string::FieldWidthPos);
134
135 if (Amt.isInvalid())
136 return true;
137 CS.setFieldWidth(Amt);
138 }
139 return false;
140}
141
142bool
143clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
144 FormatSpecifier &FS,
145 const char *Start,
146 const char *&Beg,
147 const char *E) {
148 const char *I = Beg;
149
150 const OptionalAmount &Amt = ParseAmount(I, E);
151
152 if (I == E) {
153 // No more characters left?
154 H.HandleIncompleteSpecifier(Start, E - Start);
155 return true;
156 }
157
158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
159 // Special case: '%0$', since this is an easy mistake.
160 if (Amt.getConstantAmount() == 0) {
161 H.HandleZeroPosition(Start, I - Start);
162 return true;
163 }
164
165 FS.setArgIndex(Amt.getConstantAmount() - 1);
166 FS.setUsesPositionalArg();
167 // Update the caller's pointer if we decided to consume
168 // these characters.
169 Beg = I;
170 return false;
171 }
172
173 return false;
174}
175
176bool
177clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
178 const char *&I,
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000179 const char *E,
180 const LangOptions &LO,
181 bool IsScanf) {
Ted Kremenek826a3452010-07-16 02:11:22 +0000182 LengthModifier::Kind lmKind = LengthModifier::None;
183 const char *lmPosition = I;
184 switch (*I) {
185 default:
186 return false;
187 case 'h':
188 ++I;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000189 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
190 : LengthModifier::AsShort;
Ted Kremenek826a3452010-07-16 02:11:22 +0000191 break;
192 case 'l':
193 ++I;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000194 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
195 : LengthModifier::AsLong;
Ted Kremenek826a3452010-07-16 02:11:22 +0000196 break;
197 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
198 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
199 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
200 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
201 case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000202 case 'a':
Hans Wennborg5294c792011-12-28 13:10:50 +0000203 if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) {
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000204 // For scanf in C90, look at the next character to see if this should
205 // be parsed as the GNU extension 'a' length modifier. If not, this
206 // will be parsed as a conversion specifier.
207 ++I;
208 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
209 lmKind = LengthModifier::AsAllocate;
210 break;
211 }
212 --I;
213 }
214 return false;
Hans Wennborg37969b72012-01-12 17:11:12 +0000215 case 'm':
216 if (IsScanf) {
217 lmKind = LengthModifier::AsMAllocate;
218 ++I;
219 break;
220 }
221 return false;
Ted Kremenek826a3452010-07-16 02:11:22 +0000222 }
223 LengthModifier lm(lmPosition, lmKind);
224 FS.setLengthModifier(lm);
225 return true;
226}
227
//===----------------------------------------------------------------------===//
// Methods on ArgTypeResult.
//===----------------------------------------------------------------------===//
231
232bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
233 switch (K) {
234 case InvalidTy:
David Blaikieb219cfc2011-09-23 05:06:16 +0000235 llvm_unreachable("ArgTypeResult must be valid");
Ted Kremenek826a3452010-07-16 02:11:22 +0000236
237 case UnknownTy:
238 return true;
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000239
240 case AnyCharTy: {
241 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
242 switch (BT->getKind()) {
243 default:
244 break;
245 case BuiltinType::Char_S:
246 case BuiltinType::SChar:
247 case BuiltinType::UChar:
248 case BuiltinType::Char_U:
249 return true;
250 }
251 return false;
252 }
253
Ted Kremenek826a3452010-07-16 02:11:22 +0000254 case SpecificTy: {
255 argTy = C.getCanonicalType(argTy).getUnqualifiedType();
Nick Lewycky687b5df2011-12-02 23:21:43 +0000256 if (T == argTy)
Ted Kremenek826a3452010-07-16 02:11:22 +0000257 return true;
Ted Kremenekdc00d812011-07-13 17:35:14 +0000258 // Check for "compatible types".
Ted Kremenek1ad35be2011-07-14 17:05:32 +0000259 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
Ted Kremenek826a3452010-07-16 02:11:22 +0000260 switch (BT->getKind()) {
261 default:
262 break;
263 case BuiltinType::Char_S:
264 case BuiltinType::SChar:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000265 return T == C.UnsignedCharTy;
Ted Kremenekdc00d812011-07-13 17:35:14 +0000266 case BuiltinType::Char_U:
Ted Kremenek1ad35be2011-07-14 17:05:32 +0000267 case BuiltinType::UChar:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000268 return T == C.SignedCharTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000269 case BuiltinType::Short:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000270 return T == C.UnsignedShortTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000271 case BuiltinType::UShort:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000272 return T == C.ShortTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000273 case BuiltinType::Int:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000274 return T == C.UnsignedIntTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000275 case BuiltinType::UInt:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000276 return T == C.IntTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000277 case BuiltinType::Long:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000278 return T == C.UnsignedLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000279 case BuiltinType::ULong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000280 return T == C.LongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000281 case BuiltinType::LongLong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000282 return T == C.UnsignedLongLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000283 case BuiltinType::ULongLong:
Nick Lewycky687b5df2011-12-02 23:21:43 +0000284 return T == C.LongLongTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000285 }
286 return false;
287 }
288
289 case CStrTy: {
290 const PointerType *PT = argTy->getAs<PointerType>();
291 if (!PT)
292 return false;
293 QualType pointeeTy = PT->getPointeeType();
294 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
295 switch (BT->getKind()) {
296 case BuiltinType::Void:
297 case BuiltinType::Char_U:
298 case BuiltinType::UChar:
299 case BuiltinType::Char_S:
300 case BuiltinType::SChar:
301 return true;
302 default:
303 break;
304 }
305
306 return false;
307 }
308
309 case WCStrTy: {
310 const PointerType *PT = argTy->getAs<PointerType>();
311 if (!PT)
312 return false;
313 QualType pointeeTy =
314 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
315 return pointeeTy == C.getWCharType();
316 }
Ted Kremenek9325eaf2010-08-24 22:24:51 +0000317
318 case WIntTy: {
319 // Instead of doing a lookup for the definition of 'wint_t' (which
320 // is defined by the system headers) instead see if wchar_t and
321 // the argument type promote to the same type.
322 QualType PromoWChar =
323 C.getWCharType()->isPromotableIntegerType()
324 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
325 QualType PromoArg =
326 argTy->isPromotableIntegerType()
327 ? C.getPromotedIntegerType(argTy) : argTy;
328
329 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
330 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
331
332 return PromoWChar == PromoArg;
333 }
Ted Kremenek826a3452010-07-16 02:11:22 +0000334
335 case CPointerTy:
Anders Carlsson62425992010-11-06 14:58:53 +0000336 return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
337 argTy->isNullPtrType();
Ted Kremenek826a3452010-07-16 02:11:22 +0000338
339 case ObjCPointerTy:
Daniel Dunbard6a4d182011-06-28 23:33:55 +0000340 return argTy->getAs<ObjCObjectPointerType>() != NULL;
Ted Kremenek826a3452010-07-16 02:11:22 +0000341 }
342
343 // FIXME: Should be unreachable, but Clang is currently emitting
344 // a warning.
345 return false;
346}
347
348QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
349 switch (K) {
350 case InvalidTy:
David Blaikieb219cfc2011-09-23 05:06:16 +0000351 llvm_unreachable("No representative type for Invalid ArgTypeResult");
Ted Kremenek826a3452010-07-16 02:11:22 +0000352 case UnknownTy:
353 return QualType();
Ted Kremenek6ca4a9a2011-10-25 04:20:41 +0000354 case AnyCharTy:
355 return C.CharTy;
Ted Kremenek826a3452010-07-16 02:11:22 +0000356 case SpecificTy:
357 return T;
358 case CStrTy:
359 return C.getPointerType(C.CharTy);
360 case WCStrTy:
361 return C.getPointerType(C.getWCharType());
362 case ObjCPointerTy:
363 return C.ObjCBuiltinIdTy;
364 case CPointerTy:
365 return C.VoidPtrTy;
Ted Kremenek9325eaf2010-08-24 22:24:51 +0000366 case WIntTy: {
367 QualType WC = C.getWCharType();
368 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
369 }
Ted Kremenek826a3452010-07-16 02:11:22 +0000370 }
371
372 // FIXME: Should be unreachable, but Clang is currently emitting
373 // a warning.
374 return QualType();
375}
376
Hans Wennborga792aff2011-12-07 10:33:11 +0000377std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
Hans Wennborgf4f0c602011-12-09 12:22:12 +0000378 std::string S = getRepresentativeType(C).getAsString();
379 if (Name)
380 return std::string("'") + Name + "' (aka '" + S + "')";
381 return std::string("'") + S + "'";
Hans Wennborga792aff2011-12-07 10:33:11 +0000382}
383
384
//===----------------------------------------------------------------------===//
// Methods on OptionalAmount.
//===----------------------------------------------------------------------===//
388
389ArgTypeResult
390analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
391 return Ctx.IntTy;
392}
393
394//===----------------------------------------------------------------------===//
395// Methods on LengthModifier.
396//===----------------------------------------------------------------------===//
397
398const char *
399analyze_format_string::LengthModifier::toString() const {
400 switch (kind) {
401 case AsChar:
402 return "hh";
403 case AsShort:
404 return "h";
405 case AsLong: // or AsWideChar
406 return "l";
407 case AsLongLong:
408 return "ll";
409 case AsIntMax:
410 return "j";
411 case AsSizeT:
412 return "z";
413 case AsPtrDiff:
414 return "t";
415 case AsLongDouble:
416 return "L";
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000417 case AsAllocate:
418 return "a";
Hans Wennborg37969b72012-01-12 17:11:12 +0000419 case AsMAllocate:
420 return "m";
Ted Kremenek826a3452010-07-16 02:11:22 +0000421 case None:
422 return "";
423 }
424 return NULL;
425}
426
//===----------------------------------------------------------------------===//
// Methods on ConversionSpecifier.
//===----------------------------------------------------------------------===//
430
431const char *ConversionSpecifier::toString() const {
432 switch (kind) {
433 case dArg: return "d";
434 case iArg: return "i";
435 case oArg: return "o";
436 case uArg: return "u";
437 case xArg: return "x";
438 case XArg: return "X";
439 case fArg: return "f";
440 case FArg: return "F";
441 case eArg: return "e";
442 case EArg: return "E";
443 case gArg: return "g";
444 case GArg: return "G";
445 case aArg: return "a";
446 case AArg: return "A";
447 case cArg: return "c";
448 case sArg: return "s";
449 case pArg: return "p";
450 case nArg: return "n";
451 case PercentArg: return "%";
452 case ScanListArg: return "[";
453 case InvalidSpecifier: return NULL;
454
455 // MacOS X unicode extensions.
456 case CArg: return "C";
457 case SArg: return "S";
458
459 // Objective-C specific specifiers.
460 case ObjCObjArg: return "@";
461
462 // GlibC specific specifiers.
463 case PrintErrno: return "m";
464 }
465 return NULL;
466}
467
//===----------------------------------------------------------------------===//
// Methods on OptionalAmount and FormatSpecifier.
//===----------------------------------------------------------------------===//
471
Chris Lattner5f9e2722011-07-23 10:55:15 +0000472void OptionalAmount::toString(raw_ostream &os) const {
Ted Kremenek826a3452010-07-16 02:11:22 +0000473 switch (hs) {
474 case Invalid:
475 case NotSpecified:
476 return;
477 case Arg:
478 if (UsesDotPrefix)
479 os << ".";
480 if (usesPositionalArg())
481 os << "*" << getPositionalArgIndex() << "$";
482 else
483 os << "*";
484 break;
485 case Constant:
486 if (UsesDotPrefix)
487 os << ".";
488 os << amt;
489 break;
490 }
491}
492
Ted Kremeneka412a492010-07-20 20:04:42 +0000493bool FormatSpecifier::hasValidLengthModifier() const {
494 switch (LM.getKind()) {
495 case LengthModifier::None:
496 return true;
497
498 // Handle most integer flags
499 case LengthModifier::AsChar:
500 case LengthModifier::AsShort:
501 case LengthModifier::AsLongLong:
502 case LengthModifier::AsIntMax:
503 case LengthModifier::AsSizeT:
504 case LengthModifier::AsPtrDiff:
505 switch (CS.getKind()) {
506 case ConversionSpecifier::dArg:
507 case ConversionSpecifier::iArg:
508 case ConversionSpecifier::oArg:
509 case ConversionSpecifier::uArg:
510 case ConversionSpecifier::xArg:
511 case ConversionSpecifier::XArg:
512 case ConversionSpecifier::nArg:
513 return true;
514 default:
515 return false;
516 }
517
518 // Handle 'l' flag
519 case LengthModifier::AsLong:
520 switch (CS.getKind()) {
521 case ConversionSpecifier::dArg:
522 case ConversionSpecifier::iArg:
523 case ConversionSpecifier::oArg:
524 case ConversionSpecifier::uArg:
525 case ConversionSpecifier::xArg:
526 case ConversionSpecifier::XArg:
527 case ConversionSpecifier::aArg:
528 case ConversionSpecifier::AArg:
529 case ConversionSpecifier::fArg:
530 case ConversionSpecifier::FArg:
531 case ConversionSpecifier::eArg:
532 case ConversionSpecifier::EArg:
533 case ConversionSpecifier::gArg:
534 case ConversionSpecifier::GArg:
535 case ConversionSpecifier::nArg:
536 case ConversionSpecifier::cArg:
537 case ConversionSpecifier::sArg:
538 return true;
539 default:
540 return false;
541 }
542
543 case LengthModifier::AsLongDouble:
544 switch (CS.getKind()) {
545 case ConversionSpecifier::aArg:
546 case ConversionSpecifier::AArg:
547 case ConversionSpecifier::fArg:
548 case ConversionSpecifier::FArg:
549 case ConversionSpecifier::eArg:
550 case ConversionSpecifier::EArg:
551 case ConversionSpecifier::gArg:
552 case ConversionSpecifier::GArg:
553 return true;
554 default:
555 return false;
556 }
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000557
558 case LengthModifier::AsAllocate:
559 switch (CS.getKind()) {
560 case ConversionSpecifier::sArg:
561 case ConversionSpecifier::SArg:
Hans Wennborg28058d12012-01-12 15:07:16 +0000562 case ConversionSpecifier::ScanListArg:
Hans Wennborgd02deeb2011-12-15 10:25:47 +0000563 return true;
564 default:
565 return false;
566 }
Hans Wennborg37969b72012-01-12 17:11:12 +0000567
568 case LengthModifier::AsMAllocate:
569 switch (CS.getKind()) {
570 case ConversionSpecifier::cArg:
571 case ConversionSpecifier::CArg:
572 case ConversionSpecifier::sArg:
573 case ConversionSpecifier::SArg:
574 case ConversionSpecifier::ScanListArg:
575 return true;
576 default:
577 return false;
578 }
Ted Kremeneka412a492010-07-20 20:04:42 +0000579 }
580 return false;
581}