blob: 55abd1077150634c1880fbd4c1dabfc86b2067e0 [file] [log] [blame]
Ted Kremeneka2e77b42010-01-27 23:43:25 +00001//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
Ted Kremenek79db7b72010-01-29 22:59:32 +000016#include "clang/AST/ASTContext.h"
Ted Kremeneka2e77b42010-01-27 23:43:25 +000017
Ted Kremenek176f7d62010-01-29 02:13:53 +000018using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
Ted Kremenek79db7b72010-01-29 22:59:32 +000020using clang::analyze_printf::ArgTypeResult;
Ted Kremenek1de17072010-02-04 20:46:58 +000021using clang::analyze_printf::FormatStringHandler;
Ted Kremenekc22f78d2010-01-29 03:16:21 +000022using namespace clang;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000023
24namespace {
25class FormatSpecifierResult {
26 FormatSpecifier FS;
27 const char *Start;
Ted Kremenek94af5752010-01-29 02:40:24 +000028 bool Stop;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000029public:
Ted Kremenek94af5752010-01-29 02:40:24 +000030 FormatSpecifierResult(bool stop = false)
31 : Start(0), Stop(stop) {}
Ted Kremeneka2e77b42010-01-27 23:43:25 +000032 FormatSpecifierResult(const char *start,
Ted Kremenek08ad1cc2010-01-28 02:02:59 +000033 const FormatSpecifier &fs)
Ted Kremenek94af5752010-01-29 02:40:24 +000034 : FS(fs), Start(start), Stop(false) {}
Ted Kremeneka2e77b42010-01-27 23:43:25 +000035
Ted Kremenekc8b188d2010-02-16 01:46:59 +000036
Ted Kremeneka2e77b42010-01-27 23:43:25 +000037 const char *getStart() const { return Start; }
Ted Kremenek94af5752010-01-29 02:40:24 +000038 bool shouldStop() const { return Stop; }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000039 bool hasValue() const { return Start != 0; }
40 const FormatSpecifier &getValue() const {
41 assert(hasValue());
42 return FS;
43 }
Ted Kremenek08ad1cc2010-01-28 02:02:59 +000044 const FormatSpecifier &getValue() { return FS; }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000045};
46} // end anonymous namespace
47
/// Scope guard that, on destruction, assigns the value of one variable to
/// another.
///
/// Both variables are held by reference, so the value written is whatever
/// the source variable contains at the moment the guard is destroyed, not
/// what it contained when the guard was created.
template <typename ValueT>
class UpdateOnReturn {
  ValueT &Destination;
  const ValueT &Source;

public:
  /// \param valueToUpdate  variable that is assigned to on destruction.
  /// \param valueToCopy    variable whose value is read on destruction.
  UpdateOnReturn(ValueT &valueToUpdate, const ValueT &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  ~UpdateOnReturn() { Destination = Source; }
};
60
61//===----------------------------------------------------------------------===//
62// Methods for parsing format strings.
63//===----------------------------------------------------------------------===//
Ted Kremeneka2e77b42010-01-27 23:43:25 +000064
65static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
66 const char *I = Beg;
67 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
Ted Kremenekc8b188d2010-02-16 01:46:59 +000068
Ted Kremeneka2e77b42010-01-27 23:43:25 +000069 bool foundDigits = false;
70 unsigned accumulator = 0;
71
72 for ( ; I != E; ++I) {
73 char c = *I;
74 if (c >= '0' && c <= '9') {
75 foundDigits = true;
76 accumulator += (accumulator * 10) + (c - '0');
77 continue;
78 }
79
80 if (foundDigits)
Ted Kremenek5739de72010-01-29 01:06:55 +000081 return OptionalAmount(accumulator, Beg);
Ted Kremenekc8b188d2010-02-16 01:46:59 +000082
Ted Kremenek5739de72010-01-29 01:06:55 +000083 if (c == '*') {
84 ++I;
85 return OptionalAmount(OptionalAmount::Arg, Beg);
86 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +000087
Ted Kremeneka2e77b42010-01-27 23:43:25 +000088 break;
89 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +000090
91 return OptionalAmount();
Ted Kremeneka2e77b42010-01-27 23:43:25 +000092}
93
Ted Kremenekc22f78d2010-01-29 03:16:21 +000094static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
Ted Kremenek1de17072010-02-04 20:46:58 +000095 const char *&Beg,
96 const char *E) {
Ted Kremenekc8b188d2010-02-16 01:46:59 +000097
Ted Kremenek176f7d62010-01-29 02:13:53 +000098 using namespace clang::analyze_printf;
Ted Kremenekc8b188d2010-02-16 01:46:59 +000099
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000100 const char *I = Beg;
Ted Kremenekc8d9c012010-01-28 00:02:05 +0000101 const char *Start = 0;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000102 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
103
104 // Look for a '%' character that indicates the start of a format specifier.
Ted Kremenekb5c98ef2010-01-28 23:56:52 +0000105 for ( ; I != E ; ++I) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000106 char c = *I;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000107 if (c == '\0') {
108 // Detect spurious null characters, which are likely errors.
109 H.HandleNullChar(I);
110 return true;
111 }
112 if (c == '%') {
Ted Kremenekb5c98ef2010-01-28 23:56:52 +0000113 Start = I++; // Record the start of the format specifier.
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000114 break;
115 }
116 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000117
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000118 // No format specifier found?
119 if (!Start)
120 return false;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000121
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000122 if (I == E) {
123 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000124 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000125 return true;
126 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000127
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000128 FormatSpecifier FS;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000129
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000130 // Look for flags (if any).
131 bool hasMore = true;
132 for ( ; I != E; ++I) {
133 switch (*I) {
134 default: hasMore = false; break;
135 case '-': FS.setIsLeftJustified(); break;
136 case '+': FS.setHasPlusPrefix(); break;
137 case ' ': FS.setHasSpacePrefix(); break;
138 case '#': FS.setHasAlternativeForm(); break;
139 case '0': FS.setHasLeadingZeros(); break;
140 }
141 if (!hasMore)
142 break;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000143 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000144
145 if (I == E) {
146 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000147 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000148 return true;
149 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000150
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000151 // Look for the field width (if any).
152 FS.setFieldWidth(ParseAmount(I, E));
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000153
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000154 if (I == E) {
155 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000156 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000157 return true;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000158 }
159
160 // Look for the precision (if any).
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000161 if (*I == '.') {
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000162 ++I;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000163 if (I == E) {
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000164 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000165 return true;
166 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000167
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000168 FS.setPrecision(ParseAmount(I, E));
169
170 if (I == E) {
171 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000172 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000173 return true;
174 }
175 }
176
177 // Look for the length modifier.
178 LengthModifier lm = None;
179 switch (*I) {
180 default:
181 break;
182 case 'h':
183 ++I;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000184 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000185 break;
186 case 'l':
187 ++I;
188 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
189 break;
190 case 'j': lm = AsIntMax; ++I; break;
191 case 'z': lm = AsSizeT; ++I; break;
192 case 't': lm = AsPtrDiff; ++I; break;
193 case 'L': lm = AsLongDouble; ++I; break;
Daniel Dunbar19b70bd2010-01-30 15:49:20 +0000194 case 'q': lm = AsLongLong; ++I; break;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000195 }
196 FS.setLengthModifier(lm);
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000197
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000198 if (I == E) {
199 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000200 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000201 return true;
202 }
Ted Kremenek2a0cd592010-02-09 00:04:09 +0000203
Ted Kremenek23a71a12010-01-29 20:29:53 +0000204 if (*I == '\0') {
Ted Kremenek2a0cd592010-02-09 00:04:09 +0000205 // Detect spurious null characters, which are likely errors.
206 H.HandleNullChar(I);
207 return true;
Ted Kremenek23a71a12010-01-29 20:29:53 +0000208 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000209
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000210 // Finally, look for the conversion specifier.
Ted Kremenekfee0e962010-01-28 02:46:17 +0000211 const char *conversionPosition = I++;
Ted Kremenek94af5752010-01-29 02:40:24 +0000212 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
Ted Kremenekfee0e962010-01-28 02:46:17 +0000213 switch (*conversionPosition) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000214 default:
Ted Kremenek94af5752010-01-29 02:40:24 +0000215 break;
Ted Kremenekc06ead62010-01-28 00:55:28 +0000216 // C99: 7.19.6.1 (section 8).
Ted Kremenekfee0e962010-01-28 02:46:17 +0000217 case 'd': k = ConversionSpecifier::dArg; break;
218 case 'i': k = ConversionSpecifier::iArg; break;
219 case 'o': k = ConversionSpecifier::oArg; break;
220 case 'u': k = ConversionSpecifier::uArg; break;
221 case 'x': k = ConversionSpecifier::xArg; break;
222 case 'X': k = ConversionSpecifier::XArg; break;
223 case 'f': k = ConversionSpecifier::fArg; break;
224 case 'F': k = ConversionSpecifier::FArg; break;
225 case 'e': k = ConversionSpecifier::eArg; break;
226 case 'E': k = ConversionSpecifier::EArg; break;
227 case 'g': k = ConversionSpecifier::gArg; break;
228 case 'G': k = ConversionSpecifier::GArg; break;
229 case 'a': k = ConversionSpecifier::aArg; break;
230 case 'A': k = ConversionSpecifier::AArg; break;
231 case 'c': k = ConversionSpecifier::IntAsCharArg; break;
232 case 's': k = ConversionSpecifier::CStrArg; break;
233 case 'p': k = ConversionSpecifier::VoidPtrArg; break;
234 case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000235 case '%': k = ConversionSpecifier::PercentArg; break;
Ted Kremenekc06ead62010-01-28 00:55:28 +0000236 // Objective-C.
Ted Kremenek23a71a12010-01-29 20:29:53 +0000237 case '@': k = ConversionSpecifier::ObjCObjArg; break;
Ted Kremenek2a0cd592010-02-09 00:04:09 +0000238 // Glibc specific.
Ted Kremenek23a71a12010-01-29 20:29:53 +0000239 case 'm': k = ConversionSpecifier::PrintErrno; break;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000240 }
Ted Kremenekfee0e962010-01-28 02:46:17 +0000241 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
Ted Kremenek94af5752010-01-29 02:40:24 +0000242
243 if (k == ConversionSpecifier::InvalidSpecifier) {
244 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
245 return false; // Keep processing format specifiers.
246 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000247 return FormatSpecifierResult(Start, FS);
248}
249
Ted Kremenek1de17072010-02-04 20:46:58 +0000250bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
Ted Kremenekfee0e962010-01-28 02:46:17 +0000251 const char *I, const char *E) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000252 // Keep looking for a format specifier until we have exhausted the string.
253 while (I != E) {
254 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
Ted Kremenek94af5752010-01-29 02:40:24 +0000255 // Did a fail-stop error of any kind occur when parsing the specifier?
256 // If so, don't do any more processing.
257 if (FSR.shouldStop())
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000258 return true;;
Ted Kremenek94af5752010-01-29 02:40:24 +0000259 // Did we exhaust the string or encounter an error that
260 // we can recover from?
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000261 if (!FSR.hasValue())
Ted Kremenek94af5752010-01-29 02:40:24 +0000262 continue;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000263 // We have a format specifier. Pass it to the callback.
Ted Kremenekfee0e962010-01-28 02:46:17 +0000264 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
265 I - FSR.getStart()))
Ted Kremenek23a71a12010-01-29 20:29:53 +0000266 return true;
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000267 }
268 assert(I == E && "Format string not exhausted");
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000269 return false;
270}
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000271
272FormatStringHandler::~FormatStringHandler() {}
Ted Kremenek79db7b72010-01-29 22:59:32 +0000273
274//===----------------------------------------------------------------------===//
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000275// Methods on ArgTypeResult.
276//===----------------------------------------------------------------------===//
277
278bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
279 assert(isValid());
280
281 if (K == UnknownTy)
282 return true;
283
284 if (K == SpecificTy) {
285 argTy = C.getCanonicalType(argTy).getUnqualifiedType();
286
287 if (T == argTy)
288 return true;
289
290 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
291 switch (BT->getKind()) {
292 default:
293 break;
294 case BuiltinType::Char_S:
295 case BuiltinType::SChar:
296 return T == C.UnsignedCharTy;
297 case BuiltinType::Char_U:
298 case BuiltinType::UChar:
299 return T == C.SignedCharTy;
300 case BuiltinType::Short:
301 return T == C.UnsignedShortTy;
302 case BuiltinType::UShort:
303 return T == C.ShortTy;
304 case BuiltinType::Int:
305 return T == C.UnsignedIntTy;
306 case BuiltinType::UInt:
307 return T == C.IntTy;
308 case BuiltinType::Long:
309 return T == C.UnsignedLongTy;
310 case BuiltinType::ULong:
311 return T == C.LongTy;
312 case BuiltinType::LongLong:
313 return T == C.UnsignedLongLongTy;
314 case BuiltinType::ULongLong:
315 return T == C.LongLongTy;
316 }
317
318 return false;
319 }
320
321 if (K == CStrTy) {
322 const PointerType *PT = argTy->getAs<PointerType>();
323 if (!PT)
324 return false;
325
326 QualType pointeeTy = PT->getPointeeType();
327
328 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
329 switch (BT->getKind()) {
330 case BuiltinType::Void:
331 case BuiltinType::Char_U:
332 case BuiltinType::UChar:
333 case BuiltinType::Char_S:
334 case BuiltinType::SChar:
335 return true;
336 default:
337 break;
338 }
339
340 return false;
341 }
342
343 if (K == WCStrTy) {
344 const PointerType *PT = argTy->getAs<PointerType>();
345 if (!PT)
346 return false;
347
348 QualType pointeeTy = PT->getPointeeType();
349 return pointeeTy == C.WCharTy;
350 }
351
352 return false;
353}
354
355QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
356 assert(isValid());
357 if (K == SpecificTy)
358 return T;
359 if (K == CStrTy)
360 return C.getPointerType(C.CharTy);
361 if (K == WCStrTy)
362 return C.getPointerType(C.WCharTy);
363 if (K == ObjCPointerTy)
364 return C.ObjCBuiltinIdTy;
365
366 return QualType();
367}
368
369//===----------------------------------------------------------------------===//
370// Methods on OptionalAmount.
371//===----------------------------------------------------------------------===//
372
373ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
374 return Ctx.IntTy;
375}
376
377//===----------------------------------------------------------------------===//
Ted Kremenek79db7b72010-01-29 22:59:32 +0000378// Methods on FormatSpecifier.
379//===----------------------------------------------------------------------===//
380
381ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
382 if (!CS.consumesDataArgument())
383 return ArgTypeResult::Invalid();
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000384
Ted Kremenek79db7b72010-01-29 22:59:32 +0000385 if (CS.isIntArg())
386 switch (LM) {
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000387 case AsLongDouble:
Ted Kremenek79db7b72010-01-29 22:59:32 +0000388 return ArgTypeResult::Invalid();
389 case None: return Ctx.IntTy;
390 case AsChar: return Ctx.SignedCharTy;
391 case AsShort: return Ctx.ShortTy;
392 case AsLong: return Ctx.LongTy;
393 case AsLongLong: return Ctx.LongLongTy;
394 case AsIntMax:
395 // FIXME: Return unknown for now.
396 return ArgTypeResult();
397 case AsSizeT: return Ctx.getSizeType();
398 case AsPtrDiff: return Ctx.getPointerDiffType();
399 }
400
401 if (CS.isUIntArg())
402 switch (LM) {
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000403 case AsLongDouble:
Ted Kremenek79db7b72010-01-29 22:59:32 +0000404 return ArgTypeResult::Invalid();
405 case None: return Ctx.UnsignedIntTy;
406 case AsChar: return Ctx.UnsignedCharTy;
407 case AsShort: return Ctx.UnsignedShortTy;
408 case AsLong: return Ctx.UnsignedLongTy;
409 case AsLongLong: return Ctx.UnsignedLongLongTy;
410 case AsIntMax:
411 // FIXME: Return unknown for now.
412 return ArgTypeResult();
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000413 case AsSizeT:
Ted Kremenek79db7b72010-01-29 22:59:32 +0000414 // FIXME: How to get the corresponding unsigned
415 // version of size_t?
416 return ArgTypeResult();
417 case AsPtrDiff:
418 // FIXME: How to get the corresponding unsigned
419 // version of ptrdiff_t?
420 return ArgTypeResult();
421 }
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000422
Ted Kremenek016b6052010-02-01 23:23:50 +0000423 if (CS.isDoubleArg()) {
424 if (LM == AsLongDouble)
425 return Ctx.LongDoubleTy;
Ted Kremenek9ff02052010-01-30 01:02:18 +0000426 return Ctx.DoubleTy;
Ted Kremenek016b6052010-02-01 23:23:50 +0000427 }
Ted Kremenek79db7b72010-01-29 22:59:32 +0000428
Ted Kremenekc8b188d2010-02-16 01:46:59 +0000429 if (CS.getKind() == ConversionSpecifier::CStrArg)
430 return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy
431 : ArgTypeResult::CStrTy);
432
Ted Kremenek79db7b72010-01-29 22:59:32 +0000433 // FIXME: Handle other cases.
Ted Kremenekba775fe2010-01-29 23:00:35 +0000434 return ArgTypeResult();
Ted Kremenek79db7b72010-01-29 22:59:32 +0000435}
436