blob: 6192c29e419c3c379ae3cff28892bf80e0cf9b42 [file] [log] [blame]
Ted Kremeneka2e77b42010-01-27 23:43:25 +00001//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends. The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
Ted Kremenek79db7b72010-01-29 22:59:32 +000016#include "clang/AST/ASTContext.h"
Ted Kremeneka2e77b42010-01-27 23:43:25 +000017
Ted Kremenek176f7d62010-01-29 02:13:53 +000018using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
Ted Kremenek79db7b72010-01-29 22:59:32 +000020using clang::analyze_printf::ArgTypeResult;
Ted Kremenekc22f78d2010-01-29 03:16:21 +000021using namespace clang;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000022
23namespace {
24class FormatSpecifierResult {
25 FormatSpecifier FS;
26 const char *Start;
Ted Kremenek94af5752010-01-29 02:40:24 +000027 bool Stop;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000028public:
Ted Kremenek94af5752010-01-29 02:40:24 +000029 FormatSpecifierResult(bool stop = false)
30 : Start(0), Stop(stop) {}
Ted Kremeneka2e77b42010-01-27 23:43:25 +000031 FormatSpecifierResult(const char *start,
Ted Kremenek08ad1cc2010-01-28 02:02:59 +000032 const FormatSpecifier &fs)
Ted Kremenek94af5752010-01-29 02:40:24 +000033 : FS(fs), Start(start), Stop(false) {}
Ted Kremeneka2e77b42010-01-27 23:43:25 +000034
35
36 const char *getStart() const { return Start; }
Ted Kremenek94af5752010-01-29 02:40:24 +000037 bool shouldStop() const { return Stop; }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000038 bool hasValue() const { return Start != 0; }
39 const FormatSpecifier &getValue() const {
40 assert(hasValue());
41 return FS;
42 }
Ted Kremenek08ad1cc2010-01-28 02:02:59 +000043 const FormatSpecifier &getValue() { return FS; }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000044};
45} // end anonymous namespace
46
/// Helper that, when destroyed, assigns the value of one object to another.
///
/// Both objects are held by reference, so the value written is whatever the
/// source object holds at the moment the helper goes out of scope, not the
/// value it had when the helper was created.
template <typename T>
class UpdateOnReturn {
  T &Destination;
  const T &Source;

public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  ~UpdateOnReturn() { Destination = Source; }
};
59
60static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
61 const char *I = Beg;
62 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
63
64 bool foundDigits = false;
65 unsigned accumulator = 0;
66
67 for ( ; I != E; ++I) {
68 char c = *I;
69 if (c >= '0' && c <= '9') {
70 foundDigits = true;
71 accumulator += (accumulator * 10) + (c - '0');
72 continue;
73 }
74
75 if (foundDigits)
Ted Kremenek5739de72010-01-29 01:06:55 +000076 return OptionalAmount(accumulator, Beg);
Ted Kremeneka2e77b42010-01-27 23:43:25 +000077
Ted Kremenek5739de72010-01-29 01:06:55 +000078 if (c == '*') {
79 ++I;
80 return OptionalAmount(OptionalAmount::Arg, Beg);
81 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000082
83 break;
84 }
85
86 return OptionalAmount();
87}
88
Ted Kremenekc22f78d2010-01-29 03:16:21 +000089static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
90 const char *&Beg, const char *E) {
Ted Kremenek176f7d62010-01-29 02:13:53 +000091
92 using namespace clang::analyze_printf;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000093
94 const char *I = Beg;
Ted Kremenekc8d9c012010-01-28 00:02:05 +000095 const char *Start = 0;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000096 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
97
98 // Look for a '%' character that indicates the start of a format specifier.
Ted Kremenekb5c98ef2010-01-28 23:56:52 +000099 for ( ; I != E ; ++I) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000100 char c = *I;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000101 if (c == '\0') {
102 // Detect spurious null characters, which are likely errors.
103 H.HandleNullChar(I);
104 return true;
105 }
106 if (c == '%') {
Ted Kremenekb5c98ef2010-01-28 23:56:52 +0000107 Start = I++; // Record the start of the format specifier.
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000108 break;
109 }
110 }
111
112 // No format specifier found?
113 if (!Start)
114 return false;
115
116 if (I == E) {
117 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000118 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000119 return true;
120 }
121
122 FormatSpecifier FS;
123
124 // Look for flags (if any).
125 bool hasMore = true;
126 for ( ; I != E; ++I) {
127 switch (*I) {
128 default: hasMore = false; break;
129 case '-': FS.setIsLeftJustified(); break;
130 case '+': FS.setHasPlusPrefix(); break;
131 case ' ': FS.setHasSpacePrefix(); break;
132 case '#': FS.setHasAlternativeForm(); break;
133 case '0': FS.setHasLeadingZeros(); break;
134 }
135 if (!hasMore)
136 break;
137 }
138
139 if (I == E) {
140 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000141 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000142 return true;
143 }
144
145 // Look for the field width (if any).
146 FS.setFieldWidth(ParseAmount(I, E));
147
148 if (I == E) {
149 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000150 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000151 return true;
152 }
153
154 // Look for the precision (if any).
155 if (*I == '.') {
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000156 ++I;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000157 if (I == E) {
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000158 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000159 return true;
160 }
161
162 FS.setPrecision(ParseAmount(I, E));
163
164 if (I == E) {
165 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000166 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000167 return true;
168 }
169 }
170
171 // Look for the length modifier.
172 LengthModifier lm = None;
173 switch (*I) {
174 default:
175 break;
176 case 'h':
177 ++I;
178 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
179 break;
180 case 'l':
181 ++I;
182 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
183 break;
184 case 'j': lm = AsIntMax; ++I; break;
185 case 'z': lm = AsSizeT; ++I; break;
186 case 't': lm = AsPtrDiff; ++I; break;
187 case 'L': lm = AsLongDouble; ++I; break;
188 }
189 FS.setLengthModifier(lm);
190
191 if (I == E) {
192 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000193 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000194 return true;
195 }
Ted Kremenek23a71a12010-01-29 20:29:53 +0000196
197 if (*I == '\0') {
198 // Detect spurious null characters, which are likely errors.
199 H.HandleNullChar(I);
200 return true;
201 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000202
203 // Finally, look for the conversion specifier.
Ted Kremenekfee0e962010-01-28 02:46:17 +0000204 const char *conversionPosition = I++;
Ted Kremenek94af5752010-01-29 02:40:24 +0000205 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
Ted Kremenekfee0e962010-01-28 02:46:17 +0000206 switch (*conversionPosition) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000207 default:
Ted Kremenek94af5752010-01-29 02:40:24 +0000208 break;
Ted Kremenekc06ead62010-01-28 00:55:28 +0000209 // C99: 7.19.6.1 (section 8).
Ted Kremenekfee0e962010-01-28 02:46:17 +0000210 case 'd': k = ConversionSpecifier::dArg; break;
211 case 'i': k = ConversionSpecifier::iArg; break;
212 case 'o': k = ConversionSpecifier::oArg; break;
213 case 'u': k = ConversionSpecifier::uArg; break;
214 case 'x': k = ConversionSpecifier::xArg; break;
215 case 'X': k = ConversionSpecifier::XArg; break;
216 case 'f': k = ConversionSpecifier::fArg; break;
217 case 'F': k = ConversionSpecifier::FArg; break;
218 case 'e': k = ConversionSpecifier::eArg; break;
219 case 'E': k = ConversionSpecifier::EArg; break;
220 case 'g': k = ConversionSpecifier::gArg; break;
221 case 'G': k = ConversionSpecifier::GArg; break;
222 case 'a': k = ConversionSpecifier::aArg; break;
223 case 'A': k = ConversionSpecifier::AArg; break;
224 case 'c': k = ConversionSpecifier::IntAsCharArg; break;
225 case 's': k = ConversionSpecifier::CStrArg; break;
226 case 'p': k = ConversionSpecifier::VoidPtrArg; break;
227 case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
228 case '%': k = ConversionSpecifier::PercentArg; break;
Ted Kremenekc06ead62010-01-28 00:55:28 +0000229 // Objective-C.
Ted Kremenek23a71a12010-01-29 20:29:53 +0000230 case '@': k = ConversionSpecifier::ObjCObjArg; break;
231 // Glibc specific.
232 case 'm': k = ConversionSpecifier::PrintErrno; break;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000233 }
Ted Kremenekfee0e962010-01-28 02:46:17 +0000234 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
Ted Kremenek94af5752010-01-29 02:40:24 +0000235
236 if (k == ConversionSpecifier::InvalidSpecifier) {
237 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
238 return false; // Keep processing format specifiers.
239 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000240 return FormatSpecifierResult(Start, FS);
241}
242
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000243bool clang::ParseFormatString(FormatStringHandler &H,
Ted Kremenekfee0e962010-01-28 02:46:17 +0000244 const char *I, const char *E) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000245 // Keep looking for a format specifier until we have exhausted the string.
246 while (I != E) {
247 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
Ted Kremenek94af5752010-01-29 02:40:24 +0000248 // Did a fail-stop error of any kind occur when parsing the specifier?
249 // If so, don't do any more processing.
250 if (FSR.shouldStop())
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000251 return true;;
Ted Kremenek94af5752010-01-29 02:40:24 +0000252 // Did we exhaust the string or encounter an error that
253 // we can recover from?
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000254 if (!FSR.hasValue())
Ted Kremenek94af5752010-01-29 02:40:24 +0000255 continue;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000256 // We have a format specifier. Pass it to the callback.
Ted Kremenekfee0e962010-01-28 02:46:17 +0000257 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
258 I - FSR.getStart()))
Ted Kremenek23a71a12010-01-29 20:29:53 +0000259 return true;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000260 }
261 assert(I == E && "Format string not exhausted");
262 return false;
263}
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000264
265FormatStringHandler::~FormatStringHandler() {}
Ted Kremenek79db7b72010-01-29 22:59:32 +0000266
267//===----------------------------------------------------------------------===//
268// Methods on FormatSpecifier.
269//===----------------------------------------------------------------------===//
270
271ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
272 if (!CS.consumesDataArgument())
273 return ArgTypeResult::Invalid();
274
275 if (CS.isIntArg())
276 switch (LM) {
277 case AsLongDouble:
278 return ArgTypeResult::Invalid();
279 case None: return Ctx.IntTy;
280 case AsChar: return Ctx.SignedCharTy;
281 case AsShort: return Ctx.ShortTy;
282 case AsLong: return Ctx.LongTy;
283 case AsLongLong: return Ctx.LongLongTy;
284 case AsIntMax:
285 // FIXME: Return unknown for now.
286 return ArgTypeResult();
287 case AsSizeT: return Ctx.getSizeType();
288 case AsPtrDiff: return Ctx.getPointerDiffType();
289 }
290
291 if (CS.isUIntArg())
292 switch (LM) {
293 case AsLongDouble:
294 return ArgTypeResult::Invalid();
295 case None: return Ctx.UnsignedIntTy;
296 case AsChar: return Ctx.UnsignedCharTy;
297 case AsShort: return Ctx.UnsignedShortTy;
298 case AsLong: return Ctx.UnsignedLongTy;
299 case AsLongLong: return Ctx.UnsignedLongLongTy;
300 case AsIntMax:
301 // FIXME: Return unknown for now.
302 return ArgTypeResult();
303 case AsSizeT:
304 // FIXME: How to get the corresponding unsigned
305 // version of size_t?
306 return ArgTypeResult();
307 case AsPtrDiff:
308 // FIXME: How to get the corresponding unsigned
309 // version of ptrdiff_t?
310 return ArgTypeResult();
311 }
312
313 // FIXME: Handle other cases.
314 return ArgTypeResult();
315}
316