blob: 28d6b4f0e70debfda1efc039cc248a118d1b699e [file] [log] [blame]
//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends. The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
20using clang::analyze_printf::ArgTypeResult;
21using clang::analyze_printf::FormatStringHandler;
22using namespace clang;
23
24namespace {
25class FormatSpecifierResult {
26 FormatSpecifier FS;
27 const char *Start;
28 bool Stop;
29public:
30 FormatSpecifierResult(bool stop = false)
31 : Start(0), Stop(stop) {}
32 FormatSpecifierResult(const char *start,
33 const FormatSpecifier &fs)
34 : FS(fs), Start(start), Stop(false) {}
35
36
37 const char *getStart() const { return Start; }
38 bool shouldStop() const { return Stop; }
39 bool hasValue() const { return Start != 0; }
40 const FormatSpecifier &getValue() const {
41 assert(hasValue());
42 return FS;
43 }
44 const FormatSpecifier &getValue() { return FS; }
45};
46} // end anonymous namespace
47
/// Guard object that assigns one variable to another when it is destroyed.
///
/// Both variables are held by reference, so the value written back is
/// whatever the source holds at the moment the guard goes out of scope, not
/// the value it had when the guard was created.
template <typename T>
class UpdateOnReturn {
  T &ValueToUpdate;
  const T &ValueToCopy;

  // Not copyable: a copy would perform the write-back a second time from its
  // own destructor. Declared private and intentionally left undefined.
  UpdateOnReturn(const UpdateOnReturn &);
  UpdateOnReturn &operator=(const UpdateOnReturn &);
public:
  /// \param valueToUpdate  Variable that is assigned to on destruction.
  /// \param valueToCopy    Variable whose value is read on destruction.
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}

  ~UpdateOnReturn() {
    ValueToUpdate = ValueToCopy;
  }
};
60
61static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
62 const char *I = Beg;
63 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64
65 bool foundDigits = false;
66 unsigned accumulator = 0;
67
68 for ( ; I != E; ++I) {
69 char c = *I;
70 if (c >= '0' && c <= '9') {
71 foundDigits = true;
72 accumulator += (accumulator * 10) + (c - '0');
73 continue;
74 }
75
76 if (foundDigits)
77 return OptionalAmount(accumulator, Beg);
78
79 if (c == '*') {
80 ++I;
81 return OptionalAmount(OptionalAmount::Arg, Beg);
82 }
83
84 break;
85 }
86
87 return OptionalAmount();
88}
89
90static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
91 const char *&Beg,
92 const char *E) {
93
94 using namespace clang::analyze_printf;
95
96 const char *I = Beg;
97 const char *Start = 0;
98 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
99
100 // Look for a '%' character that indicates the start of a format specifier.
101 for ( ; I != E ; ++I) {
102 char c = *I;
103 if (c == '\0') {
104 // Detect spurious null characters, which are likely errors.
105 H.HandleNullChar(I);
106 return true;
107 }
108 if (c == '%') {
109 Start = I++; // Record the start of the format specifier.
110 break;
111 }
112 }
113
114 // No format specifier found?
115 if (!Start)
116 return false;
117
118 if (I == E) {
119 // No more characters left?
120 H.HandleIncompleteFormatSpecifier(Start, E - Start);
121 return true;
122 }
123
124 FormatSpecifier FS;
125
126 // Look for flags (if any).
127 bool hasMore = true;
128 for ( ; I != E; ++I) {
129 switch (*I) {
130 default: hasMore = false; break;
131 case '-': FS.setIsLeftJustified(); break;
132 case '+': FS.setHasPlusPrefix(); break;
133 case ' ': FS.setHasSpacePrefix(); break;
134 case '#': FS.setHasAlternativeForm(); break;
135 case '0': FS.setHasLeadingZeros(); break;
136 }
137 if (!hasMore)
138 break;
139 }
140
141 if (I == E) {
142 // No more characters left?
143 H.HandleIncompleteFormatSpecifier(Start, E - Start);
144 return true;
145 }
146
147 // Look for the field width (if any).
148 FS.setFieldWidth(ParseAmount(I, E));
149
150 if (I == E) {
151 // No more characters left?
152 H.HandleIncompleteFormatSpecifier(Start, E - Start);
153 return true;
154 }
155
156 // Look for the precision (if any).
157 if (*I == '.') {
158 ++I;
159 if (I == E) {
160 H.HandleIncompleteFormatSpecifier(Start, E - Start);
161 return true;
162 }
163
164 FS.setPrecision(ParseAmount(I, E));
165
166 if (I == E) {
167 // No more characters left?
168 H.HandleIncompleteFormatSpecifier(Start, E - Start);
169 return true;
170 }
171 }
172
173 // Look for the length modifier.
174 LengthModifier lm = None;
175 switch (*I) {
176 default:
177 break;
178 case 'h':
179 ++I;
180 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
181 break;
182 case 'l':
183 ++I;
184 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
185 break;
186 case 'j': lm = AsIntMax; ++I; break;
187 case 'z': lm = AsSizeT; ++I; break;
188 case 't': lm = AsPtrDiff; ++I; break;
189 case 'L': lm = AsLongDouble; ++I; break;
190 case 'q': lm = AsLongLong; ++I; break;
191 }
192 FS.setLengthModifier(lm);
193
194 if (I == E) {
195 // No more characters left?
196 H.HandleIncompleteFormatSpecifier(Start, E - Start);
197 return true;
198 }
199
200 if (*I == '\0') {
201 // Detect spurious null characters, which are likely errors.
202 H.HandleNullChar(I);
203 return true;
204 }
205
206 // Finally, look for the conversion specifier.
207 const char *conversionPosition = I++;
208 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
209 switch (*conversionPosition) {
210 default:
211 break;
212 // C99: 7.19.6.1 (section 8).
213 case 'd': k = ConversionSpecifier::dArg; break;
214 case 'i': k = ConversionSpecifier::iArg; break;
215 case 'o': k = ConversionSpecifier::oArg; break;
216 case 'u': k = ConversionSpecifier::uArg; break;
217 case 'x': k = ConversionSpecifier::xArg; break;
218 case 'X': k = ConversionSpecifier::XArg; break;
219 case 'f': k = ConversionSpecifier::fArg; break;
220 case 'F': k = ConversionSpecifier::FArg; break;
221 case 'e': k = ConversionSpecifier::eArg; break;
222 case 'E': k = ConversionSpecifier::EArg; break;
223 case 'g': k = ConversionSpecifier::gArg; break;
224 case 'G': k = ConversionSpecifier::GArg; break;
225 case 'a': k = ConversionSpecifier::aArg; break;
226 case 'A': k = ConversionSpecifier::AArg; break;
227 case 'c': k = ConversionSpecifier::IntAsCharArg; break;
228 case 's': k = ConversionSpecifier::CStrArg; break;
229 case 'p': k = ConversionSpecifier::VoidPtrArg; break;
230 case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
231 case '%': k = ConversionSpecifier::PercentArg; break;
232 // Objective-C.
233 case '@': k = ConversionSpecifier::ObjCObjArg; break;
234 // Glibc specific.
235 case 'm': k = ConversionSpecifier::PrintErrno; break;
236 }
237 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
238
239 if (k == ConversionSpecifier::InvalidSpecifier) {
240 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
241 return false; // Keep processing format specifiers.
242 }
243 return FormatSpecifierResult(Start, FS);
244}
245
246bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
247 const char *I, const char *E) {
248 // Keep looking for a format specifier until we have exhausted the string.
249 while (I != E) {
250 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
251 // Did a fail-stop error of any kind occur when parsing the specifier?
252 // If so, don't do any more processing.
253 if (FSR.shouldStop())
254 return true;;
255 // Did we exhaust the string or encounter an error that
256 // we can recover from?
257 if (!FSR.hasValue())
258 continue;
259 // We have a format specifier. Pass it to the callback.
260 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
261 I - FSR.getStart()))
262 return true;
263 }
264 assert(I == E && "Format string not exhausted");
265 return false;
266}
267
268FormatStringHandler::~FormatStringHandler() {}
269
270//===----------------------------------------------------------------------===//
271// Methods on FormatSpecifier.
272//===----------------------------------------------------------------------===//
273
274ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
275 if (!CS.consumesDataArgument())
276 return ArgTypeResult::Invalid();
277
278 if (CS.isIntArg())
279 switch (LM) {
280 case AsLongDouble:
281 return ArgTypeResult::Invalid();
282 case None: return Ctx.IntTy;
283 case AsChar: return Ctx.SignedCharTy;
284 case AsShort: return Ctx.ShortTy;
285 case AsLong: return Ctx.LongTy;
286 case AsLongLong: return Ctx.LongLongTy;
287 case AsIntMax:
288 // FIXME: Return unknown for now.
289 return ArgTypeResult();
290 case AsSizeT: return Ctx.getSizeType();
291 case AsPtrDiff: return Ctx.getPointerDiffType();
292 }
293
294 if (CS.isUIntArg())
295 switch (LM) {
296 case AsLongDouble:
297 return ArgTypeResult::Invalid();
298 case None: return Ctx.UnsignedIntTy;
299 case AsChar: return Ctx.UnsignedCharTy;
300 case AsShort: return Ctx.UnsignedShortTy;
301 case AsLong: return Ctx.UnsignedLongTy;
302 case AsLongLong: return Ctx.UnsignedLongLongTy;
303 case AsIntMax:
304 // FIXME: Return unknown for now.
305 return ArgTypeResult();
306 case AsSizeT:
307 // FIXME: How to get the corresponding unsigned
308 // version of size_t?
309 return ArgTypeResult();
310 case AsPtrDiff:
311 // FIXME: How to get the corresponding unsigned
312 // version of ptrdiff_t?
313 return ArgTypeResult();
314 }
315
316 if (CS.isDoubleArg()) {
317 if (LM == AsLongDouble)
318 return Ctx.LongDoubleTy;
319 return Ctx.DoubleTy;
320 }
321
322 // FIXME: Handle other cases.
323 return ArgTypeResult();
324}
325