blob: d2bcbb04f9d68c0d9578727ffeb5df9d5cde5195 [file] [log] [blame]
Ted Kremenek8f0a1c72010-01-27 23:43:25 +00001//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
Ted Kremenek33567d22010-01-29 22:59:32 +000016#include "clang/AST/ASTContext.h"
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000017
Ted Kremenek4b220fa2010-01-29 02:13:53 +000018using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
Ted Kremenek33567d22010-01-29 22:59:32 +000020using clang::analyze_printf::ArgTypeResult;
Ted Kremenek808015a2010-01-29 03:16:21 +000021using namespace clang;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000022
23namespace {
24class FormatSpecifierResult {
25 FormatSpecifier FS;
26 const char *Start;
Ted Kremenek26ac2e02010-01-29 02:40:24 +000027 bool Stop;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000028public:
Ted Kremenek26ac2e02010-01-29 02:40:24 +000029 FormatSpecifierResult(bool stop = false)
30 : Start(0), Stop(stop) {}
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000031 FormatSpecifierResult(const char *start,
Ted Kremenekd2dcece2010-01-28 02:02:59 +000032 const FormatSpecifier &fs)
Ted Kremenek26ac2e02010-01-29 02:40:24 +000033 : FS(fs), Start(start), Stop(false) {}
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000034
35
36 const char *getStart() const { return Start; }
Ted Kremenek26ac2e02010-01-29 02:40:24 +000037 bool shouldStop() const { return Stop; }
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000038 bool hasValue() const { return Start != 0; }
39 const FormatSpecifier &getValue() const {
40 assert(hasValue());
41 return FS;
42 }
Ted Kremenekd2dcece2010-01-28 02:02:59 +000043 const FormatSpecifier &getValue() { return FS; }
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000044};
45} // end anonymous namespace
46
/// Helper that assigns one variable to another when it goes out of scope.
///
/// Both constructor arguments are stored as references, so the value that
/// ends up in \p valueToUpdate is whatever \p valueToCopy holds at the moment
/// this object is destroyed, not what it held at construction.
template <typename T>
class UpdateOnReturn {
  T &Destination;
  const T &Source;

public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  ~UpdateOnReturn() { Destination = Source; }
};
59
60static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
61 const char *I = Beg;
62 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
63
64 bool foundDigits = false;
65 unsigned accumulator = 0;
66
67 for ( ; I != E; ++I) {
68 char c = *I;
69 if (c >= '0' && c <= '9') {
70 foundDigits = true;
71 accumulator += (accumulator * 10) + (c - '0');
72 continue;
73 }
74
75 if (foundDigits)
Ted Kremenek0d277352010-01-29 01:06:55 +000076 return OptionalAmount(accumulator, Beg);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000077
Ted Kremenek0d277352010-01-29 01:06:55 +000078 if (c == '*') {
79 ++I;
80 return OptionalAmount(OptionalAmount::Arg, Beg);
81 }
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000082
83 break;
84 }
85
86 return OptionalAmount();
87}
88
Ted Kremenek808015a2010-01-29 03:16:21 +000089static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
90 const char *&Beg, const char *E) {
Ted Kremenek4b220fa2010-01-29 02:13:53 +000091
92 using namespace clang::analyze_printf;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000093
94 const char *I = Beg;
Ted Kremenekc7ae51a2010-01-28 00:02:05 +000095 const char *Start = 0;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +000096 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
97
98 // Look for a '%' character that indicates the start of a format specifier.
Ted Kremeneke729acb2010-01-28 23:56:52 +000099 for ( ; I != E ; ++I) {
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000100 char c = *I;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000101 if (c == '\0') {
102 // Detect spurious null characters, which are likely errors.
103 H.HandleNullChar(I);
104 return true;
105 }
106 if (c == '%') {
Ted Kremeneke729acb2010-01-28 23:56:52 +0000107 Start = I++; // Record the start of the format specifier.
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000108 break;
109 }
110 }
111
112 // No format specifier found?
113 if (!Start)
114 return false;
115
116 if (I == E) {
117 // No more characters left?
Ted Kremenek808015a2010-01-29 03:16:21 +0000118 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000119 return true;
120 }
121
122 FormatSpecifier FS;
123
124 // Look for flags (if any).
125 bool hasMore = true;
126 for ( ; I != E; ++I) {
127 switch (*I) {
128 default: hasMore = false; break;
129 case '-': FS.setIsLeftJustified(); break;
130 case '+': FS.setHasPlusPrefix(); break;
131 case ' ': FS.setHasSpacePrefix(); break;
132 case '#': FS.setHasAlternativeForm(); break;
133 case '0': FS.setHasLeadingZeros(); break;
134 }
135 if (!hasMore)
136 break;
137 }
138
139 if (I == E) {
140 // No more characters left?
Ted Kremenek808015a2010-01-29 03:16:21 +0000141 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000142 return true;
143 }
144
145 // Look for the field width (if any).
146 FS.setFieldWidth(ParseAmount(I, E));
147
148 if (I == E) {
149 // No more characters left?
Ted Kremenek808015a2010-01-29 03:16:21 +0000150 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000151 return true;
152 }
153
154 // Look for the precision (if any).
155 if (*I == '.') {
Ted Kremenek808015a2010-01-29 03:16:21 +0000156 ++I;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000157 if (I == E) {
Ted Kremenek808015a2010-01-29 03:16:21 +0000158 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000159 return true;
160 }
161
162 FS.setPrecision(ParseAmount(I, E));
163
164 if (I == E) {
165 // No more characters left?
Ted Kremenek808015a2010-01-29 03:16:21 +0000166 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000167 return true;
168 }
169 }
170
171 // Look for the length modifier.
172 LengthModifier lm = None;
173 switch (*I) {
174 default:
175 break;
176 case 'h':
177 ++I;
178 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
179 break;
180 case 'l':
181 ++I;
182 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
183 break;
184 case 'j': lm = AsIntMax; ++I; break;
185 case 'z': lm = AsSizeT; ++I; break;
186 case 't': lm = AsPtrDiff; ++I; break;
187 case 'L': lm = AsLongDouble; ++I; break;
Daniel Dunbar01aefc62010-01-30 15:49:20 +0000188 case 'q': lm = AsLongLong; ++I; break;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000189 }
190 FS.setLengthModifier(lm);
191
192 if (I == E) {
193 // No more characters left?
Ted Kremenek808015a2010-01-29 03:16:21 +0000194 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000195 return true;
196 }
Ted Kremenek4dcb18f2010-01-29 20:29:53 +0000197
198 if (*I == '\0') {
199 // Detect spurious null characters, which are likely errors.
200 H.HandleNullChar(I);
201 return true;
202 }
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000203
204 // Finally, look for the conversion specifier.
Ted Kremeneka8d8fec2010-01-28 02:46:17 +0000205 const char *conversionPosition = I++;
Ted Kremenek26ac2e02010-01-29 02:40:24 +0000206 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
Ted Kremeneka8d8fec2010-01-28 02:46:17 +0000207 switch (*conversionPosition) {
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000208 default:
Ted Kremenek26ac2e02010-01-29 02:40:24 +0000209 break;
Ted Kremenekc7cbb9b2010-01-28 00:55:28 +0000210 // C99: 7.19.6.1 (section 8).
Ted Kremeneka8d8fec2010-01-28 02:46:17 +0000211 case 'd': k = ConversionSpecifier::dArg; break;
212 case 'i': k = ConversionSpecifier::iArg; break;
213 case 'o': k = ConversionSpecifier::oArg; break;
214 case 'u': k = ConversionSpecifier::uArg; break;
215 case 'x': k = ConversionSpecifier::xArg; break;
216 case 'X': k = ConversionSpecifier::XArg; break;
217 case 'f': k = ConversionSpecifier::fArg; break;
218 case 'F': k = ConversionSpecifier::FArg; break;
219 case 'e': k = ConversionSpecifier::eArg; break;
220 case 'E': k = ConversionSpecifier::EArg; break;
221 case 'g': k = ConversionSpecifier::gArg; break;
222 case 'G': k = ConversionSpecifier::GArg; break;
223 case 'a': k = ConversionSpecifier::aArg; break;
224 case 'A': k = ConversionSpecifier::AArg; break;
225 case 'c': k = ConversionSpecifier::IntAsCharArg; break;
226 case 's': k = ConversionSpecifier::CStrArg; break;
227 case 'p': k = ConversionSpecifier::VoidPtrArg; break;
228 case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
229 case '%': k = ConversionSpecifier::PercentArg; break;
Ted Kremenekc7cbb9b2010-01-28 00:55:28 +0000230 // Objective-C.
Ted Kremenek4dcb18f2010-01-29 20:29:53 +0000231 case '@': k = ConversionSpecifier::ObjCObjArg; break;
232 // Glibc specific.
233 case 'm': k = ConversionSpecifier::PrintErrno; break;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000234 }
Ted Kremeneka8d8fec2010-01-28 02:46:17 +0000235 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
Ted Kremenek26ac2e02010-01-29 02:40:24 +0000236
237 if (k == ConversionSpecifier::InvalidSpecifier) {
238 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
239 return false; // Keep processing format specifiers.
240 }
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000241 return FormatSpecifierResult(Start, FS);
242}
243
Ted Kremenek808015a2010-01-29 03:16:21 +0000244bool clang::ParseFormatString(FormatStringHandler &H,
Ted Kremeneka8d8fec2010-01-28 02:46:17 +0000245 const char *I, const char *E) {
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000246 // Keep looking for a format specifier until we have exhausted the string.
247 while (I != E) {
248 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
Ted Kremenek26ac2e02010-01-29 02:40:24 +0000249 // Did a fail-stop error of any kind occur when parsing the specifier?
250 // If so, don't do any more processing.
251 if (FSR.shouldStop())
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000252 return true;;
Ted Kremenek26ac2e02010-01-29 02:40:24 +0000253 // Did we exhaust the string or encounter an error that
254 // we can recover from?
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000255 if (!FSR.hasValue())
Ted Kremenek26ac2e02010-01-29 02:40:24 +0000256 continue;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000257 // We have a format specifier. Pass it to the callback.
Ted Kremeneka8d8fec2010-01-28 02:46:17 +0000258 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
259 I - FSR.getStart()))
Ted Kremenek4dcb18f2010-01-29 20:29:53 +0000260 return true;
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000261 }
262 assert(I == E && "Format string not exhausted");
263 return false;
264}
Ted Kremenek8f0a1c72010-01-27 23:43:25 +0000265
266FormatStringHandler::~FormatStringHandler() {}
Ted Kremenek33567d22010-01-29 22:59:32 +0000267
268//===----------------------------------------------------------------------===//
269// Methods on FormatSpecifier.
270//===----------------------------------------------------------------------===//
271
272ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
273 if (!CS.consumesDataArgument())
274 return ArgTypeResult::Invalid();
275
276 if (CS.isIntArg())
277 switch (LM) {
278 case AsLongDouble:
279 return ArgTypeResult::Invalid();
280 case None: return Ctx.IntTy;
281 case AsChar: return Ctx.SignedCharTy;
282 case AsShort: return Ctx.ShortTy;
283 case AsLong: return Ctx.LongTy;
284 case AsLongLong: return Ctx.LongLongTy;
285 case AsIntMax:
286 // FIXME: Return unknown for now.
287 return ArgTypeResult();
288 case AsSizeT: return Ctx.getSizeType();
289 case AsPtrDiff: return Ctx.getPointerDiffType();
290 }
291
292 if (CS.isUIntArg())
293 switch (LM) {
294 case AsLongDouble:
295 return ArgTypeResult::Invalid();
296 case None: return Ctx.UnsignedIntTy;
297 case AsChar: return Ctx.UnsignedCharTy;
298 case AsShort: return Ctx.UnsignedShortTy;
299 case AsLong: return Ctx.UnsignedLongTy;
300 case AsLongLong: return Ctx.UnsignedLongLongTy;
301 case AsIntMax:
302 // FIXME: Return unknown for now.
303 return ArgTypeResult();
304 case AsSizeT:
305 // FIXME: How to get the corresponding unsigned
306 // version of size_t?
307 return ArgTypeResult();
308 case AsPtrDiff:
309 // FIXME: How to get the corresponding unsigned
310 // version of ptrdiff_t?
311 return ArgTypeResult();
312 }
Ted Kremenekc9a89fe2010-01-30 01:02:18 +0000313
314 if (CS.isDoubleArg())
315 return Ctx.DoubleTy;
Ted Kremenek33567d22010-01-29 22:59:32 +0000316
317 // FIXME: Handle other cases.
Ted Kremenek40888ad2010-01-29 23:00:35 +0000318 return ArgTypeResult();
Ted Kremenek33567d22010-01-29 22:59:32 +0000319}
320