blob: bb9ac8480a0eccad84983c0812671dc49edd3548 [file] [log] [blame]
Ted Kremeneka2e77b42010-01-27 23:43:25 +00001//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16
Ted Kremenek176f7d62010-01-29 02:13:53 +000017using clang::analyze_printf::FormatSpecifier;
18using clang::analyze_printf::OptionalAmount;
Ted Kremenekc22f78d2010-01-29 03:16:21 +000019using namespace clang;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000020
21namespace {
22class FormatSpecifierResult {
23 FormatSpecifier FS;
24 const char *Start;
Ted Kremenek94af5752010-01-29 02:40:24 +000025 bool Stop;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000026public:
Ted Kremenek94af5752010-01-29 02:40:24 +000027 FormatSpecifierResult(bool stop = false)
28 : Start(0), Stop(stop) {}
Ted Kremeneka2e77b42010-01-27 23:43:25 +000029 FormatSpecifierResult(const char *start,
Ted Kremenek08ad1cc2010-01-28 02:02:59 +000030 const FormatSpecifier &fs)
Ted Kremenek94af5752010-01-29 02:40:24 +000031 : FS(fs), Start(start), Stop(false) {}
Ted Kremeneka2e77b42010-01-27 23:43:25 +000032
33
34 const char *getStart() const { return Start; }
Ted Kremenek94af5752010-01-29 02:40:24 +000035 bool shouldStop() const { return Stop; }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000036 bool hasValue() const { return Start != 0; }
37 const FormatSpecifier &getValue() const {
38 assert(hasValue());
39 return FS;
40 }
Ted Kremenek08ad1cc2010-01-28 02:02:59 +000041 const FormatSpecifier &getValue() { return FS; }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000042};
43} // end anonymous namespace
44
// Helper whose destructor assigns one object to another.  Both objects are
// held by reference, so the value that gets copied is whatever the source
// object holds at the moment this helper is destroyed, not the value it had
// when the helper was constructed.
template <typename T>
class UpdateOnReturn {
  T &Destination;
  const T &Source;

public:
  // Remember which object to overwrite and which object to read from.
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  // Perform the deferred assignment.
  ~UpdateOnReturn() { Destination = Source; }
};
57
58static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
59 const char *I = Beg;
60 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
61
62 bool foundDigits = false;
63 unsigned accumulator = 0;
64
65 for ( ; I != E; ++I) {
66 char c = *I;
67 if (c >= '0' && c <= '9') {
68 foundDigits = true;
69 accumulator += (accumulator * 10) + (c - '0');
70 continue;
71 }
72
73 if (foundDigits)
Ted Kremenek5739de72010-01-29 01:06:55 +000074 return OptionalAmount(accumulator, Beg);
Ted Kremeneka2e77b42010-01-27 23:43:25 +000075
Ted Kremenek5739de72010-01-29 01:06:55 +000076 if (c == '*') {
77 ++I;
78 return OptionalAmount(OptionalAmount::Arg, Beg);
79 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +000080
81 break;
82 }
83
84 return OptionalAmount();
85}
86
Ted Kremenekc22f78d2010-01-29 03:16:21 +000087static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
88 const char *&Beg, const char *E) {
Ted Kremenek176f7d62010-01-29 02:13:53 +000089
90 using namespace clang::analyze_printf;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000091
92 const char *I = Beg;
Ted Kremenekc8d9c012010-01-28 00:02:05 +000093 const char *Start = 0;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000094 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
95
96 // Look for a '%' character that indicates the start of a format specifier.
Ted Kremenekb5c98ef2010-01-28 23:56:52 +000097 for ( ; I != E ; ++I) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +000098 char c = *I;
Ted Kremeneka2e77b42010-01-27 23:43:25 +000099 if (c == '\0') {
100 // Detect spurious null characters, which are likely errors.
101 H.HandleNullChar(I);
102 return true;
103 }
104 if (c == '%') {
Ted Kremenekb5c98ef2010-01-28 23:56:52 +0000105 Start = I++; // Record the start of the format specifier.
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000106 break;
107 }
108 }
109
110 // No format specifier found?
111 if (!Start)
112 return false;
113
114 if (I == E) {
115 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000116 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000117 return true;
118 }
119
120 FormatSpecifier FS;
121
122 // Look for flags (if any).
123 bool hasMore = true;
124 for ( ; I != E; ++I) {
125 switch (*I) {
126 default: hasMore = false; break;
127 case '-': FS.setIsLeftJustified(); break;
128 case '+': FS.setHasPlusPrefix(); break;
129 case ' ': FS.setHasSpacePrefix(); break;
130 case '#': FS.setHasAlternativeForm(); break;
131 case '0': FS.setHasLeadingZeros(); break;
132 }
133 if (!hasMore)
134 break;
135 }
136
137 if (I == E) {
138 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000139 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000140 return true;
141 }
142
143 // Look for the field width (if any).
144 FS.setFieldWidth(ParseAmount(I, E));
145
146 if (I == E) {
147 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000148 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000149 return true;
150 }
151
152 // Look for the precision (if any).
153 if (*I == '.') {
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000154 ++I;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000155 if (I == E) {
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000156 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000157 return true;
158 }
159
160 FS.setPrecision(ParseAmount(I, E));
161
162 if (I == E) {
163 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000164 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000165 return true;
166 }
167 }
168
169 // Look for the length modifier.
170 LengthModifier lm = None;
171 switch (*I) {
172 default:
173 break;
174 case 'h':
175 ++I;
176 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
177 break;
178 case 'l':
179 ++I;
180 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
181 break;
182 case 'j': lm = AsIntMax; ++I; break;
183 case 'z': lm = AsSizeT; ++I; break;
184 case 't': lm = AsPtrDiff; ++I; break;
185 case 'L': lm = AsLongDouble; ++I; break;
186 }
187 FS.setLengthModifier(lm);
188
189 if (I == E) {
190 // No more characters left?
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000191 H.HandleIncompleteFormatSpecifier(Start, E - Start);
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000192 return true;
193 }
Ted Kremenek23a71a12010-01-29 20:29:53 +0000194
195 if (*I == '\0') {
196 // Detect spurious null characters, which are likely errors.
197 H.HandleNullChar(I);
198 return true;
199 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000200
201 // Finally, look for the conversion specifier.
Ted Kremenekfee0e962010-01-28 02:46:17 +0000202 const char *conversionPosition = I++;
Ted Kremenek94af5752010-01-29 02:40:24 +0000203 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
Ted Kremenekfee0e962010-01-28 02:46:17 +0000204 switch (*conversionPosition) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000205 default:
Ted Kremenek94af5752010-01-29 02:40:24 +0000206 break;
Ted Kremenekc06ead62010-01-28 00:55:28 +0000207 // C99: 7.19.6.1 (section 8).
Ted Kremenekfee0e962010-01-28 02:46:17 +0000208 case 'd': k = ConversionSpecifier::dArg; break;
209 case 'i': k = ConversionSpecifier::iArg; break;
210 case 'o': k = ConversionSpecifier::oArg; break;
211 case 'u': k = ConversionSpecifier::uArg; break;
212 case 'x': k = ConversionSpecifier::xArg; break;
213 case 'X': k = ConversionSpecifier::XArg; break;
214 case 'f': k = ConversionSpecifier::fArg; break;
215 case 'F': k = ConversionSpecifier::FArg; break;
216 case 'e': k = ConversionSpecifier::eArg; break;
217 case 'E': k = ConversionSpecifier::EArg; break;
218 case 'g': k = ConversionSpecifier::gArg; break;
219 case 'G': k = ConversionSpecifier::GArg; break;
220 case 'a': k = ConversionSpecifier::aArg; break;
221 case 'A': k = ConversionSpecifier::AArg; break;
222 case 'c': k = ConversionSpecifier::IntAsCharArg; break;
223 case 's': k = ConversionSpecifier::CStrArg; break;
224 case 'p': k = ConversionSpecifier::VoidPtrArg; break;
225 case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
226 case '%': k = ConversionSpecifier::PercentArg; break;
Ted Kremenekc06ead62010-01-28 00:55:28 +0000227 // Objective-C.
Ted Kremenek23a71a12010-01-29 20:29:53 +0000228 case '@': k = ConversionSpecifier::ObjCObjArg; break;
229 // Glibc specific.
230 case 'm': k = ConversionSpecifier::PrintErrno; break;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000231 }
Ted Kremenekfee0e962010-01-28 02:46:17 +0000232 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
Ted Kremenek94af5752010-01-29 02:40:24 +0000233
234 if (k == ConversionSpecifier::InvalidSpecifier) {
235 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
236 return false; // Keep processing format specifiers.
237 }
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000238 return FormatSpecifierResult(Start, FS);
239}
240
Ted Kremenekc22f78d2010-01-29 03:16:21 +0000241bool clang::ParseFormatString(FormatStringHandler &H,
Ted Kremenekfee0e962010-01-28 02:46:17 +0000242 const char *I, const char *E) {
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000243 // Keep looking for a format specifier until we have exhausted the string.
244 while (I != E) {
245 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
Ted Kremenek94af5752010-01-29 02:40:24 +0000246 // Did a fail-stop error of any kind occur when parsing the specifier?
247 // If so, don't do any more processing.
248 if (FSR.shouldStop())
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000249 return true;;
Ted Kremenek94af5752010-01-29 02:40:24 +0000250 // Did we exhaust the string or encounter an error that
251 // we can recover from?
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000252 if (!FSR.hasValue())
Ted Kremenek94af5752010-01-29 02:40:24 +0000253 continue;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000254 // We have a format specifier. Pass it to the callback.
Ted Kremenekfee0e962010-01-28 02:46:17 +0000255 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
256 I - FSR.getStart()))
Ted Kremenek23a71a12010-01-29 20:29:53 +0000257 return true;
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000258 }
259 assert(I == E && "Format string not exhausted");
260 return false;
261}
Ted Kremeneka2e77b42010-01-27 23:43:25 +0000262
263FormatStringHandler::~FormatStringHandler() {}