blob: dcd18db16ced601c3f1005a4d978aaa2c125c1ca [file] [log] [blame]
Ted Kremenek02087932010-07-16 02:11:22 +00001// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Shared details for processing format strings of printf and scanf
11// (and friends).
12//
13//===----------------------------------------------------------------------===//
14
15#include "FormatStringParsing.h"
16
17using clang::analyze_format_string::ArgTypeResult;
18using clang::analyze_format_string::FormatStringHandler;
19using clang::analyze_format_string::FormatSpecifier;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::PositionContext;
23using namespace clang;
24
25// Key function to FormatStringHandler.
26FormatStringHandler::~FormatStringHandler() {}
27
28//===----------------------------------------------------------------------===//
29// Functions for parsing format strings components in both printf and
30// scanf format strings.
31//===----------------------------------------------------------------------===//
32
33OptionalAmount
34clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
35 const char *I = Beg;
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38 unsigned accumulator = 0;
39 bool hasDigits = false;
40
41 for ( ; I != E; ++I) {
42 char c = *I;
43 if (c >= '0' && c <= '9') {
44 hasDigits = true;
45 accumulator = (accumulator * 10) + (c - '0');
46 continue;
47 }
48
49 if (hasDigits)
50 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
51 false);
52
53 break;
54 }
55
56 return OptionalAmount();
57}
58
59OptionalAmount
60clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
61 const char *E,
62 unsigned &argIndex) {
63 if (*Beg == '*') {
64 ++Beg;
65 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
66 }
67
68 return ParseAmount(Beg, E);
69}
70
71OptionalAmount
72clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
73 const char *Start,
74 const char *&Beg,
75 const char *E,
76 PositionContext p) {
77 if (*Beg == '*') {
78 const char *I = Beg + 1;
79 const OptionalAmount &Amt = ParseAmount(I, E);
80
81 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
82 H.HandleInvalidPosition(Beg, I - Beg, p);
83 return OptionalAmount(false);
84 }
85
86 if (I == E) {
87 // No more characters left?
88 H.HandleIncompleteSpecifier(Start, E - Start);
89 return OptionalAmount(false);
90 }
91
92 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
93
94 if (*I == '$') {
95 // Handle positional arguments
96
97 // Special case: '*0$', since this is an easy mistake.
98 if (Amt.getConstantAmount() == 0) {
99 H.HandleZeroPosition(Beg, I - Beg + 1);
100 return OptionalAmount(false);
101 }
102
103 const char *Tmp = Beg;
104 Beg = ++I;
105
106 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
107 Tmp, 0, true);
108 }
109
110 H.HandleInvalidPosition(Beg, I - Beg, p);
111 return OptionalAmount(false);
112 }
113
114 return ParseAmount(Beg, E);
115}
116
117
118bool
119clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
120 FormatSpecifier &CS,
121 const char *Start,
122 const char *&Beg, const char *E,
123 unsigned *argIndex) {
124 // FIXME: Support negative field widths.
125 if (argIndex) {
126 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
127 }
128 else {
129 const OptionalAmount Amt =
130 ParsePositionAmount(H, Start, Beg, E,
131 analyze_format_string::FieldWidthPos);
132
133 if (Amt.isInvalid())
134 return true;
135 CS.setFieldWidth(Amt);
136 }
137 return false;
138}
139
140bool
141clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
142 FormatSpecifier &FS,
143 const char *Start,
144 const char *&Beg,
145 const char *E) {
146 const char *I = Beg;
147
148 const OptionalAmount &Amt = ParseAmount(I, E);
149
150 if (I == E) {
151 // No more characters left?
152 H.HandleIncompleteSpecifier(Start, E - Start);
153 return true;
154 }
155
156 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
157 // Special case: '%0$', since this is an easy mistake.
158 if (Amt.getConstantAmount() == 0) {
159 H.HandleZeroPosition(Start, I - Start);
160 return true;
161 }
162
163 FS.setArgIndex(Amt.getConstantAmount() - 1);
164 FS.setUsesPositionalArg();
165 // Update the caller's pointer if we decided to consume
166 // these characters.
167 Beg = I;
168 return false;
169 }
170
171 return false;
172}
173
174bool
175clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
176 const char *&I,
177 const char *E) {
178 LengthModifier::Kind lmKind = LengthModifier::None;
179 const char *lmPosition = I;
180 switch (*I) {
181 default:
182 return false;
183 case 'h':
184 ++I;
185 lmKind = (I != E && *I == 'h') ?
186 ++I, LengthModifier::AsChar : LengthModifier::AsShort;
187 break;
188 case 'l':
189 ++I;
190 lmKind = (I != E && *I == 'l') ?
191 ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
192 break;
193 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
194 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
195 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
196 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
197 case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
198 }
199 LengthModifier lm(lmPosition, lmKind);
200 FS.setLengthModifier(lm);
201 return true;
202}
203
204//===----------------------------------------------------------------------===//
205// Methods on ArgTypeResult.
206//===----------------------------------------------------------------------===//
207
208bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
209 switch (K) {
210 case InvalidTy:
211 assert(false && "ArgTypeResult must be valid");
212 return true;
213
214 case UnknownTy:
215 return true;
216
217 case SpecificTy: {
218 argTy = C.getCanonicalType(argTy).getUnqualifiedType();
219 if (T == argTy)
220 return true;
221 if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
222 switch (BT->getKind()) {
223 default:
224 break;
225 case BuiltinType::Char_S:
226 case BuiltinType::SChar:
227 return T == C.UnsignedCharTy;
228 case BuiltinType::Char_U:
229 case BuiltinType::UChar:
230 return T == C.SignedCharTy;
231 case BuiltinType::Short:
232 return T == C.UnsignedShortTy;
233 case BuiltinType::UShort:
234 return T == C.ShortTy;
235 case BuiltinType::Int:
236 return T == C.UnsignedIntTy;
237 case BuiltinType::UInt:
238 return T == C.IntTy;
239 case BuiltinType::Long:
240 return T == C.UnsignedLongTy;
241 case BuiltinType::ULong:
242 return T == C.LongTy;
243 case BuiltinType::LongLong:
244 return T == C.UnsignedLongLongTy;
245 case BuiltinType::ULongLong:
246 return T == C.LongLongTy;
247 }
248 return false;
249 }
250
251 case CStrTy: {
252 const PointerType *PT = argTy->getAs<PointerType>();
253 if (!PT)
254 return false;
255 QualType pointeeTy = PT->getPointeeType();
256 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
257 switch (BT->getKind()) {
258 case BuiltinType::Void:
259 case BuiltinType::Char_U:
260 case BuiltinType::UChar:
261 case BuiltinType::Char_S:
262 case BuiltinType::SChar:
263 return true;
264 default:
265 break;
266 }
267
268 return false;
269 }
270
271 case WCStrTy: {
272 const PointerType *PT = argTy->getAs<PointerType>();
273 if (!PT)
274 return false;
275 QualType pointeeTy =
276 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
277 return pointeeTy == C.getWCharType();
278 }
279
280 case CPointerTy:
281 return argTy->getAs<PointerType>() != NULL ||
282 argTy->getAs<ObjCObjectPointerType>() != NULL;
283
284 case ObjCPointerTy:
285 return argTy->getAs<ObjCObjectPointerType>() != NULL;
286 }
287
288 // FIXME: Should be unreachable, but Clang is currently emitting
289 // a warning.
290 return false;
291}
292
293QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
294 switch (K) {
295 case InvalidTy:
296 assert(false && "No representative type for Invalid ArgTypeResult");
297 // Fall-through.
298 case UnknownTy:
299 return QualType();
300 case SpecificTy:
301 return T;
302 case CStrTy:
303 return C.getPointerType(C.CharTy);
304 case WCStrTy:
305 return C.getPointerType(C.getWCharType());
306 case ObjCPointerTy:
307 return C.ObjCBuiltinIdTy;
308 case CPointerTy:
309 return C.VoidPtrTy;
310 }
311
312 // FIXME: Should be unreachable, but Clang is currently emitting
313 // a warning.
314 return QualType();
315}
316
317//===----------------------------------------------------------------------===//
318// Methods on OptionalAmount.
319//===----------------------------------------------------------------------===//
320
321ArgTypeResult
322analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
323 return Ctx.IntTy;
324}
325
326//===----------------------------------------------------------------------===//
327// Methods on LengthModifier.
328//===----------------------------------------------------------------------===//
329
330const char *
331analyze_format_string::LengthModifier::toString() const {
332 switch (kind) {
333 case AsChar:
334 return "hh";
335 case AsShort:
336 return "h";
337 case AsLong: // or AsWideChar
338 return "l";
339 case AsLongLong:
340 return "ll";
341 case AsIntMax:
342 return "j";
343 case AsSizeT:
344 return "z";
345 case AsPtrDiff:
346 return "t";
347 case AsLongDouble:
348 return "L";
349 case None:
350 return "";
351 }
352 return NULL;
353}
354
355//===----------------------------------------------------------------------===//
356// Methods on OptionalAmount.
357//===----------------------------------------------------------------------===//
358
359void
360analyze_format_string::OptionalAmount::toString(llvm::raw_ostream &os) const {
361 switch (hs) {
362 case Invalid:
363 case NotSpecified:
364 return;
365 case Arg:
366 if (UsesDotPrefix)
367 os << ".";
368 if (usesPositionalArg())
369 os << "*" << getPositionalArgIndex() << "$";
370 else
371 os << "*";
372 break;
373 case Constant:
374 if (UsesDotPrefix)
375 os << ".";
376 os << amt;
377 break;
378 }
379}
380