blob: 1fa4faea18073cce67e3760463e27fa4b46159e8 [file] [log] [blame]
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends. The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_scanf::ConversionSpecifier;
23using clang::analyze_scanf::ScanfSpecifier;
24
// Result type produced by ParseScanfSpecifier() for one parsing step.
// ParseScanfString() inspects it through shouldStop(), hasValue(),
// getValue() and getStart().
typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
        ScanfSpecifierResult;
27
28static bool ParseScanList(FormatStringHandler &H,
29 ConversionSpecifier &CS,
30 const char *&Beg, const char *E) {
31 const char *I = Beg;
32 const char *start = I - 1;
33 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
34
35 // No more characters?
36 if (I == E) {
37 H.HandleIncompleteScanList(start, I);
38 return true;
39 }
40
41 // Special case: ']' is the first character.
42 if (*I == ']') {
43 if (++I == E) {
44 H.HandleIncompleteScanList(start, I);
45 return true;
46 }
47 }
48
49 // Look for a ']' character which denotes the end of the scan list.
50 while (*I != ']') {
51 if (++I == E) {
52 H.HandleIncompleteScanList(start, I);
53 return true;
54 }
55 }
56
57 CS.setEndScanList(I);
58 return false;
59}
60
61// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
62// We can possibly refactor.
63static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
64 const char *&Beg,
65 const char *E,
66 unsigned &argIndex) {
67
68 using namespace clang::analyze_scanf;
69 const char *I = Beg;
70 const char *Start = 0;
71 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
72
73 // Look for a '%' character that indicates the start of a format specifier.
74 for ( ; I != E ; ++I) {
75 char c = *I;
76 if (c == '\0') {
77 // Detect spurious null characters, which are likely errors.
78 H.HandleNullChar(I);
79 return true;
80 }
81 if (c == '%') {
82 Start = I++; // Record the start of the format specifier.
83 break;
84 }
85 }
86
87 // No format specifier found?
88 if (!Start)
89 return false;
90
91 if (I == E) {
92 // No more characters left?
93 H.HandleIncompleteSpecifier(Start, E - Start);
94 return true;
95 }
96
97 ScanfSpecifier FS;
98 if (ParseArgPosition(H, FS, Start, I, E))
99 return true;
100
101 if (I == E) {
102 // No more characters left?
103 H.HandleIncompleteSpecifier(Start, E - Start);
104 return true;
105 }
106
107 // Look for '*' flag if it is present.
108 if (*I == '*') {
109 FS.setSuppressAssignment(I);
110 if (++I == E) {
111 H.HandleIncompleteSpecifier(Start, E - Start);
112 return true;
113 }
114 }
115
116 // Look for the field width (if any). Unlike printf, this is either
117 // a fixed integer or isn't present.
118 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
119 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
120 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
121 FS.setFieldWidth(Amt);
122
123 if (I == E) {
124 // No more characters left?
125 H.HandleIncompleteSpecifier(Start, E - Start);
126 return true;
127 }
128 }
129
130 // Look for the length modifier.
131 if (ParseLengthModifier(FS, I, E) && I == E) {
132 // No more characters left?
133 H.HandleIncompleteSpecifier(Start, E - Start);
134 return true;
135 }
136
137 // Detect spurious null characters, which are likely errors.
138 if (*I == '\0') {
139 H.HandleNullChar(I);
140 return true;
141 }
142
143 // Finally, look for the conversion specifier.
144 const char *conversionPosition = I++;
145 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
146 switch (*conversionPosition) {
147 default:
148 break;
149 case '%': k = ConversionSpecifier::PercentArg; break;
150 case 'A': k = ConversionSpecifier::AArg; break;
151 case 'E': k = ConversionSpecifier::EArg; break;
152 case 'F': k = ConversionSpecifier::FArg; break;
153 case 'G': k = ConversionSpecifier::GArg; break;
154 case 'X': k = ConversionSpecifier::XArg; break;
155 case 'a': k = ConversionSpecifier::aArg; break;
156 case 'd': k = ConversionSpecifier::dArg; break;
157 case 'e': k = ConversionSpecifier::eArg; break;
158 case 'f': k = ConversionSpecifier::fArg; break;
159 case 'g': k = ConversionSpecifier::gArg; break;
160 case 'i': k = ConversionSpecifier::iArg; break;
161 case 'n': k = ConversionSpecifier::ConsumedSoFarArg; break;
162 case 'c': k = ConversionSpecifier::cArg; break;
163 case 'C': k = ConversionSpecifier::CArg; break;
164 case 'S': k = ConversionSpecifier::SArg; break;
165 case '[': k = ConversionSpecifier::ScanListArg; break;
166 }
167 ConversionSpecifier CS(conversionPosition, k);
168 if (k == ConversionSpecifier::ScanListArg) {
169 if (!ParseScanList(H, CS, I, E))
170 return true;
171 }
172 FS.setConversionSpecifier(CS);
173 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
174 && !FS.usesPositionalArg())
175 FS.setArgIndex(argIndex++);
176
177 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
178 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
179
180 if (k == ConversionSpecifier::InvalidSpecifier) {
181 // Assume the conversion takes one argument.
182 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
183 }
184 return ScanfSpecifierResult(Start, FS);
185}
186
187bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
188 const char *I,
189 const char *E) {
190
191 unsigned argIndex = 0;
192
193 // Keep looking for a format specifier until we have exhausted the string.
194 while (I != E) {
195 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex);
196 // Did a fail-stop error of any kind occur when parsing the specifier?
197 // If so, don't do any more processing.
198 if (FSR.shouldStop())
199 return true;;
200 // Did we exhaust the string or encounter an error that
201 // we can recover from?
202 if (!FSR.hasValue())
203 continue;
204 // We have a format specifier. Pass it to the callback.
205 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
206 I - FSR.getStart())) {
207 return true;
208 }
209 }
210 assert(I == E && "Format string not exhausted");
211 return false;
212}
213
214