blob: 07970cfda78387f02b127df713dbe47980d54637 [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks9b0c7492012-01-18 02:45:07 +000023#include "clang/Basic/Builtins.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000024#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000025
26using namespace clang;
27using namespace ento;
28
29namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000030class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000031 check::PreStmt<CallExpr> > {
32public:
Anna Zaks8568ee72012-01-14 02:48:40 +000033 static void *getTag() { static int Tag; return &Tag; }
34
35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000039
Anna Zaks9ffbe242011-12-17 00:26:34 +000040private:
Anna Zaks8568ee72012-01-14 02:48:40 +000041 static const unsigned ReturnValueIndex = UINT_MAX;
Anna Zaks022b3f42012-01-17 00:37:02 +000042 static const unsigned InvalidArgIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000043
Anna Zaks8f4caf52011-11-18 02:26:36 +000044 mutable llvm::OwningPtr<BugType> BT;
Anna Zaks9b0c7492012-01-18 02:45:07 +000045 inline void initBugType() const {
46 if (!BT)
47 BT.reset(new BugType("Taint Analysis", "General"));
48 }
Anna Zaks8f4caf52011-11-18 02:26:36 +000049
Anna Zaks1fb826a2012-01-12 02:22:34 +000050 /// \brief Catch taint related bugs. Check if tainted data is passed to a
51 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000052 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
53
Anna Zaks1fb826a2012-01-12 02:22:34 +000054 /// \brief Add taint sources on a pre-visit.
55 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
56
57 /// \brief Propagate taint generated at pre-visit.
58 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
59
60 /// \brief Add taint sources on a post visit.
61 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
62
63 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000064 /// (points to).
65 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000066 const Expr *Arg,
Anna Zaks1fb826a2012-01-12 02:22:34 +000067 bool IssueWarning = false) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000068
Anna Zaks022b3f42012-01-17 00:37:02 +000069 inline bool isTaintedOrPointsToTainted(const Expr *E,
70 const ProgramState *State,
71 CheckerContext &C) const {
72 return (State->isTainted(E, C.getLocationContext()) ||
73 (E->getType().getTypePtr()->isPointerType() &&
74 State->isTainted(getPointedToSymbol(C, E))));
75 }
76
Anna Zaks9ffbe242011-12-17 00:26:34 +000077 /// Functions defining the attack surface.
78 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
79 CheckerContext &C) const;
80 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000081 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000082
83 /// Taint the scanned input if the file is tainted.
84 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000085
Anna Zaksd3d85482011-12-16 18:28:50 +000086 /// Check if the region the expression evaluates to is the standard input,
87 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000088 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000089
Anna Zaks9f03b622012-01-07 02:33:10 +000090 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000091 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000092 bool checkUncontrolledFormatString(const CallExpr *CE,
93 CheckerContext &C) const;
94
Anna Zaks8568ee72012-01-14 02:48:40 +000095 /// Check for:
96 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
97 /// CWE-78, "Failure to Sanitize Data into an OS Command"
98 static const char MsgSanitizeSystemArgs[];
99 bool checkSystemCall(const CallExpr *CE, StringRef Name,
100 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000101
Anna Zaks4e462212012-01-18 02:45:11 +0000102 /// Check if tainted data is used as a buffer size ins strn.. functions,
103 /// and allocators.
104 static const char MsgTaintedBufferSize[];
105 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
106 CheckerContext &C) const;
107
Anna Zaks8568ee72012-01-14 02:48:40 +0000108 /// Generate a report if the expression is tainted or points to tainted data.
109 bool generateReportIfTainted(const Expr *E, const char Msg[],
110 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000111
112
113 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000114
Anna Zaks022b3f42012-01-17 00:37:02 +0000115 /// \brief A struct used to specify taint propagation rules for a function.
116 ///
117 /// If any of the possible taint source arguments is tainted, all of the
118 /// destination arguments should also be tainted. Use InvalidArgIndex in the
119 /// src list to specify that all of the arguments can introduce taint. Use
120 /// InvalidArgIndex in the dst arguments to signify that all the non-const
121 /// pointer and reference arguments might be tainted on return. If
122 /// ReturnValueIndex is added to the dst list, the return value will be
123 /// tainted.
124 struct TaintPropagationRule {
125 /// List of arguments which can be taint sources and should be checked.
126 ArgVector SrcArgs;
127 /// List of arguments which should be tainted on function return.
128 ArgVector DstArgs;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000129 // TODO: Check if using other data structures would be more optimal.
Anna Zaks022b3f42012-01-17 00:37:02 +0000130
131 TaintPropagationRule() {}
132
Anna Zaks9b0c7492012-01-18 02:45:07 +0000133 TaintPropagationRule(unsigned SArg,
134 unsigned DArg, bool TaintRet = false) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000135 SrcArgs.push_back(SArg);
136 DstArgs.push_back(DArg);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000137 if (TaintRet)
138 DstArgs.push_back(ReturnValueIndex);
Anna Zaks022b3f42012-01-17 00:37:02 +0000139 }
140
Anna Zaks9b0c7492012-01-18 02:45:07 +0000141 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
142 unsigned DArg, bool TaintRet = false) {
143 SrcArgs.push_back(SArg1);
144 SrcArgs.push_back(SArg2);
145 DstArgs.push_back(DArg);
146 if (TaintRet)
147 DstArgs.push_back(ReturnValueIndex);
148 }
149
150 /// Get the propagation rule for a given function.
151 static TaintPropagationRule
152 getTaintPropagationRule(const FunctionDecl *FDecl,
153 StringRef Name,
154 CheckerContext &C);
155
Anna Zaks022b3f42012-01-17 00:37:02 +0000156 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
157 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
158
Anna Zaks9b0c7492012-01-18 02:45:07 +0000159 inline bool isNull() const { return SrcArgs.empty(); }
160
161 inline bool isDestinationArgument(unsigned ArgNum) const {
162 return (std::find(DstArgs.begin(),
163 DstArgs.end(), ArgNum) != DstArgs.end());
164 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000165 };
166
167 /// \brief Pre-process a function which propagates taint according to the
168 /// given taint rule.
169 const ProgramState *prePropagateTaint(const CallExpr *CE,
170 CheckerContext &C,
171 const TaintPropagationRule PR) const;
172
173
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000174};
Anna Zaks9b0c7492012-01-18 02:45:07 +0000175
// Namespace-scope definitions of the static constants which are declared
// (with their initializers) inside the class.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// Diagnostic text used when a tainted format string is detected.
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
  "Tainted format string (CWE-134: Uncontrolled Format String)";

// Diagnostic text used when tainted data reaches system()/popen().
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
  "Tainted data passed to a system call "
  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";

// Diagnostic text used when a buffer size argument is tainted.
const char GenericTaintChecker::MsgTaintedBufferSize[] =
  "Tainted data is used to specify the buffer size "
  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
  "character data and the null terminator)";
190
191} // end of anonymous namespace
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000192
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, each of which is either
/// ReturnValueIndex or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return.
namespace { struct TaintArgsOnPostVisit{}; }
namespace clang { namespace ento {
template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
  // The set is keyed in the state's generic data map by the checker's tag.
  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
};
}}
Anna Zaks9ffbe242011-12-17 00:26:34 +0000204
Anna Zaks9b0c7492012-01-18 02:45:07 +0000205GenericTaintChecker::TaintPropagationRule
206GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
207 const FunctionDecl *FDecl,
208 StringRef Name,
209 CheckerContext &C) {
210 // Check for exact name match for functions without builtin substitutes.
211 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
212 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
213 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
214 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
215 .Default(TaintPropagationRule());
216
217 if (!Rule.isNull())
218 return Rule;
219
220 // Check if it's one of the memory setting/copying functions.
221 // This check is specialized but faster then calling isCLibraryFunction.
222 unsigned BId = 0;
223 if ( (BId = FDecl->getMemoryFunctionKind()) )
224 switch(BId) {
225 case Builtin::BImemcpy:
226 case Builtin::BImemmove:
227 case Builtin::BIstrncpy:
228 case Builtin::BIstrncat:
229 return TaintPropagationRule(1, 2, 0, true);
230 break;
231 case Builtin::BIstrlcpy:
232 case Builtin::BIstrlcat:
233 return TaintPropagationRule(1, 2, 0, false);
234 break;
235 case Builtin::BIstrndup:
236 return TaintPropagationRule(0, 1, ReturnValueIndex);
237 break;
238
239 default:
240 break;
241 };
242
243 // Process all other functions which could be defined as builtins.
244 if (Rule.isNull()) {
245 if (C.isCLibraryFunction(FDecl, "snprintf") ||
246 C.isCLibraryFunction(FDecl, "sprintf"))
247 return TaintPropagationRule(InvalidArgIndex, 0, true);
248 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
249 C.isCLibraryFunction(FDecl, "stpcpy") ||
250 C.isCLibraryFunction(FDecl, "strcat"))
251 return TaintPropagationRule(1, 0, true);
252 else if (C.isCLibraryFunction(FDecl, "bcopy"))
253 return TaintPropagationRule(0, 2, 1, false);
254 else if (C.isCLibraryFunction(FDecl, "strdup") ||
255 C.isCLibraryFunction(FDecl, "strdupa"))
256 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks4e462212012-01-18 02:45:11 +0000257 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
258 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000259 }
260
261 // Skipping the following functions, since they might be used for cleansing
262 // or smart memory copy:
263 // - memccpy - copying untill hitting a special character.
264
265 return TaintPropagationRule();
Anna Zaks8f4caf52011-11-18 02:26:36 +0000266}
267
Anna Zaks9ffbe242011-12-17 00:26:34 +0000268void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
269 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000270 // Check for errors first.
271 if (checkPre(CE, C))
272 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000273
Anna Zaks9f03b622012-01-07 02:33:10 +0000274 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000275 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000276}
277
278void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
279 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000280 if (propagateFromPre(CE, C))
281 return;
282 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000283}
284
Anna Zaks1fb826a2012-01-12 02:22:34 +0000285void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
286 CheckerContext &C) const {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000287 const ProgramState *State = 0;
288 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
289 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000290 if (Name.empty())
291 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000292
Anna Zaks9b0c7492012-01-18 02:45:07 +0000293 // First, try generating a propagation rule for this function.
294 TaintPropagationRule Rule =
295 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000296 if (!Rule.isNull()) {
297 State = prePropagateTaint(CE, C, Rule);
298 if (!State)
299 return;
300 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000301 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000302 }
303
Anna Zaks9b0c7492012-01-18 02:45:07 +0000304 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000305 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000306 .Case("fscanf", &GenericTaintChecker::preFscanf)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000307 .Default(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000308 // Check and evaluate the call.
309 if (evalFunction)
310 State = (this->*evalFunction)(CE, C);
311 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000312 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000313 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000314
Anna Zaks9ffbe242011-12-17 00:26:34 +0000315}
316
Anna Zaks1fb826a2012-01-12 02:22:34 +0000317bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
318 CheckerContext &C) const {
319 const ProgramState *State = C.getState();
320
321 // Depending on what was tainted at pre-visit, we determined a set of
322 // arguments which should be tainted after the function returns. These are
323 // stored in the state as TaintArgsOnPostVisit set.
324 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
325 for (llvm::ImmutableSet<unsigned>::iterator
326 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
327 unsigned ArgNum = *I;
328
329 // Special handling for the tainted return value.
330 if (ArgNum == ReturnValueIndex) {
331 State = State->addTaint(CE, C.getLocationContext());
332 continue;
333 }
334
335 // The arguments are pointer arguments. The data they are pointing at is
336 // tainted after the call.
337 const Expr* Arg = CE->getArg(ArgNum);
338 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
339 if (Sym)
340 State = State->addTaint(Sym);
341 }
342
343 // Clear up the taint info from the state.
344 State = State->remove<TaintArgsOnPostVisit>();
345
346 if (State != C.getState()) {
347 C.addTransition(State);
348 return true;
349 }
350 return false;
351}
352
353void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
354 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000355 // Define the attack surface.
356 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000357 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000358 if (Name.empty())
359 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000360 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000361 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000362 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000363 .Case("getchar", &GenericTaintChecker::postRetTaint)
364 .Case("getenv", &GenericTaintChecker::postRetTaint)
365 .Case("fopen", &GenericTaintChecker::postRetTaint)
366 .Case("fdopen", &GenericTaintChecker::postRetTaint)
367 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000368 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000369
370 // If the callee isn't defined, it is not of security concern.
371 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000372 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000373 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000374 State = (this->*evalFunction)(CE, C);
375 if (!State)
376 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000377
Anna Zaks9ffbe242011-12-17 00:26:34 +0000378 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000379}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000380
Anna Zaks9f03b622012-01-07 02:33:10 +0000381bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
382
383 if (checkUncontrolledFormatString(CE, C))
384 return true;
385
Anna Zaks4e462212012-01-18 02:45:11 +0000386 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
387 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks8568ee72012-01-14 02:48:40 +0000388 if (Name.empty())
389 return false;
390
391 if (checkSystemCall(CE, Name, C))
392 return true;
393
Anna Zaks4e462212012-01-18 02:45:11 +0000394 if (checkTaintedBufferSize(CE, FDecl, C))
395 return true;
396
Anna Zaks9f03b622012-01-07 02:33:10 +0000397 return false;
398}
399
Anna Zaks8f4caf52011-11-18 02:26:36 +0000400SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
401 const Expr* Arg,
402 bool IssueWarning) const {
403 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000404 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000405 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000406 return 0;
407
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000408 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000409
410 if (!AddrLoc && !IssueWarning)
411 return 0;
412
413 // If the Expr is not a location, issue a warning.
414 if (!AddrLoc) {
415 assert(IssueWarning);
416 if (ExplodedNode *N = C.generateSink(State)) {
417 initBugType();
418 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
419 report->addRange(Arg->getSourceRange());
420 C.EmitReport(report);
421 }
422 return 0;
423 }
424
Anna Zaks71d29092012-01-13 00:56:51 +0000425 const PointerType *ArgTy =
426 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
427 assert(ArgTy);
428 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000429 return Val.getAsSymbol();
430}
431
Anna Zaks022b3f42012-01-17 00:37:02 +0000432const ProgramState *
433GenericTaintChecker::prePropagateTaint(const CallExpr *CE,
434 CheckerContext &C,
435 const TaintPropagationRule PR) const {
436 const ProgramState *State = C.getState();
437
438 // Check for taint in arguments.
439 bool IsTainted = false;
440 for (ArgVector::const_iterator I = PR.SrcArgs.begin(),
441 E = PR.SrcArgs.end(); I != E; ++I) {
442 unsigned ArgNum = *I;
443
444 if (ArgNum == InvalidArgIndex) {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000445 // Check if any of the arguments is tainted, but skip the
446 // destination arguments.
447 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
448 if (PR.isDestinationArgument(i))
449 continue;
Anna Zaks022b3f42012-01-17 00:37:02 +0000450 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
451 break;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000452 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000453 break;
454 }
455
456 assert(ArgNum < CE->getNumArgs());
457 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
458 break;
459 }
460 if (!IsTainted)
461 return State;
462
463 // Mark the arguments which should be tainted after the function returns.
464 for (ArgVector::const_iterator I = PR.DstArgs.begin(),
465 E = PR.DstArgs.end(); I != E; ++I) {
466 unsigned ArgNum = *I;
467
468 // Should we mark all arguments as tainted?
469 if (ArgNum == InvalidArgIndex) {
470 // For all pointer and references that were passed in:
471 // If they are not pointing to const data, mark data as tainted.
472 // TODO: So far we are just going one level down; ideally we'd need to
473 // recurse here.
474 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
475 const Expr *Arg = CE->getArg(i);
476 // Process pointer argument.
477 const Type *ArgTy = Arg->getType().getTypePtr();
478 QualType PType = ArgTy->getPointeeType();
479 if ((!PType.isNull() && !PType.isConstQualified())
480 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
481 State = State->add<TaintArgsOnPostVisit>(i);
482 }
483 continue;
484 }
485
486 // Should mark the return value?
487 if (ArgNum == ReturnValueIndex) {
488 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
489 continue;
490 }
491
492 // Mark the given argument.
493 assert(ArgNum < CE->getNumArgs());
494 State = State->add<TaintArgsOnPostVisit>(ArgNum);
495 }
496
497 return State;
498}
499
500
Anna Zaks1fb826a2012-01-12 02:22:34 +0000501// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
502// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000503const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
504 CheckerContext &C) const {
505 assert(CE->getNumArgs() >= 2);
506 const ProgramState *State = C.getState();
507
508 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000509 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000510 isStdin(CE->getArg(0), C)) {
511 // All arguments except for the first two should get taint.
512 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
513 State = State->add<TaintArgsOnPostVisit>(i);
514 return State;
515 }
516
Anna Zaks9ffbe242011-12-17 00:26:34 +0000517 return 0;
518}
519
Anna Zaks9ffbe242011-12-17 00:26:34 +0000520const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
521 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000522 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000523 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000524 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000525 // All arguments except for the very first one should get taint.
526 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
527 // The arguments are pointer arguments. The data they are pointing at is
528 // tainted after the call.
529 const Expr* Arg = CE->getArg(i);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000530 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
Anna Zaks1009ac72011-12-14 00:56:02 +0000531 if (Sym)
532 State = State->addTaint(Sym);
533 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000534 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000535}
536
Anna Zaks9ffbe242011-12-17 00:26:34 +0000537const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
538 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000539 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000540}
541
Anna Zaksefd69892011-12-14 00:56:18 +0000542bool GenericTaintChecker::isStdin(const Expr *E,
543 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000544 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000545 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000546
Anna Zaksd3d85482011-12-16 18:28:50 +0000547 // stdin is a pointer, so it would be a region.
548 const MemRegion *MemReg = Val.getAsRegion();
549
550 // The region should be symbolic, we do not know it's value.
551 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
552 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000553 return false;
554
Anna Zaksd3d85482011-12-16 18:28:50 +0000555 // Get it's symbol and find the declaration region it's pointing to.
556 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
557 if (!Sm)
558 return false;
559 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
560 if (!DeclReg)
561 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000562
Anna Zaksd3d85482011-12-16 18:28:50 +0000563 // This region corresponds to a declaration, find out if it's a global/extern
564 // variable named stdin with the proper type.
565 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
566 D = D->getCanonicalDecl();
567 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
568 if (const PointerType * PtrTy =
569 dyn_cast<PointerType>(D->getType().getTypePtr()))
570 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
571 return true;
572 }
Anna Zaksefd69892011-12-14 00:56:18 +0000573 return false;
574}
575
Anna Zaks9f03b622012-01-07 02:33:10 +0000576static bool getPrintfFormatArgumentNum(const CallExpr *CE,
577 const CheckerContext &C,
578 unsigned int &ArgNum) {
579 // Find if the function contains a format string argument.
580 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
581 // vsnprintf, syslog, custom annotated functions.
582 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
583 if (!FDecl)
584 return false;
585 for (specific_attr_iterator<FormatAttr>
586 i = FDecl->specific_attr_begin<FormatAttr>(),
587 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
588
589 const FormatAttr *Format = *i;
590 ArgNum = Format->getFormatIdx() - 1;
591 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
592 return true;
593 }
594
595 // Or if a function is named setproctitle (this is a heuristic).
596 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
597 ArgNum = 0;
598 return true;
599 }
600
601 return false;
602}
603
Anna Zaks8568ee72012-01-14 02:48:40 +0000604bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
605 const char Msg[],
606 CheckerContext &C) const {
607 assert(E);
608
609 // Check for taint.
610 const ProgramState *State = C.getState();
611 if (!State->isTainted(getPointedToSymbol(C, E)) &&
612 !State->isTainted(E, C.getLocationContext()))
613 return false;
614
615 // Generate diagnostic.
616 if (ExplodedNode *N = C.addTransition()) {
617 initBugType();
618 BugReport *report = new BugReport(*BT, Msg, N);
619 report->addRange(E->getSourceRange());
620 C.EmitReport(report);
621 return true;
622 }
623 return false;
624}
625
Anna Zaks9f03b622012-01-07 02:33:10 +0000626bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
627 CheckerContext &C) const{
628 // Check if the function contains a format string argument.
629 unsigned int ArgNum = 0;
630 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
631 return false;
632
633 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000634 if (generateReportIfTainted(CE->getArg(ArgNum),
635 MsgUncontrolledFormatString, C))
636 return true;
637 return false;
638}
639
640bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
641 StringRef Name,
642 CheckerContext &C) const {
643 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
644 .Case("system", 0)
645 .Case("popen", 0)
646 .Default(UINT_MAX);
647
648 if (ArgNum == UINT_MAX)
649 return false;
650
651 if (generateReportIfTainted(CE->getArg(ArgNum),
652 MsgSanitizeSystemArgs, C))
653 return true;
654
Anna Zaks9f03b622012-01-07 02:33:10 +0000655 return false;
656}
657
Anna Zaks4e462212012-01-18 02:45:11 +0000658// TODO: Should this check be a part of the CString checker?
659// If yes, should taint be a global setting?
660bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
661 const FunctionDecl *FDecl,
662 CheckerContext &C) const {
663 // If the function has a buffer size argument, set ArgNum.
664 unsigned ArgNum = InvalidArgIndex;
665 unsigned BId = 0;
666 if ( (BId = FDecl->getMemoryFunctionKind()) )
667 switch(BId) {
668 case Builtin::BImemcpy:
669 case Builtin::BImemmove:
670 case Builtin::BIstrncpy:
671 ArgNum = 2;
672 break;
673 case Builtin::BIstrndup:
674 ArgNum = 1;
675 break;
676 default:
677 break;
678 };
679
680 if (ArgNum == InvalidArgIndex) {
681 if (C.isCLibraryFunction(FDecl, "malloc") ||
682 C.isCLibraryFunction(FDecl, "calloc") ||
683 C.isCLibraryFunction(FDecl, "alloca"))
684 ArgNum = 0;
685 else if (C.isCLibraryFunction(FDecl, "memccpy"))
686 ArgNum = 3;
687 else if (C.isCLibraryFunction(FDecl, "realloc"))
688 ArgNum = 1;
689 else if (C.isCLibraryFunction(FDecl, "bcopy"))
690 ArgNum = 2;
691 }
692
693 if (ArgNum != InvalidArgIndex &&
694 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
695 return true;
696
697 return false;
698}
699
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}