blob: 52e01b619c15aab0ab13822dace5f20f278cca0c [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks9b0c7492012-01-18 02:45:07 +000023#include "clang/Basic/Builtins.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000024#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000025
26using namespace clang;
27using namespace ento;
28
29namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000030class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000031 check::PreStmt<CallExpr> > {
32public:
Anna Zaks8568ee72012-01-14 02:48:40 +000033 static void *getTag() { static int Tag; return &Tag; }
34
35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000039
Anna Zaks9ffbe242011-12-17 00:26:34 +000040private:
Anna Zaks8568ee72012-01-14 02:48:40 +000041 static const unsigned ReturnValueIndex = UINT_MAX;
Anna Zaks022b3f42012-01-17 00:37:02 +000042 static const unsigned InvalidArgIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000043
Anna Zaks8f4caf52011-11-18 02:26:36 +000044 mutable llvm::OwningPtr<BugType> BT;
Anna Zaks9b0c7492012-01-18 02:45:07 +000045 inline void initBugType() const {
46 if (!BT)
47 BT.reset(new BugType("Taint Analysis", "General"));
48 }
Anna Zaks8f4caf52011-11-18 02:26:36 +000049
Anna Zaks1fb826a2012-01-12 02:22:34 +000050 /// \brief Catch taint related bugs. Check if tainted data is passed to a
51 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000052 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
53
Anna Zaks1fb826a2012-01-12 02:22:34 +000054 /// \brief Add taint sources on a pre-visit.
55 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
56
57 /// \brief Propagate taint generated at pre-visit.
58 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
59
60 /// \brief Add taint sources on a post visit.
61 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
62
63 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000064 /// (points to).
65 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000066 const Expr *Arg,
Anna Zaks1fb826a2012-01-12 02:22:34 +000067 bool IssueWarning = false) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000068
Anna Zaks022b3f42012-01-17 00:37:02 +000069 inline bool isTaintedOrPointsToTainted(const Expr *E,
70 const ProgramState *State,
71 CheckerContext &C) const {
72 return (State->isTainted(E, C.getLocationContext()) ||
73 (E->getType().getTypePtr()->isPointerType() &&
74 State->isTainted(getPointedToSymbol(C, E))));
75 }
76
Anna Zaks9ffbe242011-12-17 00:26:34 +000077 /// Functions defining the attack surface.
78 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
79 CheckerContext &C) const;
80 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000081 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000082
83 /// Taint the scanned input if the file is tainted.
84 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000085
Anna Zaksd3d85482011-12-16 18:28:50 +000086 /// Check if the region the expression evaluates to is the standard input,
87 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000088 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000089
Anna Zaks9f03b622012-01-07 02:33:10 +000090 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000091 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000092 bool checkUncontrolledFormatString(const CallExpr *CE,
93 CheckerContext &C) const;
94
Anna Zaks8568ee72012-01-14 02:48:40 +000095 /// Check for:
96 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
97 /// CWE-78, "Failure to Sanitize Data into an OS Command"
98 static const char MsgSanitizeSystemArgs[];
99 bool checkSystemCall(const CallExpr *CE, StringRef Name,
100 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000101
Anna Zaks8568ee72012-01-14 02:48:40 +0000102 /// Generate a report if the expression is tainted or points to tainted data.
103 bool generateReportIfTainted(const Expr *E, const char Msg[],
104 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000105
106
107 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000108
Anna Zaks022b3f42012-01-17 00:37:02 +0000109 /// \brief A struct used to specify taint propagation rules for a function.
110 ///
111 /// If any of the possible taint source arguments is tainted, all of the
112 /// destination arguments should also be tainted. Use InvalidArgIndex in the
113 /// src list to specify that all of the arguments can introduce taint. Use
114 /// InvalidArgIndex in the dst arguments to signify that all the non-const
115 /// pointer and reference arguments might be tainted on return. If
116 /// ReturnValueIndex is added to the dst list, the return value will be
117 /// tainted.
118 struct TaintPropagationRule {
119 /// List of arguments which can be taint sources and should be checked.
120 ArgVector SrcArgs;
121 /// List of arguments which should be tainted on function return.
122 ArgVector DstArgs;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000123 // TODO: Check if using other data structures would be more optimal.
Anna Zaks022b3f42012-01-17 00:37:02 +0000124
125 TaintPropagationRule() {}
126
Anna Zaks9b0c7492012-01-18 02:45:07 +0000127 TaintPropagationRule(unsigned SArg,
128 unsigned DArg, bool TaintRet = false) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000129 SrcArgs.push_back(SArg);
130 DstArgs.push_back(DArg);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000131 if (TaintRet)
132 DstArgs.push_back(ReturnValueIndex);
Anna Zaks022b3f42012-01-17 00:37:02 +0000133 }
134
Anna Zaks9b0c7492012-01-18 02:45:07 +0000135 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
136 unsigned DArg, bool TaintRet = false) {
137 SrcArgs.push_back(SArg1);
138 SrcArgs.push_back(SArg2);
139 DstArgs.push_back(DArg);
140 if (TaintRet)
141 DstArgs.push_back(ReturnValueIndex);
142 }
143
144 /// Get the propagation rule for a given function.
145 static TaintPropagationRule
146 getTaintPropagationRule(const FunctionDecl *FDecl,
147 StringRef Name,
148 CheckerContext &C);
149
Anna Zaks022b3f42012-01-17 00:37:02 +0000150 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
151 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
152
Anna Zaks9b0c7492012-01-18 02:45:07 +0000153 inline bool isNull() const { return SrcArgs.empty(); }
154
155 inline bool isDestinationArgument(unsigned ArgNum) const {
156 return (std::find(DstArgs.begin(),
157 DstArgs.end(), ArgNum) != DstArgs.end());
158 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000159 };
160
161 /// \brief Pre-process a function which propagates taint according to the
162 /// given taint rule.
163 const ProgramState *prePropagateTaint(const CallExpr *CE,
164 CheckerContext &C,
165 const TaintPropagationRule PR) const;
166
167
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000168};
Anna Zaks9b0c7492012-01-18 02:45:07 +0000169
170const unsigned GenericTaintChecker::ReturnValueIndex;
171const unsigned GenericTaintChecker::InvalidArgIndex;
172
Anna Zaks8568ee72012-01-14 02:48:40 +0000173const char GenericTaintChecker::MsgUncontrolledFormatString[] =
174 "Tainted format string (CWE-134: Uncontrolled Format String)";
175
176const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
177 "Tainted data passed to a system call "
178 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000179}
180
Anna Zaks1fb826a2012-01-12 02:22:34 +0000181/// A set which is used to pass information from call pre-visit instruction
182/// to the call post-visit. The values are unsigned integers, which are either
183/// ReturnValueIndex, or indexes of the pointer/reference argument, which
184/// points to data, which should be tainted on return.
185namespace { struct TaintArgsOnPostVisit{}; }
186namespace clang { namespace ento {
187template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
188 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
189 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
190};
191}}
Anna Zaks9ffbe242011-12-17 00:26:34 +0000192
Anna Zaks9b0c7492012-01-18 02:45:07 +0000193GenericTaintChecker::TaintPropagationRule
194GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
195 const FunctionDecl *FDecl,
196 StringRef Name,
197 CheckerContext &C) {
198 // Check for exact name match for functions without builtin substitutes.
199 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
200 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
201 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
202 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
203 .Default(TaintPropagationRule());
204
205 if (!Rule.isNull())
206 return Rule;
207
208 // Check if it's one of the memory setting/copying functions.
209 // This check is specialized but faster then calling isCLibraryFunction.
210 unsigned BId = 0;
211 if ( (BId = FDecl->getMemoryFunctionKind()) )
212 switch(BId) {
213 case Builtin::BImemcpy:
214 case Builtin::BImemmove:
215 case Builtin::BIstrncpy:
216 case Builtin::BIstrncat:
217 return TaintPropagationRule(1, 2, 0, true);
218 break;
219 case Builtin::BIstrlcpy:
220 case Builtin::BIstrlcat:
221 return TaintPropagationRule(1, 2, 0, false);
222 break;
223 case Builtin::BIstrndup:
224 return TaintPropagationRule(0, 1, ReturnValueIndex);
225 break;
226
227 default:
228 break;
229 };
230
231 // Process all other functions which could be defined as builtins.
232 if (Rule.isNull()) {
233 if (C.isCLibraryFunction(FDecl, "snprintf") ||
234 C.isCLibraryFunction(FDecl, "sprintf"))
235 return TaintPropagationRule(InvalidArgIndex, 0, true);
236 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
237 C.isCLibraryFunction(FDecl, "stpcpy") ||
238 C.isCLibraryFunction(FDecl, "strcat"))
239 return TaintPropagationRule(1, 0, true);
240 else if (C.isCLibraryFunction(FDecl, "bcopy"))
241 return TaintPropagationRule(0, 2, 1, false);
242 else if (C.isCLibraryFunction(FDecl, "strdup") ||
243 C.isCLibraryFunction(FDecl, "strdupa"))
244 return TaintPropagationRule(0, ReturnValueIndex);
245 else if (C.isCLibraryFunction(FDecl, "strndupa"))
246 return TaintPropagationRule(0, 1, ReturnValueIndex);
247 }
248
249 // Skipping the following functions, since they might be used for cleansing
250 // or smart memory copy:
251 // - memccpy - copying untill hitting a special character.
252
253 return TaintPropagationRule();
Anna Zaks8f4caf52011-11-18 02:26:36 +0000254}
255
Anna Zaks9ffbe242011-12-17 00:26:34 +0000256void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
257 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000258 // Check for errors first.
259 if (checkPre(CE, C))
260 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000261
Anna Zaks9f03b622012-01-07 02:33:10 +0000262 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000263 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000264}
265
266void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
267 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000268 if (propagateFromPre(CE, C))
269 return;
270 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000271}
272
Anna Zaks1fb826a2012-01-12 02:22:34 +0000273void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
274 CheckerContext &C) const {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000275 const ProgramState *State = 0;
276 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
277 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000278 if (Name.empty())
279 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000280
Anna Zaks9b0c7492012-01-18 02:45:07 +0000281 // First, try generating a propagation rule for this function.
282 TaintPropagationRule Rule =
283 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000284 if (!Rule.isNull()) {
285 State = prePropagateTaint(CE, C, Rule);
286 if (!State)
287 return;
288 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000289 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000290 }
291
Anna Zaks9b0c7492012-01-18 02:45:07 +0000292 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000293 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000294 .Case("fscanf", &GenericTaintChecker::preFscanf)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000295 .Default(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000296 // Check and evaluate the call.
297 if (evalFunction)
298 State = (this->*evalFunction)(CE, C);
299 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000300 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000301 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000302
Anna Zaks9ffbe242011-12-17 00:26:34 +0000303}
304
Anna Zaks1fb826a2012-01-12 02:22:34 +0000305bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
306 CheckerContext &C) const {
307 const ProgramState *State = C.getState();
308
309 // Depending on what was tainted at pre-visit, we determined a set of
310 // arguments which should be tainted after the function returns. These are
311 // stored in the state as TaintArgsOnPostVisit set.
312 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
313 for (llvm::ImmutableSet<unsigned>::iterator
314 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
315 unsigned ArgNum = *I;
316
317 // Special handling for the tainted return value.
318 if (ArgNum == ReturnValueIndex) {
319 State = State->addTaint(CE, C.getLocationContext());
320 continue;
321 }
322
323 // The arguments are pointer arguments. The data they are pointing at is
324 // tainted after the call.
325 const Expr* Arg = CE->getArg(ArgNum);
326 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
327 if (Sym)
328 State = State->addTaint(Sym);
329 }
330
331 // Clear up the taint info from the state.
332 State = State->remove<TaintArgsOnPostVisit>();
333
334 if (State != C.getState()) {
335 C.addTransition(State);
336 return true;
337 }
338 return false;
339}
340
341void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
342 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000343 // Define the attack surface.
344 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000345 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000346 if (Name.empty())
347 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000348 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000349 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000350 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000351 .Case("getchar", &GenericTaintChecker::postRetTaint)
352 .Case("getenv", &GenericTaintChecker::postRetTaint)
353 .Case("fopen", &GenericTaintChecker::postRetTaint)
354 .Case("fdopen", &GenericTaintChecker::postRetTaint)
355 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000356 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000357
358 // If the callee isn't defined, it is not of security concern.
359 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000360 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000361 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000362 State = (this->*evalFunction)(CE, C);
363 if (!State)
364 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000365
Anna Zaks9ffbe242011-12-17 00:26:34 +0000366 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000367}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000368
Anna Zaks9f03b622012-01-07 02:33:10 +0000369bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
370
371 if (checkUncontrolledFormatString(CE, C))
372 return true;
373
Anna Zaks8568ee72012-01-14 02:48:40 +0000374 StringRef Name = C.getCalleeName(CE);
375 if (Name.empty())
376 return false;
377
378 if (checkSystemCall(CE, Name, C))
379 return true;
380
Anna Zaks9f03b622012-01-07 02:33:10 +0000381 return false;
382}
383
Anna Zaks8f4caf52011-11-18 02:26:36 +0000384SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
385 const Expr* Arg,
386 bool IssueWarning) const {
387 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000388 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000389 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000390 return 0;
391
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000392 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000393
394 if (!AddrLoc && !IssueWarning)
395 return 0;
396
397 // If the Expr is not a location, issue a warning.
398 if (!AddrLoc) {
399 assert(IssueWarning);
400 if (ExplodedNode *N = C.generateSink(State)) {
401 initBugType();
402 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
403 report->addRange(Arg->getSourceRange());
404 C.EmitReport(report);
405 }
406 return 0;
407 }
408
Anna Zaks71d29092012-01-13 00:56:51 +0000409 const PointerType *ArgTy =
410 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
411 assert(ArgTy);
412 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000413 return Val.getAsSymbol();
414}
415
Anna Zaks022b3f42012-01-17 00:37:02 +0000416const ProgramState *
417GenericTaintChecker::prePropagateTaint(const CallExpr *CE,
418 CheckerContext &C,
419 const TaintPropagationRule PR) const {
420 const ProgramState *State = C.getState();
421
422 // Check for taint in arguments.
423 bool IsTainted = false;
424 for (ArgVector::const_iterator I = PR.SrcArgs.begin(),
425 E = PR.SrcArgs.end(); I != E; ++I) {
426 unsigned ArgNum = *I;
427
428 if (ArgNum == InvalidArgIndex) {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000429 // Check if any of the arguments is tainted, but skip the
430 // destination arguments.
431 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
432 if (PR.isDestinationArgument(i))
433 continue;
Anna Zaks022b3f42012-01-17 00:37:02 +0000434 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
435 break;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000436 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000437 break;
438 }
439
440 assert(ArgNum < CE->getNumArgs());
441 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
442 break;
443 }
444 if (!IsTainted)
445 return State;
446
447 // Mark the arguments which should be tainted after the function returns.
448 for (ArgVector::const_iterator I = PR.DstArgs.begin(),
449 E = PR.DstArgs.end(); I != E; ++I) {
450 unsigned ArgNum = *I;
451
452 // Should we mark all arguments as tainted?
453 if (ArgNum == InvalidArgIndex) {
454 // For all pointer and references that were passed in:
455 // If they are not pointing to const data, mark data as tainted.
456 // TODO: So far we are just going one level down; ideally we'd need to
457 // recurse here.
458 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459 const Expr *Arg = CE->getArg(i);
460 // Process pointer argument.
461 const Type *ArgTy = Arg->getType().getTypePtr();
462 QualType PType = ArgTy->getPointeeType();
463 if ((!PType.isNull() && !PType.isConstQualified())
464 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
465 State = State->add<TaintArgsOnPostVisit>(i);
466 }
467 continue;
468 }
469
470 // Should mark the return value?
471 if (ArgNum == ReturnValueIndex) {
472 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
473 continue;
474 }
475
476 // Mark the given argument.
477 assert(ArgNum < CE->getNumArgs());
478 State = State->add<TaintArgsOnPostVisit>(ArgNum);
479 }
480
481 return State;
482}
483
484
Anna Zaks1fb826a2012-01-12 02:22:34 +0000485// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
486// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000487const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
488 CheckerContext &C) const {
489 assert(CE->getNumArgs() >= 2);
490 const ProgramState *State = C.getState();
491
492 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000493 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000494 isStdin(CE->getArg(0), C)) {
495 // All arguments except for the first two should get taint.
496 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
497 State = State->add<TaintArgsOnPostVisit>(i);
498 return State;
499 }
500
Anna Zaks9ffbe242011-12-17 00:26:34 +0000501 return 0;
502}
503
Anna Zaks9ffbe242011-12-17 00:26:34 +0000504const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
505 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000506 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000507 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000508 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000509 // All arguments except for the very first one should get taint.
510 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
511 // The arguments are pointer arguments. The data they are pointing at is
512 // tainted after the call.
513 const Expr* Arg = CE->getArg(i);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000514 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
Anna Zaks1009ac72011-12-14 00:56:02 +0000515 if (Sym)
516 State = State->addTaint(Sym);
517 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000518 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000519}
520
Anna Zaks9ffbe242011-12-17 00:26:34 +0000521const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
522 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000523 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000524}
525
Anna Zaksefd69892011-12-14 00:56:18 +0000526bool GenericTaintChecker::isStdin(const Expr *E,
527 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000528 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000529 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000530
Anna Zaksd3d85482011-12-16 18:28:50 +0000531 // stdin is a pointer, so it would be a region.
532 const MemRegion *MemReg = Val.getAsRegion();
533
534 // The region should be symbolic, we do not know it's value.
535 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
536 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000537 return false;
538
Anna Zaksd3d85482011-12-16 18:28:50 +0000539 // Get it's symbol and find the declaration region it's pointing to.
540 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
541 if (!Sm)
542 return false;
543 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
544 if (!DeclReg)
545 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000546
Anna Zaksd3d85482011-12-16 18:28:50 +0000547 // This region corresponds to a declaration, find out if it's a global/extern
548 // variable named stdin with the proper type.
549 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
550 D = D->getCanonicalDecl();
551 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
552 if (const PointerType * PtrTy =
553 dyn_cast<PointerType>(D->getType().getTypePtr()))
554 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
555 return true;
556 }
Anna Zaksefd69892011-12-14 00:56:18 +0000557 return false;
558}
559
Anna Zaks9f03b622012-01-07 02:33:10 +0000560static bool getPrintfFormatArgumentNum(const CallExpr *CE,
561 const CheckerContext &C,
562 unsigned int &ArgNum) {
563 // Find if the function contains a format string argument.
564 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
565 // vsnprintf, syslog, custom annotated functions.
566 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
567 if (!FDecl)
568 return false;
569 for (specific_attr_iterator<FormatAttr>
570 i = FDecl->specific_attr_begin<FormatAttr>(),
571 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
572
573 const FormatAttr *Format = *i;
574 ArgNum = Format->getFormatIdx() - 1;
575 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
576 return true;
577 }
578
579 // Or if a function is named setproctitle (this is a heuristic).
580 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
581 ArgNum = 0;
582 return true;
583 }
584
585 return false;
586}
587
Anna Zaks8568ee72012-01-14 02:48:40 +0000588bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
589 const char Msg[],
590 CheckerContext &C) const {
591 assert(E);
592
593 // Check for taint.
594 const ProgramState *State = C.getState();
595 if (!State->isTainted(getPointedToSymbol(C, E)) &&
596 !State->isTainted(E, C.getLocationContext()))
597 return false;
598
599 // Generate diagnostic.
600 if (ExplodedNode *N = C.addTransition()) {
601 initBugType();
602 BugReport *report = new BugReport(*BT, Msg, N);
603 report->addRange(E->getSourceRange());
604 C.EmitReport(report);
605 return true;
606 }
607 return false;
608}
609
Anna Zaks9f03b622012-01-07 02:33:10 +0000610bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
611 CheckerContext &C) const{
612 // Check if the function contains a format string argument.
613 unsigned int ArgNum = 0;
614 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
615 return false;
616
617 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000618 if (generateReportIfTainted(CE->getArg(ArgNum),
619 MsgUncontrolledFormatString, C))
620 return true;
621 return false;
622}
623
624bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
625 StringRef Name,
626 CheckerContext &C) const {
627 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
628 .Case("system", 0)
629 .Case("popen", 0)
630 .Default(UINT_MAX);
631
632 if (ArgNum == UINT_MAX)
633 return false;
634
635 if (generateReportIfTainted(CE->getArg(ArgNum),
636 MsgSanitizeSystemArgs, C))
637 return true;
638
Anna Zaks9f03b622012-01-07 02:33:10 +0000639 return false;
640}
641
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000642void ento::registerGenericTaintChecker(CheckerManager &mgr) {
643 mgr.registerChecker<GenericTaintChecker>();
644}