blob: c3cd3b05c5a2e3efd05bc6cab5c67a18edd90676 [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks9b0c7492012-01-18 02:45:07 +000023#include "clang/Basic/Builtins.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000024#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000025
26using namespace clang;
27using namespace ento;
28
29namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000030class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000031 check::PreStmt<CallExpr> > {
32public:
Anna Zaks8568ee72012-01-14 02:48:40 +000033 static void *getTag() { static int Tag; return &Tag; }
34
35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000039
Anna Zaks9ffbe242011-12-17 00:26:34 +000040private:
Anna Zaks8568ee72012-01-14 02:48:40 +000041 static const unsigned ReturnValueIndex = UINT_MAX;
Anna Zaks022b3f42012-01-17 00:37:02 +000042 static const unsigned InvalidArgIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000043
Anna Zaks8f4caf52011-11-18 02:26:36 +000044 mutable llvm::OwningPtr<BugType> BT;
Anna Zaks9b0c7492012-01-18 02:45:07 +000045 inline void initBugType() const {
46 if (!BT)
47 BT.reset(new BugType("Taint Analysis", "General"));
48 }
Anna Zaks8f4caf52011-11-18 02:26:36 +000049
Anna Zaks1fb826a2012-01-12 02:22:34 +000050 /// \brief Catch taint related bugs. Check if tainted data is passed to a
51 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000052 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
53
Anna Zaks1fb826a2012-01-12 02:22:34 +000054 /// \brief Add taint sources on a pre-visit.
55 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
56
57 /// \brief Propagate taint generated at pre-visit.
58 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
59
60 /// \brief Add taint sources on a post visit.
61 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
62
63 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000064 /// (points to).
Anna Zaks7cdfe292012-01-18 02:45:13 +000065 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +000066
Anna Zaks7cdfe292012-01-18 02:45:13 +000067 static inline bool isTaintedOrPointsToTainted(const Expr *E,
68 const ProgramState *State,
69 CheckerContext &C) {
Anna Zaks022b3f42012-01-17 00:37:02 +000070 return (State->isTainted(E, C.getLocationContext()) ||
71 (E->getType().getTypePtr()->isPointerType() &&
72 State->isTainted(getPointedToSymbol(C, E))));
73 }
74
Anna Zaks9ffbe242011-12-17 00:26:34 +000075 /// Functions defining the attack surface.
76 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
77 CheckerContext &C) const;
78 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks2bf8fd82012-01-20 00:11:19 +000079 const ProgramState *postSocket(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000080 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000081
82 /// Taint the scanned input if the file is tainted.
83 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000084
Anna Zaksd3d85482011-12-16 18:28:50 +000085 /// Check if the region the expression evaluates to is the standard input,
86 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000087 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000088
Anna Zaks9f03b622012-01-07 02:33:10 +000089 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000090 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000091 bool checkUncontrolledFormatString(const CallExpr *CE,
92 CheckerContext &C) const;
93
Anna Zaks8568ee72012-01-14 02:48:40 +000094 /// Check for:
95 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
96 /// CWE-78, "Failure to Sanitize Data into an OS Command"
97 static const char MsgSanitizeSystemArgs[];
98 bool checkSystemCall(const CallExpr *CE, StringRef Name,
99 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000100
Anna Zaks4e462212012-01-18 02:45:11 +0000101 /// Check if tainted data is used as a buffer size ins strn.. functions,
102 /// and allocators.
103 static const char MsgTaintedBufferSize[];
104 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
105 CheckerContext &C) const;
106
Anna Zaks8568ee72012-01-14 02:48:40 +0000107 /// Generate a report if the expression is tainted or points to tainted data.
108 bool generateReportIfTainted(const Expr *E, const char Msg[],
109 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000110
111
112 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000113
Anna Zaks022b3f42012-01-17 00:37:02 +0000114 /// \brief A struct used to specify taint propagation rules for a function.
115 ///
116 /// If any of the possible taint source arguments is tainted, all of the
117 /// destination arguments should also be tainted. Use InvalidArgIndex in the
118 /// src list to specify that all of the arguments can introduce taint. Use
119 /// InvalidArgIndex in the dst arguments to signify that all the non-const
120 /// pointer and reference arguments might be tainted on return. If
121 /// ReturnValueIndex is added to the dst list, the return value will be
122 /// tainted.
123 struct TaintPropagationRule {
124 /// List of arguments which can be taint sources and should be checked.
125 ArgVector SrcArgs;
126 /// List of arguments which should be tainted on function return.
127 ArgVector DstArgs;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000128 // TODO: Check if using other data structures would be more optimal.
Anna Zaks022b3f42012-01-17 00:37:02 +0000129
130 TaintPropagationRule() {}
131
Anna Zaks9b0c7492012-01-18 02:45:07 +0000132 TaintPropagationRule(unsigned SArg,
133 unsigned DArg, bool TaintRet = false) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000134 SrcArgs.push_back(SArg);
135 DstArgs.push_back(DArg);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000136 if (TaintRet)
137 DstArgs.push_back(ReturnValueIndex);
Anna Zaks022b3f42012-01-17 00:37:02 +0000138 }
139
Anna Zaks9b0c7492012-01-18 02:45:07 +0000140 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
141 unsigned DArg, bool TaintRet = false) {
142 SrcArgs.push_back(SArg1);
143 SrcArgs.push_back(SArg2);
144 DstArgs.push_back(DArg);
145 if (TaintRet)
146 DstArgs.push_back(ReturnValueIndex);
147 }
148
149 /// Get the propagation rule for a given function.
150 static TaintPropagationRule
151 getTaintPropagationRule(const FunctionDecl *FDecl,
152 StringRef Name,
153 CheckerContext &C);
154
Anna Zaks022b3f42012-01-17 00:37:02 +0000155 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
156 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
157
Anna Zaks9b0c7492012-01-18 02:45:07 +0000158 inline bool isNull() const { return SrcArgs.empty(); }
159
160 inline bool isDestinationArgument(unsigned ArgNum) const {
161 return (std::find(DstArgs.begin(),
162 DstArgs.end(), ArgNum) != DstArgs.end());
163 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000164
Anna Zaks7cdfe292012-01-18 02:45:13 +0000165 /// \brief Pre-process a function which propagates taint according to the
166 /// taint rule.
167 const ProgramState *process(const CallExpr *CE, CheckerContext &C) const;
168
169 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000170};
Anna Zaks9b0c7492012-01-18 02:45:07 +0000171
172const unsigned GenericTaintChecker::ReturnValueIndex;
173const unsigned GenericTaintChecker::InvalidArgIndex;
174
Anna Zaks8568ee72012-01-14 02:48:40 +0000175const char GenericTaintChecker::MsgUncontrolledFormatString[] =
176 "Tainted format string (CWE-134: Uncontrolled Format String)";
177
178const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
179 "Tainted data passed to a system call "
180 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks4e462212012-01-18 02:45:11 +0000181
182const char GenericTaintChecker::MsgTaintedBufferSize[] =
183 "Tainted data is used to specify the buffer size "
184 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
185 "character data and the null terminator)";
186
187} // end of anonymous namespace
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000188
Anna Zaks1fb826a2012-01-12 02:22:34 +0000189/// A set which is used to pass information from call pre-visit instruction
190/// to the call post-visit. The values are unsigned integers, which are either
191/// ReturnValueIndex, or indexes of the pointer/reference argument, which
192/// points to data, which should be tainted on return.
193namespace { struct TaintArgsOnPostVisit{}; }
194namespace clang { namespace ento {
195template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
196 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
197 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
198};
199}}
Anna Zaks9ffbe242011-12-17 00:26:34 +0000200
Anna Zaks9b0c7492012-01-18 02:45:07 +0000201GenericTaintChecker::TaintPropagationRule
202GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
203 const FunctionDecl *FDecl,
204 StringRef Name,
205 CheckerContext &C) {
206 // Check for exact name match for functions without builtin substitutes.
207 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000211 .Case("read", TaintPropagationRule(0, 2, 1, true))
212 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000213 .Default(TaintPropagationRule());
214
215 if (!Rule.isNull())
216 return Rule;
217
218 // Check if it's one of the memory setting/copying functions.
219 // This check is specialized but faster then calling isCLibraryFunction.
220 unsigned BId = 0;
221 if ( (BId = FDecl->getMemoryFunctionKind()) )
222 switch(BId) {
223 case Builtin::BImemcpy:
224 case Builtin::BImemmove:
225 case Builtin::BIstrncpy:
226 case Builtin::BIstrncat:
227 return TaintPropagationRule(1, 2, 0, true);
228 break;
229 case Builtin::BIstrlcpy:
230 case Builtin::BIstrlcat:
231 return TaintPropagationRule(1, 2, 0, false);
232 break;
233 case Builtin::BIstrndup:
234 return TaintPropagationRule(0, 1, ReturnValueIndex);
235 break;
236
237 default:
238 break;
239 };
240
241 // Process all other functions which could be defined as builtins.
242 if (Rule.isNull()) {
243 if (C.isCLibraryFunction(FDecl, "snprintf") ||
244 C.isCLibraryFunction(FDecl, "sprintf"))
245 return TaintPropagationRule(InvalidArgIndex, 0, true);
246 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
247 C.isCLibraryFunction(FDecl, "stpcpy") ||
248 C.isCLibraryFunction(FDecl, "strcat"))
249 return TaintPropagationRule(1, 0, true);
250 else if (C.isCLibraryFunction(FDecl, "bcopy"))
251 return TaintPropagationRule(0, 2, 1, false);
252 else if (C.isCLibraryFunction(FDecl, "strdup") ||
253 C.isCLibraryFunction(FDecl, "strdupa"))
254 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks4e462212012-01-18 02:45:11 +0000255 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
256 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000257 }
258
259 // Skipping the following functions, since they might be used for cleansing
260 // or smart memory copy:
261 // - memccpy - copying untill hitting a special character.
262
263 return TaintPropagationRule();
Anna Zaks8f4caf52011-11-18 02:26:36 +0000264}
265
Anna Zaks9ffbe242011-12-17 00:26:34 +0000266void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
267 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000268 // Check for errors first.
269 if (checkPre(CE, C))
270 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000271
Anna Zaks9f03b622012-01-07 02:33:10 +0000272 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000273 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000274}
275
276void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
277 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000278 if (propagateFromPre(CE, C))
279 return;
280 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000281}
282
Anna Zaks1fb826a2012-01-12 02:22:34 +0000283void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
284 CheckerContext &C) const {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000285 const ProgramState *State = 0;
286 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
287 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000288 if (Name.empty())
289 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000290
Anna Zaks9b0c7492012-01-18 02:45:07 +0000291 // First, try generating a propagation rule for this function.
292 TaintPropagationRule Rule =
293 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000294 if (!Rule.isNull()) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000295 State = Rule.process(CE, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000296 if (!State)
297 return;
298 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000299 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000300 }
301
Anna Zaks9b0c7492012-01-18 02:45:07 +0000302 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000303 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000304 .Case("fscanf", &GenericTaintChecker::preFscanf)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000305 .Default(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000306 // Check and evaluate the call.
307 if (evalFunction)
308 State = (this->*evalFunction)(CE, C);
309 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000310 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000311 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000312
Anna Zaks9ffbe242011-12-17 00:26:34 +0000313}
314
Anna Zaks1fb826a2012-01-12 02:22:34 +0000315bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
316 CheckerContext &C) const {
317 const ProgramState *State = C.getState();
318
319 // Depending on what was tainted at pre-visit, we determined a set of
320 // arguments which should be tainted after the function returns. These are
321 // stored in the state as TaintArgsOnPostVisit set.
322 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
323 for (llvm::ImmutableSet<unsigned>::iterator
324 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
325 unsigned ArgNum = *I;
326
327 // Special handling for the tainted return value.
328 if (ArgNum == ReturnValueIndex) {
329 State = State->addTaint(CE, C.getLocationContext());
330 continue;
331 }
332
333 // The arguments are pointer arguments. The data they are pointing at is
334 // tainted after the call.
335 const Expr* Arg = CE->getArg(ArgNum);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000336 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000337 if (Sym)
338 State = State->addTaint(Sym);
339 }
340
341 // Clear up the taint info from the state.
342 State = State->remove<TaintArgsOnPostVisit>();
343
344 if (State != C.getState()) {
345 C.addTransition(State);
346 return true;
347 }
348 return false;
349}
350
351void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
352 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000353 // Define the attack surface.
354 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000355 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000356 if (Name.empty())
357 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000358 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000359 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000360 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000361 .Case("getchar", &GenericTaintChecker::postRetTaint)
362 .Case("getenv", &GenericTaintChecker::postRetTaint)
363 .Case("fopen", &GenericTaintChecker::postRetTaint)
364 .Case("fdopen", &GenericTaintChecker::postRetTaint)
365 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000366 .Case("socket", &GenericTaintChecker::postSocket)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000367 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000368
369 // If the callee isn't defined, it is not of security concern.
370 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000371 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000372 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000373 State = (this->*evalFunction)(CE, C);
374 if (!State)
375 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000376
Anna Zaks9ffbe242011-12-17 00:26:34 +0000377 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000378}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000379
Anna Zaks9f03b622012-01-07 02:33:10 +0000380bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
381
382 if (checkUncontrolledFormatString(CE, C))
383 return true;
384
Anna Zaks4e462212012-01-18 02:45:11 +0000385 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
386 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks8568ee72012-01-14 02:48:40 +0000387 if (Name.empty())
388 return false;
389
390 if (checkSystemCall(CE, Name, C))
391 return true;
392
Anna Zaks4e462212012-01-18 02:45:11 +0000393 if (checkTaintedBufferSize(CE, FDecl, C))
394 return true;
395
Anna Zaks9f03b622012-01-07 02:33:10 +0000396 return false;
397}
398
Anna Zaks8f4caf52011-11-18 02:26:36 +0000399SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
Anna Zaks7cdfe292012-01-18 02:45:13 +0000400 const Expr* Arg) {
Anna Zaks8f4caf52011-11-18 02:26:36 +0000401 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000402 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000403 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000404 return 0;
405
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000406 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000407 if (!AddrLoc)
Anna Zaks8f4caf52011-11-18 02:26:36 +0000408 return 0;
409
Anna Zaks71d29092012-01-13 00:56:51 +0000410 const PointerType *ArgTy =
411 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
412 assert(ArgTy);
413 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000414 return Val.getAsSymbol();
415}
416
Anna Zaks022b3f42012-01-17 00:37:02 +0000417const ProgramState *
Anna Zaks7cdfe292012-01-18 02:45:13 +0000418GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
419 CheckerContext &C) const {
Anna Zaks022b3f42012-01-17 00:37:02 +0000420 const ProgramState *State = C.getState();
421
422 // Check for taint in arguments.
423 bool IsTainted = false;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000424 for (ArgVector::const_iterator I = SrcArgs.begin(),
425 E = SrcArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000426 unsigned ArgNum = *I;
427
428 if (ArgNum == InvalidArgIndex) {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000429 // Check if any of the arguments is tainted, but skip the
430 // destination arguments.
431 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000432 if (isDestinationArgument(i))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000433 continue;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000434 if ((IsTainted =
435 GenericTaintChecker::isTaintedOrPointsToTainted(CE->getArg(i),
436 State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000437 break;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000438 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000439 break;
440 }
441
442 assert(ArgNum < CE->getNumArgs());
Anna Zaks7cdfe292012-01-18 02:45:13 +0000443 if ((IsTainted =
444 GenericTaintChecker::isTaintedOrPointsToTainted(CE->getArg(ArgNum),
445 State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000446 break;
447 }
448 if (!IsTainted)
449 return State;
450
451 // Mark the arguments which should be tainted after the function returns.
Anna Zaks7cdfe292012-01-18 02:45:13 +0000452 for (ArgVector::const_iterator I = DstArgs.begin(),
453 E = DstArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000454 unsigned ArgNum = *I;
455
456 // Should we mark all arguments as tainted?
457 if (ArgNum == InvalidArgIndex) {
458 // For all pointer and references that were passed in:
459 // If they are not pointing to const data, mark data as tainted.
460 // TODO: So far we are just going one level down; ideally we'd need to
461 // recurse here.
462 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
463 const Expr *Arg = CE->getArg(i);
464 // Process pointer argument.
465 const Type *ArgTy = Arg->getType().getTypePtr();
466 QualType PType = ArgTy->getPointeeType();
467 if ((!PType.isNull() && !PType.isConstQualified())
468 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
469 State = State->add<TaintArgsOnPostVisit>(i);
470 }
471 continue;
472 }
473
474 // Should mark the return value?
475 if (ArgNum == ReturnValueIndex) {
476 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
477 continue;
478 }
479
480 // Mark the given argument.
481 assert(ArgNum < CE->getNumArgs());
482 State = State->add<TaintArgsOnPostVisit>(ArgNum);
483 }
484
485 return State;
486}
487
488
Anna Zaks1fb826a2012-01-12 02:22:34 +0000489// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
490// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000491const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
492 CheckerContext &C) const {
493 assert(CE->getNumArgs() >= 2);
494 const ProgramState *State = C.getState();
495
496 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000497 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000498 isStdin(CE->getArg(0), C)) {
499 // All arguments except for the first two should get taint.
500 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
501 State = State->add<TaintArgsOnPostVisit>(i);
502 return State;
503 }
504
Anna Zaks9ffbe242011-12-17 00:26:34 +0000505 return 0;
506}
507
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000508
509// If argument 0(protocol domain) is network, the return value should get taint.
510const ProgramState *GenericTaintChecker::postSocket(const CallExpr *CE,
511 CheckerContext &C) const {
512 assert(CE->getNumArgs() >= 3);
513 const ProgramState *State = C.getState();
514
515 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
516 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
517 // White list the internal communication protocols.
518 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
519 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
520 return State;
521 State = State->addTaint(CE, C.getLocationContext());
522 return State;
523}
524
Anna Zaks9ffbe242011-12-17 00:26:34 +0000525const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
526 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000527 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000528 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000529 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000530 // All arguments except for the very first one should get taint.
531 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
532 // The arguments are pointer arguments. The data they are pointing at is
533 // tainted after the call.
534 const Expr* Arg = CE->getArg(i);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000535 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1009ac72011-12-14 00:56:02 +0000536 if (Sym)
537 State = State->addTaint(Sym);
538 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000539 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000540}
541
Anna Zaks9ffbe242011-12-17 00:26:34 +0000542const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
543 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000544 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000545}
546
Anna Zaksefd69892011-12-14 00:56:18 +0000547bool GenericTaintChecker::isStdin(const Expr *E,
548 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000549 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000550 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000551
Anna Zaksd3d85482011-12-16 18:28:50 +0000552 // stdin is a pointer, so it would be a region.
553 const MemRegion *MemReg = Val.getAsRegion();
554
555 // The region should be symbolic, we do not know it's value.
556 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
557 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000558 return false;
559
Anna Zaksd3d85482011-12-16 18:28:50 +0000560 // Get it's symbol and find the declaration region it's pointing to.
561 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
562 if (!Sm)
563 return false;
564 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
565 if (!DeclReg)
566 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000567
Anna Zaksd3d85482011-12-16 18:28:50 +0000568 // This region corresponds to a declaration, find out if it's a global/extern
569 // variable named stdin with the proper type.
570 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
571 D = D->getCanonicalDecl();
572 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
573 if (const PointerType * PtrTy =
574 dyn_cast<PointerType>(D->getType().getTypePtr()))
575 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
576 return true;
577 }
Anna Zaksefd69892011-12-14 00:56:18 +0000578 return false;
579}
580
Anna Zaks9f03b622012-01-07 02:33:10 +0000581static bool getPrintfFormatArgumentNum(const CallExpr *CE,
582 const CheckerContext &C,
583 unsigned int &ArgNum) {
584 // Find if the function contains a format string argument.
585 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
586 // vsnprintf, syslog, custom annotated functions.
587 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
588 if (!FDecl)
589 return false;
590 for (specific_attr_iterator<FormatAttr>
591 i = FDecl->specific_attr_begin<FormatAttr>(),
592 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
593
594 const FormatAttr *Format = *i;
595 ArgNum = Format->getFormatIdx() - 1;
596 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
597 return true;
598 }
599
600 // Or if a function is named setproctitle (this is a heuristic).
601 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
602 ArgNum = 0;
603 return true;
604 }
605
606 return false;
607}
608
Anna Zaks8568ee72012-01-14 02:48:40 +0000609bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
610 const char Msg[],
611 CheckerContext &C) const {
612 assert(E);
613
614 // Check for taint.
615 const ProgramState *State = C.getState();
616 if (!State->isTainted(getPointedToSymbol(C, E)) &&
617 !State->isTainted(E, C.getLocationContext()))
618 return false;
619
620 // Generate diagnostic.
621 if (ExplodedNode *N = C.addTransition()) {
622 initBugType();
623 BugReport *report = new BugReport(*BT, Msg, N);
624 report->addRange(E->getSourceRange());
625 C.EmitReport(report);
626 return true;
627 }
628 return false;
629}
630
Anna Zaks9f03b622012-01-07 02:33:10 +0000631bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
632 CheckerContext &C) const{
633 // Check if the function contains a format string argument.
634 unsigned int ArgNum = 0;
635 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
636 return false;
637
638 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000639 if (generateReportIfTainted(CE->getArg(ArgNum),
640 MsgUncontrolledFormatString, C))
641 return true;
642 return false;
643}
644
645bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
646 StringRef Name,
647 CheckerContext &C) const {
648 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
649 .Case("system", 0)
650 .Case("popen", 0)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000651 .Case("execl", 0)
652 .Case("execle", 0)
653 .Case("execlp", 0)
654 .Case("execv", 0)
655 .Case("execvp", 0)
656 .Case("execvP", 0)
Anna Zaks8568ee72012-01-14 02:48:40 +0000657 .Default(UINT_MAX);
658
659 if (ArgNum == UINT_MAX)
660 return false;
661
662 if (generateReportIfTainted(CE->getArg(ArgNum),
663 MsgSanitizeSystemArgs, C))
664 return true;
665
Anna Zaks9f03b622012-01-07 02:33:10 +0000666 return false;
667}
668
Anna Zaks4e462212012-01-18 02:45:11 +0000669// TODO: Should this check be a part of the CString checker?
670// If yes, should taint be a global setting?
671bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
672 const FunctionDecl *FDecl,
673 CheckerContext &C) const {
674 // If the function has a buffer size argument, set ArgNum.
675 unsigned ArgNum = InvalidArgIndex;
676 unsigned BId = 0;
677 if ( (BId = FDecl->getMemoryFunctionKind()) )
678 switch(BId) {
679 case Builtin::BImemcpy:
680 case Builtin::BImemmove:
681 case Builtin::BIstrncpy:
682 ArgNum = 2;
683 break;
684 case Builtin::BIstrndup:
685 ArgNum = 1;
686 break;
687 default:
688 break;
689 };
690
691 if (ArgNum == InvalidArgIndex) {
692 if (C.isCLibraryFunction(FDecl, "malloc") ||
693 C.isCLibraryFunction(FDecl, "calloc") ||
694 C.isCLibraryFunction(FDecl, "alloca"))
695 ArgNum = 0;
696 else if (C.isCLibraryFunction(FDecl, "memccpy"))
697 ArgNum = 3;
698 else if (C.isCLibraryFunction(FDecl, "realloc"))
699 ArgNum = 1;
700 else if (C.isCLibraryFunction(FDecl, "bcopy"))
701 ArgNum = 2;
702 }
703
704 if (ArgNum != InvalidArgIndex &&
705 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
706 return true;
707
708 return false;
709}
710
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000711void ento::registerGenericTaintChecker(CheckerManager &mgr) {
712 mgr.registerChecker<GenericTaintChecker>();
713}