blob: 83656716cb989d23c2dea7e0d539607d3d520081 [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks9b0c7492012-01-18 02:45:07 +000023#include "clang/Basic/Builtins.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000024#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000025
26using namespace clang;
27using namespace ento;
28
29namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000030class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000031 check::PreStmt<CallExpr> > {
32public:
Anna Zaks8568ee72012-01-14 02:48:40 +000033 static void *getTag() { static int Tag; return &Tag; }
34
35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000039
Anna Zaks9ffbe242011-12-17 00:26:34 +000040private:
Anna Zaks8568ee72012-01-14 02:48:40 +000041 static const unsigned ReturnValueIndex = UINT_MAX;
Anna Zaks022b3f42012-01-17 00:37:02 +000042 static const unsigned InvalidArgIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000043
Anna Zaks8f4caf52011-11-18 02:26:36 +000044 mutable llvm::OwningPtr<BugType> BT;
Anna Zaks9b0c7492012-01-18 02:45:07 +000045 inline void initBugType() const {
46 if (!BT)
47 BT.reset(new BugType("Taint Analysis", "General"));
48 }
Anna Zaks8f4caf52011-11-18 02:26:36 +000049
Anna Zaks1fb826a2012-01-12 02:22:34 +000050 /// \brief Catch taint related bugs. Check if tainted data is passed to a
51 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000052 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
53
Anna Zaks1fb826a2012-01-12 02:22:34 +000054 /// \brief Add taint sources on a pre-visit.
55 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
56
57 /// \brief Propagate taint generated at pre-visit.
58 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
59
60 /// \brief Add taint sources on a post visit.
61 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
62
63 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000064 /// (points to).
Anna Zaks7cdfe292012-01-18 02:45:13 +000065 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +000066
Anna Zaks7cdfe292012-01-18 02:45:13 +000067 static inline bool isTaintedOrPointsToTainted(const Expr *E,
68 const ProgramState *State,
69 CheckerContext &C) {
Anna Zaks022b3f42012-01-17 00:37:02 +000070 return (State->isTainted(E, C.getLocationContext()) ||
71 (E->getType().getTypePtr()->isPointerType() &&
72 State->isTainted(getPointedToSymbol(C, E))));
73 }
74
Anna Zaks9ffbe242011-12-17 00:26:34 +000075 /// Functions defining the attack surface.
76 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
77 CheckerContext &C) const;
78 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks2bf8fd82012-01-20 00:11:19 +000079 const ProgramState *postSocket(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000080 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000081
82 /// Taint the scanned input if the file is tainted.
83 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000084
Anna Zaksd3d85482011-12-16 18:28:50 +000085 /// Check if the region the expression evaluates to is the standard input,
86 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000087 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000088
Anna Zaks9f03b622012-01-07 02:33:10 +000089 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000090 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000091 bool checkUncontrolledFormatString(const CallExpr *CE,
92 CheckerContext &C) const;
93
Anna Zaks8568ee72012-01-14 02:48:40 +000094 /// Check for:
95 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
96 /// CWE-78, "Failure to Sanitize Data into an OS Command"
97 static const char MsgSanitizeSystemArgs[];
98 bool checkSystemCall(const CallExpr *CE, StringRef Name,
99 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000100
Anna Zaks4e462212012-01-18 02:45:11 +0000101 /// Check if tainted data is used as a buffer size ins strn.. functions,
102 /// and allocators.
103 static const char MsgTaintedBufferSize[];
104 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
105 CheckerContext &C) const;
106
Anna Zaks8568ee72012-01-14 02:48:40 +0000107 /// Generate a report if the expression is tainted or points to tainted data.
108 bool generateReportIfTainted(const Expr *E, const char Msg[],
109 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000110
111
112 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000113
Anna Zaks022b3f42012-01-17 00:37:02 +0000114 /// \brief A struct used to specify taint propagation rules for a function.
115 ///
116 /// If any of the possible taint source arguments is tainted, all of the
117 /// destination arguments should also be tainted. Use InvalidArgIndex in the
118 /// src list to specify that all of the arguments can introduce taint. Use
119 /// InvalidArgIndex in the dst arguments to signify that all the non-const
120 /// pointer and reference arguments might be tainted on return. If
121 /// ReturnValueIndex is added to the dst list, the return value will be
122 /// tainted.
123 struct TaintPropagationRule {
124 /// List of arguments which can be taint sources and should be checked.
125 ArgVector SrcArgs;
126 /// List of arguments which should be tainted on function return.
127 ArgVector DstArgs;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000128 // TODO: Check if using other data structures would be more optimal.
Anna Zaks022b3f42012-01-17 00:37:02 +0000129
130 TaintPropagationRule() {}
131
Anna Zaks9b0c7492012-01-18 02:45:07 +0000132 TaintPropagationRule(unsigned SArg,
133 unsigned DArg, bool TaintRet = false) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000134 SrcArgs.push_back(SArg);
135 DstArgs.push_back(DArg);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000136 if (TaintRet)
137 DstArgs.push_back(ReturnValueIndex);
Anna Zaks022b3f42012-01-17 00:37:02 +0000138 }
139
Anna Zaks9b0c7492012-01-18 02:45:07 +0000140 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
141 unsigned DArg, bool TaintRet = false) {
142 SrcArgs.push_back(SArg1);
143 SrcArgs.push_back(SArg2);
144 DstArgs.push_back(DArg);
145 if (TaintRet)
146 DstArgs.push_back(ReturnValueIndex);
147 }
148
149 /// Get the propagation rule for a given function.
150 static TaintPropagationRule
151 getTaintPropagationRule(const FunctionDecl *FDecl,
152 StringRef Name,
153 CheckerContext &C);
154
Anna Zaks022b3f42012-01-17 00:37:02 +0000155 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
156 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
157
Anna Zaks9b0c7492012-01-18 02:45:07 +0000158 inline bool isNull() const { return SrcArgs.empty(); }
159
160 inline bool isDestinationArgument(unsigned ArgNum) const {
161 return (std::find(DstArgs.begin(),
162 DstArgs.end(), ArgNum) != DstArgs.end());
163 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000164
Anna Zaks7cdfe292012-01-18 02:45:13 +0000165 /// \brief Pre-process a function which propagates taint according to the
166 /// taint rule.
167 const ProgramState *process(const CallExpr *CE, CheckerContext &C) const;
168
169 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000170};
Anna Zaks9b0c7492012-01-18 02:45:07 +0000171
172const unsigned GenericTaintChecker::ReturnValueIndex;
173const unsigned GenericTaintChecker::InvalidArgIndex;
174
Anna Zaks8568ee72012-01-14 02:48:40 +0000175const char GenericTaintChecker::MsgUncontrolledFormatString[] =
176 "Tainted format string (CWE-134: Uncontrolled Format String)";
177
178const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
179 "Tainted data passed to a system call "
180 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks4e462212012-01-18 02:45:11 +0000181
182const char GenericTaintChecker::MsgTaintedBufferSize[] =
183 "Tainted data is used to specify the buffer size "
184 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
185 "character data and the null terminator)";
186
187} // end of anonymous namespace
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000188
Anna Zaks1fb826a2012-01-12 02:22:34 +0000189/// A set which is used to pass information from call pre-visit instruction
190/// to the call post-visit. The values are unsigned integers, which are either
191/// ReturnValueIndex, or indexes of the pointer/reference argument, which
192/// points to data, which should be tainted on return.
193namespace { struct TaintArgsOnPostVisit{}; }
194namespace clang { namespace ento {
195template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
196 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
197 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
198};
199}}
Anna Zaks9ffbe242011-12-17 00:26:34 +0000200
Anna Zaks9b0c7492012-01-18 02:45:07 +0000201GenericTaintChecker::TaintPropagationRule
202GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
203 const FunctionDecl *FDecl,
204 StringRef Name,
205 CheckerContext &C) {
206 // Check for exact name match for functions without builtin substitutes.
207 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000211 .Case("read", TaintPropagationRule(0, 2, 1, true))
212 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000213 .Default(TaintPropagationRule());
214
215 if (!Rule.isNull())
216 return Rule;
217
218 // Check if it's one of the memory setting/copying functions.
219 // This check is specialized but faster then calling isCLibraryFunction.
220 unsigned BId = 0;
221 if ( (BId = FDecl->getMemoryFunctionKind()) )
222 switch(BId) {
223 case Builtin::BImemcpy:
224 case Builtin::BImemmove:
225 case Builtin::BIstrncpy:
226 case Builtin::BIstrncat:
227 return TaintPropagationRule(1, 2, 0, true);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000228 case Builtin::BIstrlcpy:
229 case Builtin::BIstrlcat:
230 return TaintPropagationRule(1, 2, 0, false);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000231 case Builtin::BIstrndup:
232 return TaintPropagationRule(0, 1, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000233
234 default:
235 break;
236 };
237
238 // Process all other functions which could be defined as builtins.
239 if (Rule.isNull()) {
240 if (C.isCLibraryFunction(FDecl, "snprintf") ||
241 C.isCLibraryFunction(FDecl, "sprintf"))
242 return TaintPropagationRule(InvalidArgIndex, 0, true);
243 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
244 C.isCLibraryFunction(FDecl, "stpcpy") ||
245 C.isCLibraryFunction(FDecl, "strcat"))
246 return TaintPropagationRule(1, 0, true);
247 else if (C.isCLibraryFunction(FDecl, "bcopy"))
248 return TaintPropagationRule(0, 2, 1, false);
249 else if (C.isCLibraryFunction(FDecl, "strdup") ||
250 C.isCLibraryFunction(FDecl, "strdupa"))
251 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks4e462212012-01-18 02:45:11 +0000252 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
253 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000254 }
255
256 // Skipping the following functions, since they might be used for cleansing
257 // or smart memory copy:
258 // - memccpy - copying untill hitting a special character.
259
260 return TaintPropagationRule();
Anna Zaks8f4caf52011-11-18 02:26:36 +0000261}
262
Anna Zaks9ffbe242011-12-17 00:26:34 +0000263void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
264 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000265 // Check for errors first.
266 if (checkPre(CE, C))
267 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000268
Anna Zaks9f03b622012-01-07 02:33:10 +0000269 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000270 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000271}
272
273void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
274 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000275 if (propagateFromPre(CE, C))
276 return;
277 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000278}
279
Anna Zaks1fb826a2012-01-12 02:22:34 +0000280void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
281 CheckerContext &C) const {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000282 const ProgramState *State = 0;
283 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
284 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000285 if (Name.empty())
286 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000287
Anna Zaks9b0c7492012-01-18 02:45:07 +0000288 // First, try generating a propagation rule for this function.
289 TaintPropagationRule Rule =
290 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000291 if (!Rule.isNull()) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000292 State = Rule.process(CE, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000293 if (!State)
294 return;
295 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000296 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000297 }
298
Anna Zaks9b0c7492012-01-18 02:45:07 +0000299 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000300 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000301 .Case("fscanf", &GenericTaintChecker::preFscanf)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000302 .Default(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000303 // Check and evaluate the call.
304 if (evalFunction)
305 State = (this->*evalFunction)(CE, C);
306 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000307 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000308 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000309
Anna Zaks9ffbe242011-12-17 00:26:34 +0000310}
311
Anna Zaks1fb826a2012-01-12 02:22:34 +0000312bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
313 CheckerContext &C) const {
314 const ProgramState *State = C.getState();
315
316 // Depending on what was tainted at pre-visit, we determined a set of
317 // arguments which should be tainted after the function returns. These are
318 // stored in the state as TaintArgsOnPostVisit set.
319 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
320 for (llvm::ImmutableSet<unsigned>::iterator
321 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
322 unsigned ArgNum = *I;
323
324 // Special handling for the tainted return value.
325 if (ArgNum == ReturnValueIndex) {
326 State = State->addTaint(CE, C.getLocationContext());
327 continue;
328 }
329
330 // The arguments are pointer arguments. The data they are pointing at is
331 // tainted after the call.
332 const Expr* Arg = CE->getArg(ArgNum);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000333 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000334 if (Sym)
335 State = State->addTaint(Sym);
336 }
337
338 // Clear up the taint info from the state.
339 State = State->remove<TaintArgsOnPostVisit>();
340
341 if (State != C.getState()) {
342 C.addTransition(State);
343 return true;
344 }
345 return false;
346}
347
348void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
349 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000350 // Define the attack surface.
351 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000352 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000353 if (Name.empty())
354 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000355 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000356 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000357 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000358 .Case("getchar", &GenericTaintChecker::postRetTaint)
359 .Case("getenv", &GenericTaintChecker::postRetTaint)
360 .Case("fopen", &GenericTaintChecker::postRetTaint)
361 .Case("fdopen", &GenericTaintChecker::postRetTaint)
362 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000363 .Case("socket", &GenericTaintChecker::postSocket)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000364 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000365
366 // If the callee isn't defined, it is not of security concern.
367 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000368 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000369 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000370 State = (this->*evalFunction)(CE, C);
371 if (!State)
372 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000373
Anna Zaks9ffbe242011-12-17 00:26:34 +0000374 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000375}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000376
Anna Zaks9f03b622012-01-07 02:33:10 +0000377bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
378
379 if (checkUncontrolledFormatString(CE, C))
380 return true;
381
Anna Zaks4e462212012-01-18 02:45:11 +0000382 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
383 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks8568ee72012-01-14 02:48:40 +0000384 if (Name.empty())
385 return false;
386
387 if (checkSystemCall(CE, Name, C))
388 return true;
389
Anna Zaks4e462212012-01-18 02:45:11 +0000390 if (checkTaintedBufferSize(CE, FDecl, C))
391 return true;
392
Anna Zaks9f03b622012-01-07 02:33:10 +0000393 return false;
394}
395
Anna Zaks8f4caf52011-11-18 02:26:36 +0000396SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
Anna Zaks7cdfe292012-01-18 02:45:13 +0000397 const Expr* Arg) {
Anna Zaks8f4caf52011-11-18 02:26:36 +0000398 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000399 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000400 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000401 return 0;
402
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000403 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000404 if (!AddrLoc)
Anna Zaks8f4caf52011-11-18 02:26:36 +0000405 return 0;
406
Anna Zaks71d29092012-01-13 00:56:51 +0000407 const PointerType *ArgTy =
408 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
Anna Zaks665b0022012-01-21 06:59:01 +0000409 SVal Val = State->getSVal(*AddrLoc,
410 ArgTy ? ArgTy->getPointeeType(): QualType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000411 return Val.getAsSymbol();
412}
413
Anna Zaks022b3f42012-01-17 00:37:02 +0000414const ProgramState *
Anna Zaks7cdfe292012-01-18 02:45:13 +0000415GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
416 CheckerContext &C) const {
Anna Zaks022b3f42012-01-17 00:37:02 +0000417 const ProgramState *State = C.getState();
418
419 // Check for taint in arguments.
420 bool IsTainted = false;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000421 for (ArgVector::const_iterator I = SrcArgs.begin(),
422 E = SrcArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000423 unsigned ArgNum = *I;
424
425 if (ArgNum == InvalidArgIndex) {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000426 // Check if any of the arguments is tainted, but skip the
427 // destination arguments.
428 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000429 if (isDestinationArgument(i))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000430 continue;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000431 if ((IsTainted =
432 GenericTaintChecker::isTaintedOrPointsToTainted(CE->getArg(i),
433 State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000434 break;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000435 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000436 break;
437 }
438
439 assert(ArgNum < CE->getNumArgs());
Anna Zaks7cdfe292012-01-18 02:45:13 +0000440 if ((IsTainted =
441 GenericTaintChecker::isTaintedOrPointsToTainted(CE->getArg(ArgNum),
442 State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000443 break;
444 }
445 if (!IsTainted)
446 return State;
447
448 // Mark the arguments which should be tainted after the function returns.
Anna Zaks7cdfe292012-01-18 02:45:13 +0000449 for (ArgVector::const_iterator I = DstArgs.begin(),
450 E = DstArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000451 unsigned ArgNum = *I;
452
453 // Should we mark all arguments as tainted?
454 if (ArgNum == InvalidArgIndex) {
455 // For all pointer and references that were passed in:
456 // If they are not pointing to const data, mark data as tainted.
457 // TODO: So far we are just going one level down; ideally we'd need to
458 // recurse here.
459 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
460 const Expr *Arg = CE->getArg(i);
461 // Process pointer argument.
462 const Type *ArgTy = Arg->getType().getTypePtr();
463 QualType PType = ArgTy->getPointeeType();
464 if ((!PType.isNull() && !PType.isConstQualified())
465 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
466 State = State->add<TaintArgsOnPostVisit>(i);
467 }
468 continue;
469 }
470
471 // Should mark the return value?
472 if (ArgNum == ReturnValueIndex) {
473 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
474 continue;
475 }
476
477 // Mark the given argument.
478 assert(ArgNum < CE->getNumArgs());
479 State = State->add<TaintArgsOnPostVisit>(ArgNum);
480 }
481
482 return State;
483}
484
485
Anna Zaks1fb826a2012-01-12 02:22:34 +0000486// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
487// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000488const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
489 CheckerContext &C) const {
490 assert(CE->getNumArgs() >= 2);
491 const ProgramState *State = C.getState();
492
493 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000494 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000495 isStdin(CE->getArg(0), C)) {
496 // All arguments except for the first two should get taint.
497 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
498 State = State->add<TaintArgsOnPostVisit>(i);
499 return State;
500 }
501
Anna Zaks9ffbe242011-12-17 00:26:34 +0000502 return 0;
503}
504
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000505
506// If argument 0(protocol domain) is network, the return value should get taint.
507const ProgramState *GenericTaintChecker::postSocket(const CallExpr *CE,
508 CheckerContext &C) const {
509 assert(CE->getNumArgs() >= 3);
510 const ProgramState *State = C.getState();
511
512 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
513 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
514 // White list the internal communication protocols.
515 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
516 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
517 return State;
518 State = State->addTaint(CE, C.getLocationContext());
519 return State;
520}
521
Anna Zaks9ffbe242011-12-17 00:26:34 +0000522const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
523 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000524 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000525 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000526 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000527 // All arguments except for the very first one should get taint.
528 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
529 // The arguments are pointer arguments. The data they are pointing at is
530 // tainted after the call.
531 const Expr* Arg = CE->getArg(i);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000532 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1009ac72011-12-14 00:56:02 +0000533 if (Sym)
534 State = State->addTaint(Sym);
535 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000536 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000537}
538
Anna Zaks9ffbe242011-12-17 00:26:34 +0000539const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
540 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000541 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000542}
543
Anna Zaksefd69892011-12-14 00:56:18 +0000544bool GenericTaintChecker::isStdin(const Expr *E,
545 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000546 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000547 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000548
Anna Zaksd3d85482011-12-16 18:28:50 +0000549 // stdin is a pointer, so it would be a region.
550 const MemRegion *MemReg = Val.getAsRegion();
551
552 // The region should be symbolic, we do not know it's value.
553 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
554 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000555 return false;
556
Anna Zaksd3d85482011-12-16 18:28:50 +0000557 // Get it's symbol and find the declaration region it's pointing to.
558 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
559 if (!Sm)
560 return false;
561 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
562 if (!DeclReg)
563 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000564
Anna Zaksd3d85482011-12-16 18:28:50 +0000565 // This region corresponds to a declaration, find out if it's a global/extern
566 // variable named stdin with the proper type.
567 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
568 D = D->getCanonicalDecl();
569 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
570 if (const PointerType * PtrTy =
571 dyn_cast<PointerType>(D->getType().getTypePtr()))
572 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
573 return true;
574 }
Anna Zaksefd69892011-12-14 00:56:18 +0000575 return false;
576}
577
Anna Zaks9f03b622012-01-07 02:33:10 +0000578static bool getPrintfFormatArgumentNum(const CallExpr *CE,
579 const CheckerContext &C,
580 unsigned int &ArgNum) {
581 // Find if the function contains a format string argument.
582 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
583 // vsnprintf, syslog, custom annotated functions.
584 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
585 if (!FDecl)
586 return false;
587 for (specific_attr_iterator<FormatAttr>
588 i = FDecl->specific_attr_begin<FormatAttr>(),
589 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
590
591 const FormatAttr *Format = *i;
592 ArgNum = Format->getFormatIdx() - 1;
593 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
594 return true;
595 }
596
597 // Or if a function is named setproctitle (this is a heuristic).
598 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
599 ArgNum = 0;
600 return true;
601 }
602
603 return false;
604}
605
Anna Zaks8568ee72012-01-14 02:48:40 +0000606bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
607 const char Msg[],
608 CheckerContext &C) const {
609 assert(E);
610
611 // Check for taint.
612 const ProgramState *State = C.getState();
613 if (!State->isTainted(getPointedToSymbol(C, E)) &&
614 !State->isTainted(E, C.getLocationContext()))
615 return false;
616
617 // Generate diagnostic.
618 if (ExplodedNode *N = C.addTransition()) {
619 initBugType();
620 BugReport *report = new BugReport(*BT, Msg, N);
621 report->addRange(E->getSourceRange());
622 C.EmitReport(report);
623 return true;
624 }
625 return false;
626}
627
Anna Zaks9f03b622012-01-07 02:33:10 +0000628bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
629 CheckerContext &C) const{
630 // Check if the function contains a format string argument.
631 unsigned int ArgNum = 0;
632 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
633 return false;
634
635 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000636 if (generateReportIfTainted(CE->getArg(ArgNum),
637 MsgUncontrolledFormatString, C))
638 return true;
639 return false;
640}
641
642bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
643 StringRef Name,
644 CheckerContext &C) const {
645 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
646 .Case("system", 0)
647 .Case("popen", 0)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000648 .Case("execl", 0)
649 .Case("execle", 0)
650 .Case("execlp", 0)
651 .Case("execv", 0)
652 .Case("execvp", 0)
653 .Case("execvP", 0)
Anna Zaks8568ee72012-01-14 02:48:40 +0000654 .Default(UINT_MAX);
655
656 if (ArgNum == UINT_MAX)
657 return false;
658
659 if (generateReportIfTainted(CE->getArg(ArgNum),
660 MsgSanitizeSystemArgs, C))
661 return true;
662
Anna Zaks9f03b622012-01-07 02:33:10 +0000663 return false;
664}
665
Anna Zaks4e462212012-01-18 02:45:11 +0000666// TODO: Should this check be a part of the CString checker?
667// If yes, should taint be a global setting?
668bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
669 const FunctionDecl *FDecl,
670 CheckerContext &C) const {
671 // If the function has a buffer size argument, set ArgNum.
672 unsigned ArgNum = InvalidArgIndex;
673 unsigned BId = 0;
674 if ( (BId = FDecl->getMemoryFunctionKind()) )
675 switch(BId) {
676 case Builtin::BImemcpy:
677 case Builtin::BImemmove:
678 case Builtin::BIstrncpy:
679 ArgNum = 2;
680 break;
681 case Builtin::BIstrndup:
682 ArgNum = 1;
683 break;
684 default:
685 break;
686 };
687
688 if (ArgNum == InvalidArgIndex) {
689 if (C.isCLibraryFunction(FDecl, "malloc") ||
690 C.isCLibraryFunction(FDecl, "calloc") ||
691 C.isCLibraryFunction(FDecl, "alloca"))
692 ArgNum = 0;
693 else if (C.isCLibraryFunction(FDecl, "memccpy"))
694 ArgNum = 3;
695 else if (C.isCLibraryFunction(FDecl, "realloc"))
696 ArgNum = 1;
697 else if (C.isCLibraryFunction(FDecl, "bcopy"))
698 ArgNum = 2;
699 }
700
701 if (ArgNum != InvalidArgIndex &&
702 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
703 return true;
704
705 return false;
706}
707
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000708void ento::registerGenericTaintChecker(CheckerManager &mgr) {
709 mgr.registerChecker<GenericTaintChecker>();
710}