blob: 3db088f844cac906eb3ffb00195efd4a1fe4c5a8 [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Benjamin Kramer2fa67ef2012-12-01 15:09:41 +000023#include "clang/AST/Attr.h"
Anna Zaks9b0c7492012-01-18 02:45:07 +000024#include "clang/Basic/Builtins.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000025#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000026
27using namespace clang;
28using namespace ento;
29
30namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000031class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000032 check::PreStmt<CallExpr> > {
33public:
Anna Zaks8568ee72012-01-14 02:48:40 +000034 static void *getTag() { static int Tag; return &Tag; }
35
36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
38
39 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000040
Anna Zaks9ffbe242011-12-17 00:26:34 +000041private:
Anna Zaksb9ac30c2012-01-24 19:32:25 +000042 static const unsigned InvalidArgIndex = UINT_MAX;
43 /// Denotes the return vale.
44 static const unsigned ReturnValueIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000045
Dylan Noblesmith6f42b622012-02-05 02:12:40 +000046 mutable OwningPtr<BugType> BT;
Anna Zaks9b0c7492012-01-18 02:45:07 +000047 inline void initBugType() const {
48 if (!BT)
Anna Zaks5fdadf42012-02-22 02:35:58 +000049 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
Anna Zaks9b0c7492012-01-18 02:45:07 +000050 }
Anna Zaks8f4caf52011-11-18 02:26:36 +000051
Anna Zaks1fb826a2012-01-12 02:22:34 +000052 /// \brief Catch taint related bugs. Check if tainted data is passed to a
53 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000054 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
55
Anna Zaks1fb826a2012-01-12 02:22:34 +000056 /// \brief Add taint sources on a pre-visit.
57 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
58
59 /// \brief Propagate taint generated at pre-visit.
60 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
61
62 /// \brief Add taint sources on a post visit.
63 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
64
Anna Zaksb9ac30c2012-01-24 19:32:25 +000065 /// Check if the region the expression evaluates to is the standard input,
66 /// and thus, is tainted.
67 static bool isStdin(const Expr *E, CheckerContext &C);
68
Anna Zaks1fb826a2012-01-12 02:22:34 +000069 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000070 /// (points to).
Anna Zaks7cdfe292012-01-18 02:45:13 +000071 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +000072
Anna Zaks9ffbe242011-12-17 00:26:34 +000073 /// Functions defining the attack surface.
Ted Kremenek8bef8232012-01-26 21:29:00 +000074 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
Anna Zaks9ffbe242011-12-17 00:26:34 +000075 CheckerContext &C) const;
Ted Kremenek8bef8232012-01-26 21:29:00 +000076 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
77 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
78 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000079
80 /// Taint the scanned input if the file is tainted.
Ted Kremenek8bef8232012-01-26 21:29:00 +000081 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000082
Anna Zaks9f03b622012-01-07 02:33:10 +000083 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000084 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000085 bool checkUncontrolledFormatString(const CallExpr *CE,
86 CheckerContext &C) const;
87
Anna Zaks8568ee72012-01-14 02:48:40 +000088 /// Check for:
89 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
90 /// CWE-78, "Failure to Sanitize Data into an OS Command"
91 static const char MsgSanitizeSystemArgs[];
92 bool checkSystemCall(const CallExpr *CE, StringRef Name,
93 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000094
Anna Zaks4e462212012-01-18 02:45:11 +000095 /// Check if tainted data is used as a buffer size ins strn.. functions,
96 /// and allocators.
97 static const char MsgTaintedBufferSize[];
98 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
99 CheckerContext &C) const;
100
Anna Zaks8568ee72012-01-14 02:48:40 +0000101 /// Generate a report if the expression is tainted or points to tainted data.
102 bool generateReportIfTainted(const Expr *E, const char Msg[],
103 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000104
105
106 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000107
Anna Zaks022b3f42012-01-17 00:37:02 +0000108 /// \brief A struct used to specify taint propagation rules for a function.
109 ///
110 /// If any of the possible taint source arguments is tainted, all of the
111 /// destination arguments should also be tainted. Use InvalidArgIndex in the
112 /// src list to specify that all of the arguments can introduce taint. Use
113 /// InvalidArgIndex in the dst arguments to signify that all the non-const
114 /// pointer and reference arguments might be tainted on return. If
115 /// ReturnValueIndex is added to the dst list, the return value will be
116 /// tainted.
117 struct TaintPropagationRule {
118 /// List of arguments which can be taint sources and should be checked.
119 ArgVector SrcArgs;
120 /// List of arguments which should be tainted on function return.
121 ArgVector DstArgs;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000122 // TODO: Check if using other data structures would be more optimal.
Anna Zaks022b3f42012-01-17 00:37:02 +0000123
124 TaintPropagationRule() {}
125
Anna Zaks9b0c7492012-01-18 02:45:07 +0000126 TaintPropagationRule(unsigned SArg,
127 unsigned DArg, bool TaintRet = false) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000128 SrcArgs.push_back(SArg);
129 DstArgs.push_back(DArg);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000130 if (TaintRet)
131 DstArgs.push_back(ReturnValueIndex);
Anna Zaks022b3f42012-01-17 00:37:02 +0000132 }
133
Anna Zaks9b0c7492012-01-18 02:45:07 +0000134 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
135 unsigned DArg, bool TaintRet = false) {
136 SrcArgs.push_back(SArg1);
137 SrcArgs.push_back(SArg2);
138 DstArgs.push_back(DArg);
139 if (TaintRet)
140 DstArgs.push_back(ReturnValueIndex);
141 }
142
143 /// Get the propagation rule for a given function.
144 static TaintPropagationRule
145 getTaintPropagationRule(const FunctionDecl *FDecl,
146 StringRef Name,
147 CheckerContext &C);
148
Anna Zaks022b3f42012-01-17 00:37:02 +0000149 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
150 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
151
Anna Zaks9b0c7492012-01-18 02:45:07 +0000152 inline bool isNull() const { return SrcArgs.empty(); }
153
154 inline bool isDestinationArgument(unsigned ArgNum) const {
155 return (std::find(DstArgs.begin(),
156 DstArgs.end(), ArgNum) != DstArgs.end());
157 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000158
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000159 static inline bool isTaintedOrPointsToTainted(const Expr *E,
Ted Kremenek8bef8232012-01-26 21:29:00 +0000160 ProgramStateRef State,
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000161 CheckerContext &C) {
162 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
163 (E->getType().getTypePtr()->isPointerType() &&
164 State->isTainted(getPointedToSymbol(C, E))));
165 }
166
Anna Zaks7cdfe292012-01-18 02:45:13 +0000167 /// \brief Pre-process a function which propagates taint according to the
168 /// taint rule.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000169 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000170
171 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000172};
Anna Zaks9b0c7492012-01-18 02:45:07 +0000173
174const unsigned GenericTaintChecker::ReturnValueIndex;
175const unsigned GenericTaintChecker::InvalidArgIndex;
176
Anna Zaks8568ee72012-01-14 02:48:40 +0000177const char GenericTaintChecker::MsgUncontrolledFormatString[] =
Anna Zaks5fdadf42012-02-22 02:35:58 +0000178 "Untrusted data is used as a format string "
179 "(CWE-134: Uncontrolled Format String)";
Anna Zaks8568ee72012-01-14 02:48:40 +0000180
181const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
Anna Zaks5fdadf42012-02-22 02:35:58 +0000182 "Untrusted data is passed to a system call "
Anna Zaks8568ee72012-01-14 02:48:40 +0000183 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks4e462212012-01-18 02:45:11 +0000184
185const char GenericTaintChecker::MsgTaintedBufferSize[] =
Anna Zaks5fdadf42012-02-22 02:35:58 +0000186 "Untrusted data is used to specify the buffer size "
Anna Zaks4e462212012-01-18 02:45:11 +0000187 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
188 "character data and the null terminator)";
189
190} // end of anonymous namespace
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000191
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, which are either
/// ReturnValueIndex, or the indexes of pointer/reference arguments which
/// point to data that should be tainted on return.
Jordan Rose166d5022012-11-02 01:54:06 +0000196REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000197
Anna Zaks9b0c7492012-01-18 02:45:07 +0000198GenericTaintChecker::TaintPropagationRule
199GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
200 const FunctionDecl *FDecl,
201 StringRef Name,
202 CheckerContext &C) {
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000203 // TODO: Currently, we might loose precision here: we always mark a return
204 // value as tainted even if it's just a pointer, pointing to tainted data.
205
Anna Zaks9b0c7492012-01-18 02:45:07 +0000206 // Check for exact name match for functions without builtin substitutes.
207 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000211 .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
212 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
213 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
214 .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
215 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
216 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
217 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
218 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000219 .Case("read", TaintPropagationRule(0, 2, 1, true))
220 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000221 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
222 .Case("fgets", TaintPropagationRule(2, 0, true))
223 .Case("getline", TaintPropagationRule(2, 0))
224 .Case("getdelim", TaintPropagationRule(3, 0))
225 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000226 .Default(TaintPropagationRule());
227
228 if (!Rule.isNull())
229 return Rule;
230
231 // Check if it's one of the memory setting/copying functions.
232 // This check is specialized but faster then calling isCLibraryFunction.
233 unsigned BId = 0;
234 if ( (BId = FDecl->getMemoryFunctionKind()) )
235 switch(BId) {
236 case Builtin::BImemcpy:
237 case Builtin::BImemmove:
238 case Builtin::BIstrncpy:
239 case Builtin::BIstrncat:
240 return TaintPropagationRule(1, 2, 0, true);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000241 case Builtin::BIstrlcpy:
242 case Builtin::BIstrlcat:
243 return TaintPropagationRule(1, 2, 0, false);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000244 case Builtin::BIstrndup:
245 return TaintPropagationRule(0, 1, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000246
247 default:
248 break;
249 };
250
251 // Process all other functions which could be defined as builtins.
252 if (Rule.isNull()) {
253 if (C.isCLibraryFunction(FDecl, "snprintf") ||
254 C.isCLibraryFunction(FDecl, "sprintf"))
255 return TaintPropagationRule(InvalidArgIndex, 0, true);
256 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
257 C.isCLibraryFunction(FDecl, "stpcpy") ||
258 C.isCLibraryFunction(FDecl, "strcat"))
259 return TaintPropagationRule(1, 0, true);
260 else if (C.isCLibraryFunction(FDecl, "bcopy"))
261 return TaintPropagationRule(0, 2, 1, false);
262 else if (C.isCLibraryFunction(FDecl, "strdup") ||
263 C.isCLibraryFunction(FDecl, "strdupa"))
264 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks4e462212012-01-18 02:45:11 +0000265 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
266 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000267 }
268
269 // Skipping the following functions, since they might be used for cleansing
270 // or smart memory copy:
Benjamin Kramer48d798c2012-06-02 10:20:41 +0000271 // - memccpy - copying until hitting a special character.
Anna Zaks9b0c7492012-01-18 02:45:07 +0000272
273 return TaintPropagationRule();
Anna Zaks8f4caf52011-11-18 02:26:36 +0000274}
275
Anna Zaks9ffbe242011-12-17 00:26:34 +0000276void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
277 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000278 // Check for errors first.
279 if (checkPre(CE, C))
280 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000281
Anna Zaks9f03b622012-01-07 02:33:10 +0000282 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000283 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000284}
285
286void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
287 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000288 if (propagateFromPre(CE, C))
289 return;
290 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000291}
292
Anna Zaks1fb826a2012-01-12 02:22:34 +0000293void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
294 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000295 ProgramStateRef State = 0;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000296 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose5ef6e942012-07-10 23:13:01 +0000297 if (!FDecl || FDecl->getKind() != Decl::Function)
298 return;
299
Anna Zaks9b0c7492012-01-18 02:45:07 +0000300 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000301 if (Name.empty())
302 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000303
Anna Zaks9b0c7492012-01-18 02:45:07 +0000304 // First, try generating a propagation rule for this function.
305 TaintPropagationRule Rule =
306 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000307 if (!Rule.isNull()) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000308 State = Rule.process(CE, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000309 if (!State)
310 return;
311 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000312 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000313 }
314
Anna Zaks9b0c7492012-01-18 02:45:07 +0000315 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000316 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000317 .Case("fscanf", &GenericTaintChecker::preFscanf)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000318 .Default(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000319 // Check and evaluate the call.
320 if (evalFunction)
321 State = (this->*evalFunction)(CE, C);
322 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000323 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000324 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000325
Anna Zaks9ffbe242011-12-17 00:26:34 +0000326}
327
Anna Zaks1fb826a2012-01-12 02:22:34 +0000328bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
329 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000330 ProgramStateRef State = C.getState();
Anna Zaks1fb826a2012-01-12 02:22:34 +0000331
332 // Depending on what was tainted at pre-visit, we determined a set of
333 // arguments which should be tainted after the function returns. These are
334 // stored in the state as TaintArgsOnPostVisit set.
Jordan Rose166d5022012-11-02 01:54:06 +0000335 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000336 if (TaintArgs.isEmpty())
337 return false;
338
Anna Zaks1fb826a2012-01-12 02:22:34 +0000339 for (llvm::ImmutableSet<unsigned>::iterator
340 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
341 unsigned ArgNum = *I;
342
343 // Special handling for the tainted return value.
344 if (ArgNum == ReturnValueIndex) {
345 State = State->addTaint(CE, C.getLocationContext());
346 continue;
347 }
348
349 // The arguments are pointer arguments. The data they are pointing at is
350 // tainted after the call.
Anna Zaks259052d2012-04-10 23:41:11 +0000351 if (CE->getNumArgs() < (ArgNum + 1))
352 return false;
Anna Zaks1fb826a2012-01-12 02:22:34 +0000353 const Expr* Arg = CE->getArg(ArgNum);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000354 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000355 if (Sym)
356 State = State->addTaint(Sym);
357 }
358
359 // Clear up the taint info from the state.
360 State = State->remove<TaintArgsOnPostVisit>();
361
362 if (State != C.getState()) {
363 C.addTransition(State);
364 return true;
365 }
366 return false;
367}
368
369void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
370 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000371 // Define the attack surface.
372 // Set the evaluation function by switching on the callee name.
Jordan Rose5ef6e942012-07-10 23:13:01 +0000373 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
374 if (!FDecl || FDecl->getKind() != Decl::Function)
375 return;
376
377 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000378 if (Name.empty())
379 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000380 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000381 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000382 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000383 .Case("getchar", &GenericTaintChecker::postRetTaint)
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000384 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000385 .Case("getenv", &GenericTaintChecker::postRetTaint)
386 .Case("fopen", &GenericTaintChecker::postRetTaint)
387 .Case("fdopen", &GenericTaintChecker::postRetTaint)
388 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000389 .Case("getch", &GenericTaintChecker::postRetTaint)
390 .Case("wgetch", &GenericTaintChecker::postRetTaint)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000391 .Case("socket", &GenericTaintChecker::postSocket)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000392 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000393
394 // If the callee isn't defined, it is not of security concern.
395 // Check and evaluate the call.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000396 ProgramStateRef State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000397 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000398 State = (this->*evalFunction)(CE, C);
399 if (!State)
400 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000401
Anna Zaks9ffbe242011-12-17 00:26:34 +0000402 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000403}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000404
Anna Zaks9f03b622012-01-07 02:33:10 +0000405bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
406
407 if (checkUncontrolledFormatString(CE, C))
408 return true;
409
Anna Zaks4e462212012-01-18 02:45:11 +0000410 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose5ef6e942012-07-10 23:13:01 +0000411 if (!FDecl || FDecl->getKind() != Decl::Function)
412 return false;
413
Anna Zaks4e462212012-01-18 02:45:11 +0000414 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks8568ee72012-01-14 02:48:40 +0000415 if (Name.empty())
416 return false;
417
418 if (checkSystemCall(CE, Name, C))
419 return true;
420
Anna Zaks4e462212012-01-18 02:45:11 +0000421 if (checkTaintedBufferSize(CE, FDecl, C))
422 return true;
423
Anna Zaks9f03b622012-01-07 02:33:10 +0000424 return false;
425}
426
Anna Zaks8f4caf52011-11-18 02:26:36 +0000427SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
Anna Zaks7cdfe292012-01-18 02:45:13 +0000428 const Expr* Arg) {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000429 ProgramStateRef State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000430 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000431 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000432 return 0;
433
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000434 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000435 if (!AddrLoc)
Anna Zaks8f4caf52011-11-18 02:26:36 +0000436 return 0;
437
Anna Zaks71d29092012-01-13 00:56:51 +0000438 const PointerType *ArgTy =
439 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
Anna Zaks665b0022012-01-21 06:59:01 +0000440 SVal Val = State->getSVal(*AddrLoc,
441 ArgTy ? ArgTy->getPointeeType(): QualType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000442 return Val.getAsSymbol();
443}
444
Ted Kremenek8bef8232012-01-26 21:29:00 +0000445ProgramStateRef
Anna Zaks7cdfe292012-01-18 02:45:13 +0000446GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
447 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000448 ProgramStateRef State = C.getState();
Anna Zaks022b3f42012-01-17 00:37:02 +0000449
450 // Check for taint in arguments.
451 bool IsTainted = false;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000452 for (ArgVector::const_iterator I = SrcArgs.begin(),
453 E = SrcArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000454 unsigned ArgNum = *I;
455
456 if (ArgNum == InvalidArgIndex) {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000457 // Check if any of the arguments is tainted, but skip the
458 // destination arguments.
459 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000460 if (isDestinationArgument(i))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000461 continue;
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000462 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000463 break;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000464 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000465 break;
466 }
467
Anna Zaks259052d2012-04-10 23:41:11 +0000468 if (CE->getNumArgs() < (ArgNum + 1))
469 return State;
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000470 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000471 break;
472 }
473 if (!IsTainted)
474 return State;
475
476 // Mark the arguments which should be tainted after the function returns.
Anna Zaks7cdfe292012-01-18 02:45:13 +0000477 for (ArgVector::const_iterator I = DstArgs.begin(),
478 E = DstArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000479 unsigned ArgNum = *I;
480
481 // Should we mark all arguments as tainted?
482 if (ArgNum == InvalidArgIndex) {
483 // For all pointer and references that were passed in:
484 // If they are not pointing to const data, mark data as tainted.
485 // TODO: So far we are just going one level down; ideally we'd need to
486 // recurse here.
487 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
488 const Expr *Arg = CE->getArg(i);
489 // Process pointer argument.
490 const Type *ArgTy = Arg->getType().getTypePtr();
491 QualType PType = ArgTy->getPointeeType();
492 if ((!PType.isNull() && !PType.isConstQualified())
493 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
494 State = State->add<TaintArgsOnPostVisit>(i);
495 }
496 continue;
497 }
498
499 // Should mark the return value?
500 if (ArgNum == ReturnValueIndex) {
501 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
502 continue;
503 }
504
505 // Mark the given argument.
506 assert(ArgNum < CE->getNumArgs());
507 State = State->add<TaintArgsOnPostVisit>(ArgNum);
508 }
509
510 return State;
511}
512
513
Anna Zaks1fb826a2012-01-12 02:22:34 +0000514// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
515// and arg 1 should get taint.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000516ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
Anna Zaks9ffbe242011-12-17 00:26:34 +0000517 CheckerContext &C) const {
518 assert(CE->getNumArgs() >= 2);
Ted Kremenek8bef8232012-01-26 21:29:00 +0000519 ProgramStateRef State = C.getState();
Anna Zaks9ffbe242011-12-17 00:26:34 +0000520
521 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000522 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000523 isStdin(CE->getArg(0), C)) {
524 // All arguments except for the first two should get taint.
525 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
526 State = State->add<TaintArgsOnPostVisit>(i);
527 return State;
528 }
529
Anna Zaks9ffbe242011-12-17 00:26:34 +0000530 return 0;
531}
532
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000533
534// If argument 0(protocol domain) is network, the return value should get taint.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000535ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
Anna Zaks259052d2012-04-10 23:41:11 +0000536 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000537 ProgramStateRef State = C.getState();
Anna Zaks259052d2012-04-10 23:41:11 +0000538 if (CE->getNumArgs() < 3)
539 return State;
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000540
541 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
542 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
543 // White list the internal communication protocols.
544 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
545 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
546 return State;
547 State = State->addTaint(CE, C.getLocationContext());
548 return State;
549}
550
Ted Kremenek8bef8232012-01-26 21:29:00 +0000551ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
Anna Zaks9ffbe242011-12-17 00:26:34 +0000552 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000553 ProgramStateRef State = C.getState();
Anna Zaks259052d2012-04-10 23:41:11 +0000554 if (CE->getNumArgs() < 2)
555 return State;
556
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000557 // All arguments except for the very first one should get taint.
558 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
559 // The arguments are pointer arguments. The data they are pointing at is
560 // tainted after the call.
561 const Expr* Arg = CE->getArg(i);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000562 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1009ac72011-12-14 00:56:02 +0000563 if (Sym)
564 State = State->addTaint(Sym);
565 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000566 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000567}
568
Ted Kremenek8bef8232012-01-26 21:29:00 +0000569ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
Anna Zaks259052d2012-04-10 23:41:11 +0000570 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000571 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000572}
573
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000574bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000575 ProgramStateRef State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000576 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000577
Anna Zaksd3d85482011-12-16 18:28:50 +0000578 // stdin is a pointer, so it would be a region.
579 const MemRegion *MemReg = Val.getAsRegion();
580
581 // The region should be symbolic, we do not know it's value.
582 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
583 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000584 return false;
585
Anna Zaksd3d85482011-12-16 18:28:50 +0000586 // Get it's symbol and find the declaration region it's pointing to.
587 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
588 if (!Sm)
589 return false;
590 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
591 if (!DeclReg)
592 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000593
Anna Zaksd3d85482011-12-16 18:28:50 +0000594 // This region corresponds to a declaration, find out if it's a global/extern
595 // variable named stdin with the proper type.
596 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
597 D = D->getCanonicalDecl();
598 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
599 if (const PointerType * PtrTy =
600 dyn_cast<PointerType>(D->getType().getTypePtr()))
601 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
602 return true;
603 }
Anna Zaksefd69892011-12-14 00:56:18 +0000604 return false;
605}
606
Anna Zaks9f03b622012-01-07 02:33:10 +0000607static bool getPrintfFormatArgumentNum(const CallExpr *CE,
608 const CheckerContext &C,
609 unsigned int &ArgNum) {
610 // Find if the function contains a format string argument.
611 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
612 // vsnprintf, syslog, custom annotated functions.
613 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
614 if (!FDecl)
615 return false;
616 for (specific_attr_iterator<FormatAttr>
617 i = FDecl->specific_attr_begin<FormatAttr>(),
618 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
619
620 const FormatAttr *Format = *i;
621 ArgNum = Format->getFormatIdx() - 1;
622 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
623 return true;
624 }
625
626 // Or if a function is named setproctitle (this is a heuristic).
627 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
628 ArgNum = 0;
629 return true;
630 }
631
632 return false;
633}
634
Anna Zaks8568ee72012-01-14 02:48:40 +0000635bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
636 const char Msg[],
637 CheckerContext &C) const {
638 assert(E);
639
640 // Check for taint.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000641 ProgramStateRef State = C.getState();
Anna Zaks8568ee72012-01-14 02:48:40 +0000642 if (!State->isTainted(getPointedToSymbol(C, E)) &&
643 !State->isTainted(E, C.getLocationContext()))
644 return false;
645
646 // Generate diagnostic.
647 if (ExplodedNode *N = C.addTransition()) {
648 initBugType();
649 BugReport *report = new BugReport(*BT, Msg, N);
650 report->addRange(E->getSourceRange());
Jordan Rose785950e2012-11-02 01:53:40 +0000651 C.emitReport(report);
Anna Zaks8568ee72012-01-14 02:48:40 +0000652 return true;
653 }
654 return false;
655}
656
Anna Zaks9f03b622012-01-07 02:33:10 +0000657bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
658 CheckerContext &C) const{
659 // Check if the function contains a format string argument.
660 unsigned int ArgNum = 0;
661 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
662 return false;
663
664 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000665 if (generateReportIfTainted(CE->getArg(ArgNum),
666 MsgUncontrolledFormatString, C))
667 return true;
668 return false;
669}
670
671bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
672 StringRef Name,
673 CheckerContext &C) const {
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000674 // TODO: It might make sense to run this check on demand. In some cases,
675 // we should check if the environment has been cleansed here. We also might
676 // need to know if the user was reset before these calls(seteuid).
Anna Zaks8568ee72012-01-14 02:48:40 +0000677 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
678 .Case("system", 0)
679 .Case("popen", 0)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000680 .Case("execl", 0)
681 .Case("execle", 0)
682 .Case("execlp", 0)
683 .Case("execv", 0)
684 .Case("execvp", 0)
685 .Case("execvP", 0)
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000686 .Case("execve", 0)
687 .Case("dlopen", 0)
Anna Zaks8568ee72012-01-14 02:48:40 +0000688 .Default(UINT_MAX);
689
Anna Zaks259052d2012-04-10 23:41:11 +0000690 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
Anna Zaks8568ee72012-01-14 02:48:40 +0000691 return false;
692
693 if (generateReportIfTainted(CE->getArg(ArgNum),
694 MsgSanitizeSystemArgs, C))
695 return true;
696
Anna Zaks9f03b622012-01-07 02:33:10 +0000697 return false;
698}
699
Anna Zaks4e462212012-01-18 02:45:11 +0000700// TODO: Should this check be a part of the CString checker?
701// If yes, should taint be a global setting?
702bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
703 const FunctionDecl *FDecl,
704 CheckerContext &C) const {
705 // If the function has a buffer size argument, set ArgNum.
706 unsigned ArgNum = InvalidArgIndex;
707 unsigned BId = 0;
708 if ( (BId = FDecl->getMemoryFunctionKind()) )
709 switch(BId) {
710 case Builtin::BImemcpy:
711 case Builtin::BImemmove:
712 case Builtin::BIstrncpy:
713 ArgNum = 2;
714 break;
715 case Builtin::BIstrndup:
716 ArgNum = 1;
717 break;
718 default:
719 break;
720 };
721
722 if (ArgNum == InvalidArgIndex) {
723 if (C.isCLibraryFunction(FDecl, "malloc") ||
724 C.isCLibraryFunction(FDecl, "calloc") ||
725 C.isCLibraryFunction(FDecl, "alloca"))
726 ArgNum = 0;
727 else if (C.isCLibraryFunction(FDecl, "memccpy"))
728 ArgNum = 3;
729 else if (C.isCLibraryFunction(FDecl, "realloc"))
730 ArgNum = 1;
731 else if (C.isCLibraryFunction(FDecl, "bcopy"))
732 ArgNum = 2;
733 }
734
Anna Zaks259052d2012-04-10 23:41:11 +0000735 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
Anna Zaks4e462212012-01-18 02:45:11 +0000736 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
737 return true;
738
739 return false;
740}
741
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000742void ento::registerGenericTaintChecker(CheckerManager &mgr) {
743 mgr.registerChecker<GenericTaintChecker>();
744}