blob: a9e02173c3a972213b0f9d27c77955f15ec152bd [file] [log] [blame]
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under constrained.
//
//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks9b0c7492012-01-18 02:45:07 +000023#include "clang/Basic/Builtins.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000024#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000025
26using namespace clang;
27using namespace ento;
28
29namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000030class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000031 check::PreStmt<CallExpr> > {
32public:
Anna Zaks8568ee72012-01-14 02:48:40 +000033 static void *getTag() { static int Tag; return &Tag; }
34
35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000039
Anna Zaks9ffbe242011-12-17 00:26:34 +000040private:
Anna Zaksb9ac30c2012-01-24 19:32:25 +000041 static const unsigned InvalidArgIndex = UINT_MAX;
42 /// Denotes the return vale.
43 static const unsigned ReturnValueIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000044
Dylan Noblesmith6f42b622012-02-05 02:12:40 +000045 mutable OwningPtr<BugType> BT;
Anna Zaks9b0c7492012-01-18 02:45:07 +000046 inline void initBugType() const {
47 if (!BT)
Anna Zaks5fdadf42012-02-22 02:35:58 +000048 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
Anna Zaks9b0c7492012-01-18 02:45:07 +000049 }
Anna Zaks8f4caf52011-11-18 02:26:36 +000050
Anna Zaks1fb826a2012-01-12 02:22:34 +000051 /// \brief Catch taint related bugs. Check if tainted data is passed to a
52 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000053 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
Anna Zaks1fb826a2012-01-12 02:22:34 +000055 /// \brief Add taint sources on a pre-visit.
56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58 /// \brief Propagate taint generated at pre-visit.
59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61 /// \brief Add taint sources on a post visit.
62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
Anna Zaksb9ac30c2012-01-24 19:32:25 +000064 /// Check if the region the expression evaluates to is the standard input,
65 /// and thus, is tainted.
66 static bool isStdin(const Expr *E, CheckerContext &C);
67
Anna Zaks1fb826a2012-01-12 02:22:34 +000068 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000069 /// (points to).
Anna Zaks7cdfe292012-01-18 02:45:13 +000070 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +000071
Anna Zaks9ffbe242011-12-17 00:26:34 +000072 /// Functions defining the attack surface.
Ted Kremenek8bef8232012-01-26 21:29:00 +000073 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
Anna Zaks9ffbe242011-12-17 00:26:34 +000074 CheckerContext &C) const;
Ted Kremenek8bef8232012-01-26 21:29:00 +000075 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000078
79 /// Taint the scanned input if the file is tainted.
Ted Kremenek8bef8232012-01-26 21:29:00 +000080 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000081
Anna Zaks9f03b622012-01-07 02:33:10 +000082 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000083 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000084 bool checkUncontrolledFormatString(const CallExpr *CE,
85 CheckerContext &C) const;
86
Anna Zaks8568ee72012-01-14 02:48:40 +000087 /// Check for:
88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89 /// CWE-78, "Failure to Sanitize Data into an OS Command"
90 static const char MsgSanitizeSystemArgs[];
91 bool checkSystemCall(const CallExpr *CE, StringRef Name,
92 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000093
Anna Zaks4e462212012-01-18 02:45:11 +000094 /// Check if tainted data is used as a buffer size ins strn.. functions,
95 /// and allocators.
96 static const char MsgTaintedBufferSize[];
97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98 CheckerContext &C) const;
99
Anna Zaks8568ee72012-01-14 02:48:40 +0000100 /// Generate a report if the expression is tainted or points to tainted data.
101 bool generateReportIfTainted(const Expr *E, const char Msg[],
102 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000103
104
105 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000106
Anna Zaks022b3f42012-01-17 00:37:02 +0000107 /// \brief A struct used to specify taint propagation rules for a function.
108 ///
109 /// If any of the possible taint source arguments is tainted, all of the
110 /// destination arguments should also be tainted. Use InvalidArgIndex in the
111 /// src list to specify that all of the arguments can introduce taint. Use
112 /// InvalidArgIndex in the dst arguments to signify that all the non-const
113 /// pointer and reference arguments might be tainted on return. If
114 /// ReturnValueIndex is added to the dst list, the return value will be
115 /// tainted.
116 struct TaintPropagationRule {
117 /// List of arguments which can be taint sources and should be checked.
118 ArgVector SrcArgs;
119 /// List of arguments which should be tainted on function return.
120 ArgVector DstArgs;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000121 // TODO: Check if using other data structures would be more optimal.
Anna Zaks022b3f42012-01-17 00:37:02 +0000122
123 TaintPropagationRule() {}
124
Anna Zaks9b0c7492012-01-18 02:45:07 +0000125 TaintPropagationRule(unsigned SArg,
126 unsigned DArg, bool TaintRet = false) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000127 SrcArgs.push_back(SArg);
128 DstArgs.push_back(DArg);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000129 if (TaintRet)
130 DstArgs.push_back(ReturnValueIndex);
Anna Zaks022b3f42012-01-17 00:37:02 +0000131 }
132
Anna Zaks9b0c7492012-01-18 02:45:07 +0000133 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134 unsigned DArg, bool TaintRet = false) {
135 SrcArgs.push_back(SArg1);
136 SrcArgs.push_back(SArg2);
137 DstArgs.push_back(DArg);
138 if (TaintRet)
139 DstArgs.push_back(ReturnValueIndex);
140 }
141
142 /// Get the propagation rule for a given function.
143 static TaintPropagationRule
144 getTaintPropagationRule(const FunctionDecl *FDecl,
145 StringRef Name,
146 CheckerContext &C);
147
Anna Zaks022b3f42012-01-17 00:37:02 +0000148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
150
Anna Zaks9b0c7492012-01-18 02:45:07 +0000151 inline bool isNull() const { return SrcArgs.empty(); }
152
153 inline bool isDestinationArgument(unsigned ArgNum) const {
154 return (std::find(DstArgs.begin(),
155 DstArgs.end(), ArgNum) != DstArgs.end());
156 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000157
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000158 static inline bool isTaintedOrPointsToTainted(const Expr *E,
Ted Kremenek8bef8232012-01-26 21:29:00 +0000159 ProgramStateRef State,
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000160 CheckerContext &C) {
161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162 (E->getType().getTypePtr()->isPointerType() &&
163 State->isTainted(getPointedToSymbol(C, E))));
164 }
165
Anna Zaks7cdfe292012-01-18 02:45:13 +0000166 /// \brief Pre-process a function which propagates taint according to the
167 /// taint rule.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000169
170 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000171};
Anna Zaks9b0c7492012-01-18 02:45:07 +0000172
173const unsigned GenericTaintChecker::ReturnValueIndex;
174const unsigned GenericTaintChecker::InvalidArgIndex;
175
Anna Zaks8568ee72012-01-14 02:48:40 +0000176const char GenericTaintChecker::MsgUncontrolledFormatString[] =
Anna Zaks5fdadf42012-02-22 02:35:58 +0000177 "Untrusted data is used as a format string "
178 "(CWE-134: Uncontrolled Format String)";
Anna Zaks8568ee72012-01-14 02:48:40 +0000179
180const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
Anna Zaks5fdadf42012-02-22 02:35:58 +0000181 "Untrusted data is passed to a system call "
Anna Zaks8568ee72012-01-14 02:48:40 +0000182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks4e462212012-01-18 02:45:11 +0000183
184const char GenericTaintChecker::MsgTaintedBufferSize[] =
Anna Zaks5fdadf42012-02-22 02:35:58 +0000185 "Untrusted data is used to specify the buffer size "
Anna Zaks4e462212012-01-18 02:45:11 +0000186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187 "character data and the null terminator)";
188
189} // end of anonymous namespace
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000190
Anna Zaks1fb826a2012-01-12 02:22:34 +0000191/// A set which is used to pass information from call pre-visit instruction
192/// to the call post-visit. The values are unsigned integers, which are either
193/// ReturnValueIndex, or indexes of the pointer/reference argument, which
194/// points to data, which should be tainted on return.
Jordan Rose166d5022012-11-02 01:54:06 +0000195REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000196
Anna Zaks9b0c7492012-01-18 02:45:07 +0000197GenericTaintChecker::TaintPropagationRule
198GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199 const FunctionDecl *FDecl,
200 StringRef Name,
201 CheckerContext &C) {
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000202 // TODO: Currently, we might loose precision here: we always mark a return
203 // value as tainted even if it's just a pointer, pointing to tainted data.
204
Anna Zaks9b0c7492012-01-18 02:45:07 +0000205 // Check for exact name match for functions without builtin substitutes.
206 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000210 .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213 .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000218 .Case("read", TaintPropagationRule(0, 2, 1, true))
219 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000220 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221 .Case("fgets", TaintPropagationRule(2, 0, true))
222 .Case("getline", TaintPropagationRule(2, 0))
223 .Case("getdelim", TaintPropagationRule(3, 0))
224 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000225 .Default(TaintPropagationRule());
226
227 if (!Rule.isNull())
228 return Rule;
229
230 // Check if it's one of the memory setting/copying functions.
231 // This check is specialized but faster then calling isCLibraryFunction.
232 unsigned BId = 0;
233 if ( (BId = FDecl->getMemoryFunctionKind()) )
234 switch(BId) {
235 case Builtin::BImemcpy:
236 case Builtin::BImemmove:
237 case Builtin::BIstrncpy:
238 case Builtin::BIstrncat:
239 return TaintPropagationRule(1, 2, 0, true);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000240 case Builtin::BIstrlcpy:
241 case Builtin::BIstrlcat:
242 return TaintPropagationRule(1, 2, 0, false);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000243 case Builtin::BIstrndup:
244 return TaintPropagationRule(0, 1, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000245
246 default:
247 break;
248 };
249
250 // Process all other functions which could be defined as builtins.
251 if (Rule.isNull()) {
252 if (C.isCLibraryFunction(FDecl, "snprintf") ||
253 C.isCLibraryFunction(FDecl, "sprintf"))
254 return TaintPropagationRule(InvalidArgIndex, 0, true);
255 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256 C.isCLibraryFunction(FDecl, "stpcpy") ||
257 C.isCLibraryFunction(FDecl, "strcat"))
258 return TaintPropagationRule(1, 0, true);
259 else if (C.isCLibraryFunction(FDecl, "bcopy"))
260 return TaintPropagationRule(0, 2, 1, false);
261 else if (C.isCLibraryFunction(FDecl, "strdup") ||
262 C.isCLibraryFunction(FDecl, "strdupa"))
263 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks4e462212012-01-18 02:45:11 +0000264 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000266 }
267
268 // Skipping the following functions, since they might be used for cleansing
269 // or smart memory copy:
Benjamin Kramer48d798c2012-06-02 10:20:41 +0000270 // - memccpy - copying until hitting a special character.
Anna Zaks9b0c7492012-01-18 02:45:07 +0000271
272 return TaintPropagationRule();
Anna Zaks8f4caf52011-11-18 02:26:36 +0000273}
274
Anna Zaks9ffbe242011-12-17 00:26:34 +0000275void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000277 // Check for errors first.
278 if (checkPre(CE, C))
279 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000280
Anna Zaks9f03b622012-01-07 02:33:10 +0000281 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000282 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000283}
284
285void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000287 if (propagateFromPre(CE, C))
288 return;
289 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000290}
291
Anna Zaks1fb826a2012-01-12 02:22:34 +0000292void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000294 ProgramStateRef State = 0;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000295 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose5ef6e942012-07-10 23:13:01 +0000296 if (!FDecl || FDecl->getKind() != Decl::Function)
297 return;
298
Anna Zaks9b0c7492012-01-18 02:45:07 +0000299 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000300 if (Name.empty())
301 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000302
Anna Zaks9b0c7492012-01-18 02:45:07 +0000303 // First, try generating a propagation rule for this function.
304 TaintPropagationRule Rule =
305 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000306 if (!Rule.isNull()) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000307 State = Rule.process(CE, C);
Anna Zaks022b3f42012-01-17 00:37:02 +0000308 if (!State)
309 return;
310 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000311 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000312 }
313
Anna Zaks9b0c7492012-01-18 02:45:07 +0000314 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000315 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000316 .Case("fscanf", &GenericTaintChecker::preFscanf)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000317 .Default(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000318 // Check and evaluate the call.
319 if (evalFunction)
320 State = (this->*evalFunction)(CE, C);
321 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000322 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000323 C.addTransition(State);
Anna Zaks9b0c7492012-01-18 02:45:07 +0000324
Anna Zaks9ffbe242011-12-17 00:26:34 +0000325}
326
Anna Zaks1fb826a2012-01-12 02:22:34 +0000327bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000329 ProgramStateRef State = C.getState();
Anna Zaks1fb826a2012-01-12 02:22:34 +0000330
331 // Depending on what was tainted at pre-visit, we determined a set of
332 // arguments which should be tainted after the function returns. These are
333 // stored in the state as TaintArgsOnPostVisit set.
Jordan Rose166d5022012-11-02 01:54:06 +0000334 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000335 if (TaintArgs.isEmpty())
336 return false;
337
Anna Zaks1fb826a2012-01-12 02:22:34 +0000338 for (llvm::ImmutableSet<unsigned>::iterator
339 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340 unsigned ArgNum = *I;
341
342 // Special handling for the tainted return value.
343 if (ArgNum == ReturnValueIndex) {
344 State = State->addTaint(CE, C.getLocationContext());
345 continue;
346 }
347
348 // The arguments are pointer arguments. The data they are pointing at is
349 // tainted after the call.
Anna Zaks259052d2012-04-10 23:41:11 +0000350 if (CE->getNumArgs() < (ArgNum + 1))
351 return false;
Anna Zaks1fb826a2012-01-12 02:22:34 +0000352 const Expr* Arg = CE->getArg(ArgNum);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000353 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000354 if (Sym)
355 State = State->addTaint(Sym);
356 }
357
358 // Clear up the taint info from the state.
359 State = State->remove<TaintArgsOnPostVisit>();
360
361 if (State != C.getState()) {
362 C.addTransition(State);
363 return true;
364 }
365 return false;
366}
367
368void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000370 // Define the attack surface.
371 // Set the evaluation function by switching on the callee name.
Jordan Rose5ef6e942012-07-10 23:13:01 +0000372 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373 if (!FDecl || FDecl->getKind() != Decl::Function)
374 return;
375
376 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000377 if (Name.empty())
378 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000379 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000380 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000381 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000382 .Case("getchar", &GenericTaintChecker::postRetTaint)
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000383 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000384 .Case("getenv", &GenericTaintChecker::postRetTaint)
385 .Case("fopen", &GenericTaintChecker::postRetTaint)
386 .Case("fdopen", &GenericTaintChecker::postRetTaint)
387 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000388 .Case("getch", &GenericTaintChecker::postRetTaint)
389 .Case("wgetch", &GenericTaintChecker::postRetTaint)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000390 .Case("socket", &GenericTaintChecker::postSocket)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000391 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000392
393 // If the callee isn't defined, it is not of security concern.
394 // Check and evaluate the call.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000395 ProgramStateRef State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000396 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000397 State = (this->*evalFunction)(CE, C);
398 if (!State)
399 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000400
Anna Zaks9ffbe242011-12-17 00:26:34 +0000401 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000402}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000403
Anna Zaks9f03b622012-01-07 02:33:10 +0000404bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405
406 if (checkUncontrolledFormatString(CE, C))
407 return true;
408
Anna Zaks4e462212012-01-18 02:45:11 +0000409 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose5ef6e942012-07-10 23:13:01 +0000410 if (!FDecl || FDecl->getKind() != Decl::Function)
411 return false;
412
Anna Zaks4e462212012-01-18 02:45:11 +0000413 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks8568ee72012-01-14 02:48:40 +0000414 if (Name.empty())
415 return false;
416
417 if (checkSystemCall(CE, Name, C))
418 return true;
419
Anna Zaks4e462212012-01-18 02:45:11 +0000420 if (checkTaintedBufferSize(CE, FDecl, C))
421 return true;
422
Anna Zaks9f03b622012-01-07 02:33:10 +0000423 return false;
424}
425
Anna Zaks8f4caf52011-11-18 02:26:36 +0000426SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
Anna Zaks7cdfe292012-01-18 02:45:13 +0000427 const Expr* Arg) {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000428 ProgramStateRef State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000429 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000430 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000431 return 0;
432
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000433 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000434 if (!AddrLoc)
Anna Zaks8f4caf52011-11-18 02:26:36 +0000435 return 0;
436
Anna Zaks71d29092012-01-13 00:56:51 +0000437 const PointerType *ArgTy =
438 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
Anna Zaks665b0022012-01-21 06:59:01 +0000439 SVal Val = State->getSVal(*AddrLoc,
440 ArgTy ? ArgTy->getPointeeType(): QualType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000441 return Val.getAsSymbol();
442}
443
Ted Kremenek8bef8232012-01-26 21:29:00 +0000444ProgramStateRef
Anna Zaks7cdfe292012-01-18 02:45:13 +0000445GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000447 ProgramStateRef State = C.getState();
Anna Zaks022b3f42012-01-17 00:37:02 +0000448
449 // Check for taint in arguments.
450 bool IsTainted = false;
Anna Zaks7cdfe292012-01-18 02:45:13 +0000451 for (ArgVector::const_iterator I = SrcArgs.begin(),
452 E = SrcArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000453 unsigned ArgNum = *I;
454
455 if (ArgNum == InvalidArgIndex) {
Anna Zaks9b0c7492012-01-18 02:45:07 +0000456 // Check if any of the arguments is tainted, but skip the
457 // destination arguments.
458 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
Anna Zaks7cdfe292012-01-18 02:45:13 +0000459 if (isDestinationArgument(i))
Anna Zaks9b0c7492012-01-18 02:45:07 +0000460 continue;
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000461 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000462 break;
Anna Zaks9b0c7492012-01-18 02:45:07 +0000463 }
Anna Zaks022b3f42012-01-17 00:37:02 +0000464 break;
465 }
466
Anna Zaks259052d2012-04-10 23:41:11 +0000467 if (CE->getNumArgs() < (ArgNum + 1))
468 return State;
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000469 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
Anna Zaks022b3f42012-01-17 00:37:02 +0000470 break;
471 }
472 if (!IsTainted)
473 return State;
474
475 // Mark the arguments which should be tainted after the function returns.
Anna Zaks7cdfe292012-01-18 02:45:13 +0000476 for (ArgVector::const_iterator I = DstArgs.begin(),
477 E = DstArgs.end(); I != E; ++I) {
Anna Zaks022b3f42012-01-17 00:37:02 +0000478 unsigned ArgNum = *I;
479
480 // Should we mark all arguments as tainted?
481 if (ArgNum == InvalidArgIndex) {
482 // For all pointer and references that were passed in:
483 // If they are not pointing to const data, mark data as tainted.
484 // TODO: So far we are just going one level down; ideally we'd need to
485 // recurse here.
486 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487 const Expr *Arg = CE->getArg(i);
488 // Process pointer argument.
489 const Type *ArgTy = Arg->getType().getTypePtr();
490 QualType PType = ArgTy->getPointeeType();
491 if ((!PType.isNull() && !PType.isConstQualified())
492 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493 State = State->add<TaintArgsOnPostVisit>(i);
494 }
495 continue;
496 }
497
498 // Should mark the return value?
499 if (ArgNum == ReturnValueIndex) {
500 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501 continue;
502 }
503
504 // Mark the given argument.
505 assert(ArgNum < CE->getNumArgs());
506 State = State->add<TaintArgsOnPostVisit>(ArgNum);
507 }
508
509 return State;
510}
511
512
Anna Zaks1fb826a2012-01-12 02:22:34 +0000513// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514// and arg 1 should get taint.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000515ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
Anna Zaks9ffbe242011-12-17 00:26:34 +0000516 CheckerContext &C) const {
517 assert(CE->getNumArgs() >= 2);
Ted Kremenek8bef8232012-01-26 21:29:00 +0000518 ProgramStateRef State = C.getState();
Anna Zaks9ffbe242011-12-17 00:26:34 +0000519
520 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000521 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000522 isStdin(CE->getArg(0), C)) {
523 // All arguments except for the first two should get taint.
524 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525 State = State->add<TaintArgsOnPostVisit>(i);
526 return State;
527 }
528
Anna Zaks9ffbe242011-12-17 00:26:34 +0000529 return 0;
530}
531
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000532
533// If argument 0(protocol domain) is network, the return value should get taint.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000534ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
Anna Zaks259052d2012-04-10 23:41:11 +0000535 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000536 ProgramStateRef State = C.getState();
Anna Zaks259052d2012-04-10 23:41:11 +0000537 if (CE->getNumArgs() < 3)
538 return State;
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000539
540 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542 // White list the internal communication protocols.
543 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545 return State;
546 State = State->addTaint(CE, C.getLocationContext());
547 return State;
548}
549
Ted Kremenek8bef8232012-01-26 21:29:00 +0000550ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
Anna Zaks9ffbe242011-12-17 00:26:34 +0000551 CheckerContext &C) const {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000552 ProgramStateRef State = C.getState();
Anna Zaks259052d2012-04-10 23:41:11 +0000553 if (CE->getNumArgs() < 2)
554 return State;
555
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000556 // All arguments except for the very first one should get taint.
557 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558 // The arguments are pointer arguments. The data they are pointing at is
559 // tainted after the call.
560 const Expr* Arg = CE->getArg(i);
Anna Zaks7cdfe292012-01-18 02:45:13 +0000561 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaks1009ac72011-12-14 00:56:02 +0000562 if (Sym)
563 State = State->addTaint(Sym);
564 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000565 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000566}
567
Ted Kremenek8bef8232012-01-26 21:29:00 +0000568ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
Anna Zaks259052d2012-04-10 23:41:11 +0000569 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000570 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000571}
572
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000573bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
Ted Kremenek8bef8232012-01-26 21:29:00 +0000574 ProgramStateRef State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000575 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000576
Anna Zaksd3d85482011-12-16 18:28:50 +0000577 // stdin is a pointer, so it would be a region.
578 const MemRegion *MemReg = Val.getAsRegion();
579
580 // The region should be symbolic, we do not know it's value.
581 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000583 return false;
584
Anna Zaksd3d85482011-12-16 18:28:50 +0000585 // Get it's symbol and find the declaration region it's pointing to.
586 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587 if (!Sm)
588 return false;
589 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590 if (!DeclReg)
591 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000592
Anna Zaksd3d85482011-12-16 18:28:50 +0000593 // This region corresponds to a declaration, find out if it's a global/extern
594 // variable named stdin with the proper type.
595 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596 D = D->getCanonicalDecl();
597 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598 if (const PointerType * PtrTy =
599 dyn_cast<PointerType>(D->getType().getTypePtr()))
600 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601 return true;
602 }
Anna Zaksefd69892011-12-14 00:56:18 +0000603 return false;
604}
605
Anna Zaks9f03b622012-01-07 02:33:10 +0000606static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607 const CheckerContext &C,
608 unsigned int &ArgNum) {
609 // Find if the function contains a format string argument.
610 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611 // vsnprintf, syslog, custom annotated functions.
612 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613 if (!FDecl)
614 return false;
615 for (specific_attr_iterator<FormatAttr>
616 i = FDecl->specific_attr_begin<FormatAttr>(),
617 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
618
619 const FormatAttr *Format = *i;
620 ArgNum = Format->getFormatIdx() - 1;
621 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
622 return true;
623 }
624
625 // Or if a function is named setproctitle (this is a heuristic).
626 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
627 ArgNum = 0;
628 return true;
629 }
630
631 return false;
632}
633
Anna Zaks8568ee72012-01-14 02:48:40 +0000634bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
635 const char Msg[],
636 CheckerContext &C) const {
637 assert(E);
638
639 // Check for taint.
Ted Kremenek8bef8232012-01-26 21:29:00 +0000640 ProgramStateRef State = C.getState();
Anna Zaks8568ee72012-01-14 02:48:40 +0000641 if (!State->isTainted(getPointedToSymbol(C, E)) &&
642 !State->isTainted(E, C.getLocationContext()))
643 return false;
644
645 // Generate diagnostic.
646 if (ExplodedNode *N = C.addTransition()) {
647 initBugType();
648 BugReport *report = new BugReport(*BT, Msg, N);
649 report->addRange(E->getSourceRange());
Jordan Rose785950e2012-11-02 01:53:40 +0000650 C.emitReport(report);
Anna Zaks8568ee72012-01-14 02:48:40 +0000651 return true;
652 }
653 return false;
654}
655
Anna Zaks9f03b622012-01-07 02:33:10 +0000656bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
657 CheckerContext &C) const{
658 // Check if the function contains a format string argument.
659 unsigned int ArgNum = 0;
660 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
661 return false;
662
663 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000664 if (generateReportIfTainted(CE->getArg(ArgNum),
665 MsgUncontrolledFormatString, C))
666 return true;
667 return false;
668}
669
670bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
671 StringRef Name,
672 CheckerContext &C) const {
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000673 // TODO: It might make sense to run this check on demand. In some cases,
674 // we should check if the environment has been cleansed here. We also might
675 // need to know if the user was reset before these calls(seteuid).
Anna Zaks8568ee72012-01-14 02:48:40 +0000676 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
677 .Case("system", 0)
678 .Case("popen", 0)
Anna Zaks2bf8fd82012-01-20 00:11:19 +0000679 .Case("execl", 0)
680 .Case("execle", 0)
681 .Case("execlp", 0)
682 .Case("execv", 0)
683 .Case("execvp", 0)
684 .Case("execvP", 0)
Anna Zaksb9ac30c2012-01-24 19:32:25 +0000685 .Case("execve", 0)
686 .Case("dlopen", 0)
Anna Zaks8568ee72012-01-14 02:48:40 +0000687 .Default(UINT_MAX);
688
Anna Zaks259052d2012-04-10 23:41:11 +0000689 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
Anna Zaks8568ee72012-01-14 02:48:40 +0000690 return false;
691
692 if (generateReportIfTainted(CE->getArg(ArgNum),
693 MsgSanitizeSystemArgs, C))
694 return true;
695
Anna Zaks9f03b622012-01-07 02:33:10 +0000696 return false;
697}
698
Anna Zaks4e462212012-01-18 02:45:11 +0000699// TODO: Should this check be a part of the CString checker?
700// If yes, should taint be a global setting?
701bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
702 const FunctionDecl *FDecl,
703 CheckerContext &C) const {
704 // If the function has a buffer size argument, set ArgNum.
705 unsigned ArgNum = InvalidArgIndex;
706 unsigned BId = 0;
707 if ( (BId = FDecl->getMemoryFunctionKind()) )
708 switch(BId) {
709 case Builtin::BImemcpy:
710 case Builtin::BImemmove:
711 case Builtin::BIstrncpy:
712 ArgNum = 2;
713 break;
714 case Builtin::BIstrndup:
715 ArgNum = 1;
716 break;
717 default:
718 break;
719 };
720
721 if (ArgNum == InvalidArgIndex) {
722 if (C.isCLibraryFunction(FDecl, "malloc") ||
723 C.isCLibraryFunction(FDecl, "calloc") ||
724 C.isCLibraryFunction(FDecl, "alloca"))
725 ArgNum = 0;
726 else if (C.isCLibraryFunction(FDecl, "memccpy"))
727 ArgNum = 3;
728 else if (C.isCLibraryFunction(FDecl, "realloc"))
729 ArgNum = 1;
730 else if (C.isCLibraryFunction(FDecl, "bcopy"))
731 ArgNum = 2;
732 }
733
Anna Zaks259052d2012-04-10 23:41:11 +0000734 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
Anna Zaks4e462212012-01-18 02:45:11 +0000735 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
736 return true;
737
738 return false;
739}
740
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000741void ento::registerGenericTaintChecker(CheckerManager &mgr) {
742 mgr.registerChecker<GenericTaintChecker>();
743}