blob: 1d112c5328070216a2548d1097aead86896f8fa8 [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000023#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000024
25using namespace clang;
26using namespace ento;
27
28namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000029class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000030 check::PreStmt<CallExpr> > {
31public:
Anna Zaks1fb826a2012-01-12 02:22:34 +000032 static const unsigned ReturnValueIndex = UINT_MAX;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000033
Anna Zaks9ffbe242011-12-17 00:26:34 +000034private:
Anna Zaks8f4caf52011-11-18 02:26:36 +000035 mutable llvm::OwningPtr<BugType> BT;
36 void initBugType() const;
37
Anna Zaks1fb826a2012-01-12 02:22:34 +000038 /// \brief Catch taint related bugs. Check if tainted data is passed to a
39 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000040 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
41
Anna Zaks1fb826a2012-01-12 02:22:34 +000042 /// \brief Add taint sources on a pre-visit.
43 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
44
45 /// \brief Propagate taint generated at pre-visit.
46 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
47
48 /// \brief Add taint sources on a post visit.
49 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
50
51 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000052 /// (points to).
53 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000054 const Expr *Arg,
Anna Zaks1fb826a2012-01-12 02:22:34 +000055 bool IssueWarning = false) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000056
Anna Zaks9ffbe242011-12-17 00:26:34 +000057 /// Functions defining the attack surface.
58 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
59 CheckerContext &C) const;
60 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000061 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000062
63 /// Taint the scanned input if the file is tainted.
64 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
65 /// Taint if any of the arguments are tainted.
66 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks1fb826a2012-01-12 02:22:34 +000067 const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000068
Anna Zaksd3d85482011-12-16 18:28:50 +000069 /// Check if the region the expression evaluates to is the standard input,
70 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000071 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000072
Anna Zaks9f03b622012-01-07 02:33:10 +000073 /// Check for CWE-134: Uncontrolled Format String.
74 bool checkUncontrolledFormatString(const CallExpr *CE,
75 CheckerContext &C) const;
76
Anna Zaksdf18c5a2011-11-16 19:58:13 +000077public:
Anna Zaks9ffbe242011-12-17 00:26:34 +000078 static void *getTag() { static int Tag; return &Tag; }
79
Anna Zaksdf18c5a2011-11-16 19:58:13 +000080 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000081 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000082
83 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
84
Anna Zaksdf18c5a2011-11-16 19:58:13 +000085};
86}
87
Anna Zaks1fb826a2012-01-12 02:22:34 +000088/// A set which is used to pass information from call pre-visit instruction
89/// to the call post-visit. The values are unsigned integers, which are either
90/// ReturnValueIndex, or indexes of the pointer/reference argument, which
91/// points to data, which should be tainted on return.
92namespace { struct TaintArgsOnPostVisit{}; }
93namespace clang { namespace ento {
94template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
95 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
96 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
97};
98}}
Anna Zaks9ffbe242011-12-17 00:26:34 +000099
Anna Zaks8f4caf52011-11-18 02:26:36 +0000100inline void GenericTaintChecker::initBugType() const {
101 if (!BT)
Anna Zaks9f03b622012-01-07 02:33:10 +0000102 BT.reset(new BugType("Taint Analysis", "General"));
Anna Zaks8f4caf52011-11-18 02:26:36 +0000103}
104
Anna Zaks9ffbe242011-12-17 00:26:34 +0000105void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
106 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000107 // Check for errors first.
108 if (checkPre(CE, C))
109 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000110
Anna Zaks9f03b622012-01-07 02:33:10 +0000111 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000112 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000113}
114
115void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
116 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000117 if (propagateFromPre(CE, C))
118 return;
119 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000120}
121
Anna Zaks1fb826a2012-01-12 02:22:34 +0000122void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
123 CheckerContext &C) const {
Anna Zaks9ffbe242011-12-17 00:26:34 +0000124 // Set the evaluation function by switching on the callee name.
125 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000126 if (Name.empty())
127 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000128 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000129 .Case("atoi", &GenericTaintChecker::preAnyArgs)
130 .Case("atol", &GenericTaintChecker::preAnyArgs)
131 .Case("atoll", &GenericTaintChecker::preAnyArgs)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000132 .Case("fscanf", &GenericTaintChecker::preFscanf)
133 .Cases("strcpy", "__builtin___strcpy_chk",
134 "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
135 .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
136 .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000137 .Default(0);
138
139 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000140 const ProgramState *State = 0;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000141 if (evalFunction)
142 State = (this->*evalFunction)(CE, C);
143 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000144 return;
145
Anna Zaks9ffbe242011-12-17 00:26:34 +0000146 C.addTransition(State);
147}
148
Anna Zaks1fb826a2012-01-12 02:22:34 +0000149bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
150 CheckerContext &C) const {
151 const ProgramState *State = C.getState();
152
153 // Depending on what was tainted at pre-visit, we determined a set of
154 // arguments which should be tainted after the function returns. These are
155 // stored in the state as TaintArgsOnPostVisit set.
156 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
157 for (llvm::ImmutableSet<unsigned>::iterator
158 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
159 unsigned ArgNum = *I;
160
161 // Special handling for the tainted return value.
162 if (ArgNum == ReturnValueIndex) {
163 State = State->addTaint(CE, C.getLocationContext());
164 continue;
165 }
166
167 // The arguments are pointer arguments. The data they are pointing at is
168 // tainted after the call.
169 const Expr* Arg = CE->getArg(ArgNum);
170 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
171 if (Sym)
172 State = State->addTaint(Sym);
173 }
174
175 // Clear up the taint info from the state.
176 State = State->remove<TaintArgsOnPostVisit>();
177
178 if (State != C.getState()) {
179 C.addTransition(State);
180 return true;
181 }
182 return false;
183}
184
185void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
186 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000187 // Define the attack surface.
188 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000189 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000190 if (Name.empty())
191 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000192 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000193 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000194 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000195 .Case("getchar", &GenericTaintChecker::postRetTaint)
196 .Case("getenv", &GenericTaintChecker::postRetTaint)
197 .Case("fopen", &GenericTaintChecker::postRetTaint)
198 .Case("fdopen", &GenericTaintChecker::postRetTaint)
199 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000200 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000201
202 // If the callee isn't defined, it is not of security concern.
203 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000204 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000205 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000206 State = (this->*evalFunction)(CE, C);
207 if (!State)
208 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000209
Anna Zaks9ffbe242011-12-17 00:26:34 +0000210 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000211}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000212
Anna Zaks9f03b622012-01-07 02:33:10 +0000213bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
214
215 if (checkUncontrolledFormatString(CE, C))
216 return true;
217
Anna Zaks9f03b622012-01-07 02:33:10 +0000218 return false;
219}
220
Anna Zaks8f4caf52011-11-18 02:26:36 +0000221SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
222 const Expr* Arg,
223 bool IssueWarning) const {
224 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000225 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000226 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000227 return 0;
228
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000229 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000230
231 if (!AddrLoc && !IssueWarning)
232 return 0;
233
234 // If the Expr is not a location, issue a warning.
235 if (!AddrLoc) {
236 assert(IssueWarning);
237 if (ExplodedNode *N = C.generateSink(State)) {
238 initBugType();
239 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
240 report->addRange(Arg->getSourceRange());
241 C.EmitReport(report);
242 }
243 return 0;
244 }
245
Anna Zaks71d29092012-01-13 00:56:51 +0000246 const PointerType *ArgTy =
247 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
248 assert(ArgTy);
249 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000250 return Val.getAsSymbol();
251}
252
Anna Zaks1fb826a2012-01-12 02:22:34 +0000253// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
254// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000255const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
256 CheckerContext &C) const {
257 assert(CE->getNumArgs() >= 2);
258 const ProgramState *State = C.getState();
259
260 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000261 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000262 isStdin(CE->getArg(0), C)) {
263 // All arguments except for the first two should get taint.
264 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
265 State = State->add<TaintArgsOnPostVisit>(i);
266 return State;
267 }
268
Anna Zaks9ffbe242011-12-17 00:26:34 +0000269 return 0;
270}
271
272// If any other arguments are tainted, mark state as tainted on pre-visit.
273const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
274 CheckerContext &C) const {
275 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
276 const ProgramState *State = C.getState();
277 const Expr *Arg = CE->getArg(i);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000278 if (State->isTainted(Arg, C.getLocationContext()) ||
279 State->isTainted(getPointedToSymbol(C, Arg)))
Anna Zaks1fb826a2012-01-12 02:22:34 +0000280 return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000281 }
282 return 0;
283}
284
Anna Zaks1fb826a2012-01-12 02:22:34 +0000285const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
286 CheckerContext &C) const {
287 assert(CE->getNumArgs() >= 2);
288 const Expr *FromArg = CE->getArg(1);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000289 const ProgramState *State = C.getState();
Anna Zaks1fb826a2012-01-12 02:22:34 +0000290 if (State->isTainted(FromArg, C.getLocationContext()) ||
291 State->isTainted(getPointedToSymbol(C, FromArg)))
292 return State = State->add<TaintArgsOnPostVisit>(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000293 return 0;
294}
295
296const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
297 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000298 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000299 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000300 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000301 // All arguments except for the very first one should get taint.
302 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
303 // The arguments are pointer arguments. The data they are pointing at is
304 // tainted after the call.
305 const Expr* Arg = CE->getArg(i);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000306 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
Anna Zaks1009ac72011-12-14 00:56:02 +0000307 if (Sym)
308 State = State->addTaint(Sym);
309 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000310 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000311}
312
Anna Zaks9ffbe242011-12-17 00:26:34 +0000313const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
314 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000315 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000316}
317
Anna Zaksefd69892011-12-14 00:56:18 +0000318bool GenericTaintChecker::isStdin(const Expr *E,
319 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000320 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000321 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000322
Anna Zaksd3d85482011-12-16 18:28:50 +0000323 // stdin is a pointer, so it would be a region.
324 const MemRegion *MemReg = Val.getAsRegion();
325
326 // The region should be symbolic, we do not know it's value.
327 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
328 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000329 return false;
330
Anna Zaksd3d85482011-12-16 18:28:50 +0000331 // Get it's symbol and find the declaration region it's pointing to.
332 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
333 if (!Sm)
334 return false;
335 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
336 if (!DeclReg)
337 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000338
Anna Zaksd3d85482011-12-16 18:28:50 +0000339 // This region corresponds to a declaration, find out if it's a global/extern
340 // variable named stdin with the proper type.
341 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
342 D = D->getCanonicalDecl();
343 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
344 if (const PointerType * PtrTy =
345 dyn_cast<PointerType>(D->getType().getTypePtr()))
346 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
347 return true;
348 }
Anna Zaksefd69892011-12-14 00:56:18 +0000349 return false;
350}
351
Anna Zaks9f03b622012-01-07 02:33:10 +0000352static bool getPrintfFormatArgumentNum(const CallExpr *CE,
353 const CheckerContext &C,
354 unsigned int &ArgNum) {
355 // Find if the function contains a format string argument.
356 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
357 // vsnprintf, syslog, custom annotated functions.
358 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
359 if (!FDecl)
360 return false;
361 for (specific_attr_iterator<FormatAttr>
362 i = FDecl->specific_attr_begin<FormatAttr>(),
363 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
364
365 const FormatAttr *Format = *i;
366 ArgNum = Format->getFormatIdx() - 1;
367 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
368 return true;
369 }
370
371 // Or if a function is named setproctitle (this is a heuristic).
372 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
373 ArgNum = 0;
374 return true;
375 }
376
377 return false;
378}
379
380bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
381 CheckerContext &C) const{
382 // Check if the function contains a format string argument.
383 unsigned int ArgNum = 0;
384 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
385 return false;
386
387 // If either the format string content or the pointer itself are tainted, warn.
388 const ProgramState *State = C.getState();
389 const Expr *Arg = CE->getArg(ArgNum);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000390 if (State->isTainted(getPointedToSymbol(C, Arg)) ||
Anna Zaks9f03b622012-01-07 02:33:10 +0000391 State->isTainted(Arg, C.getLocationContext()))
392 if (ExplodedNode *N = C.addTransition()) {
393 initBugType();
394 BugReport *report = new BugReport(*BT,
395 "Tainted format string (CWE-134: Uncontrolled Format String)", N);
396 report->addRange(Arg->getSourceRange());
397 C.EmitReport(report);
398 return true;
399 }
400 return false;
401}
402
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000403void ento::registerGenericTaintChecker(CheckerManager &mgr) {
404 mgr.registerChecker<GenericTaintChecker>();
405}