blob: da57a190b4e57dbdae7306484727394c476bc0f9 [file] [log] [blame]
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
//
//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23
24using namespace clang;
25using namespace ento;
26
27namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000028class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000029 check::PreStmt<CallExpr> > {
30public:
31 enum TaintOnPreVisitKind {
32 /// No taint propagates from pre-visit to post-visit.
33 PrevisitNone = 0,
34 /// Based on the pre-visit, the return argument of the call
35 /// should be tainted.
36 PrevisitTaintRet = 1,
37 /// Based on the pre-visit, the call can taint values through it's
38 /// pointer/reference arguments.
39 PrevisitTaintArgs = 2
40 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +000041
Anna Zaks9ffbe242011-12-17 00:26:34 +000042private:
Anna Zaks8f4caf52011-11-18 02:26:36 +000043 mutable llvm::OwningPtr<BugType> BT;
44 void initBugType() const;
45
Anna Zaks9f03b622012-01-07 02:33:10 +000046 /// Add/propagate taint on a post visit.
47 void taintPost(const CallExpr *CE, CheckerContext &C) const;
48 /// Add/propagate taint on a pre visit.
49 void taintPre(const CallExpr *CE, CheckerContext &C) const;
50
51 /// Catch taint related bugs. Check if tainted data is passed to a system
52 /// call etc.
53 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
Anna Zaks8f4caf52011-11-18 02:26:36 +000055 /// Given a pointer argument, get the symbol of the value it contains
56 /// (points to).
57 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000058 const Expr *Arg,
Anna Zaks8f4caf52011-11-18 02:26:36 +000059 bool IssueWarning = true) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000060
Anna Zaks9ffbe242011-12-17 00:26:34 +000061 /// Functions defining the attack surface.
62 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
63 CheckerContext &C) const;
64 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
65 const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const;
66 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
67 const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const;
68
69 /// Taint the scanned input if the file is tainted.
70 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
71 /// Taint if any of the arguments are tainted.
72 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000073
Anna Zaksd3d85482011-12-16 18:28:50 +000074 /// Check if the region the expression evaluates to is the standard input,
75 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000076 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000077
Anna Zaks9f03b622012-01-07 02:33:10 +000078 /// Check for CWE-134: Uncontrolled Format String.
79 bool checkUncontrolledFormatString(const CallExpr *CE,
80 CheckerContext &C) const;
81
Anna Zaksdf18c5a2011-11-16 19:58:13 +000082public:
Anna Zaks9ffbe242011-12-17 00:26:34 +000083 static void *getTag() { static int Tag; return &Tag; }
84
Anna Zaksdf18c5a2011-11-16 19:58:13 +000085 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000086 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000087
88 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
89
Anna Zaksdf18c5a2011-11-16 19:58:13 +000090};
91}
92
/// Definitions for the checker specific state.
///
/// TaintOnPreVisit is an empty tag type; it only serves as the key selecting
/// the ProgramStateTrait specialization for the pre-visit taint flag.
namespace {
struct TaintOnPreVisit {};
}
95namespace clang {
96namespace ento {
97 /// A flag which is used to pass information from call pre-visit instruction
98 /// to the call post-visit. The value is an unsigned, which takes on values
99 /// of the TaintOnPreVisitKind enumeration.
100 template<>
101 struct ProgramStateTrait<TaintOnPreVisit> :
102 public ProgramStatePartialTrait<unsigned> {
103 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
104 };
105}
106}
107
Anna Zaks8f4caf52011-11-18 02:26:36 +0000108inline void GenericTaintChecker::initBugType() const {
109 if (!BT)
Anna Zaks9f03b622012-01-07 02:33:10 +0000110 BT.reset(new BugType("Taint Analysis", "General"));
Anna Zaks8f4caf52011-11-18 02:26:36 +0000111}
112
Anna Zaks9ffbe242011-12-17 00:26:34 +0000113void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
114 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000115 // Check for errors first.
116 if (checkPre(CE, C))
117 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000118
Anna Zaks9f03b622012-01-07 02:33:10 +0000119 // Add taint second.
120 taintPre(CE, C);
121}
122
123void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
124 CheckerContext &C) const {
125 taintPost(CE, C);
126}
127
128void GenericTaintChecker::taintPre(const CallExpr *CE,
129 CheckerContext &C) const {
Anna Zaks9ffbe242011-12-17 00:26:34 +0000130 // Set the evaluation function by switching on the callee name.
131 StringRef Name = C.getCalleeName(CE);
132 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
133 .Case("fscanf", &GenericTaintChecker::preFscanf)
134 .Case("atoi", &GenericTaintChecker::preAnyArgs)
135 .Case("atol", &GenericTaintChecker::preAnyArgs)
136 .Case("atoll", &GenericTaintChecker::preAnyArgs)
137 .Default(0);
138
139 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000140 const ProgramState *State = 0;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000141 if (evalFunction)
142 State = (this->*evalFunction)(CE, C);
143 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000144 return;
145
Anna Zaks9ffbe242011-12-17 00:26:34 +0000146 C.addTransition(State);
147}
148
Anna Zaks9f03b622012-01-07 02:33:10 +0000149void GenericTaintChecker::taintPost(const CallExpr *CE,
150 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000151 // Define the attack surface.
152 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000153 StringRef Name = C.getCalleeName(CE);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000154 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000155 .Case("scanf", &GenericTaintChecker::postScanf)
156 .Case("fscanf", &GenericTaintChecker::postFscanf)
157 .Case("sscanf", &GenericTaintChecker::postFscanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000158 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000159 .Case("getchar", &GenericTaintChecker::postRetTaint)
160 .Case("getenv", &GenericTaintChecker::postRetTaint)
161 .Case("fopen", &GenericTaintChecker::postRetTaint)
162 .Case("fdopen", &GenericTaintChecker::postRetTaint)
163 .Case("freopen", &GenericTaintChecker::postRetTaint)
164 .Default(&GenericTaintChecker::postDefault);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000165
166 // If the callee isn't defined, it is not of security concern.
167 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000168 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000169 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000170 State = (this->*evalFunction)(CE, C);
171 if (!State)
172 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000173
Anna Zaks9ffbe242011-12-17 00:26:34 +0000174 assert(State->get<TaintOnPreVisit>() == PrevisitNone &&
175 "State has to be cleared.");
176 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000177}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000178
Anna Zaks9f03b622012-01-07 02:33:10 +0000179bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
180
181 if (checkUncontrolledFormatString(CE, C))
182 return true;
183
184 StringRef Name = C.getCalleeName(CE);
185 return false;
186}
187
Anna Zaks8f4caf52011-11-18 02:26:36 +0000188SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
189 const Expr* Arg,
190 bool IssueWarning) const {
191 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000192 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000193 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000194 return 0;
195
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000196 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000197
198 if (!AddrLoc && !IssueWarning)
199 return 0;
200
201 // If the Expr is not a location, issue a warning.
202 if (!AddrLoc) {
203 assert(IssueWarning);
204 if (ExplodedNode *N = C.generateSink(State)) {
205 initBugType();
206 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
207 report->addRange(Arg->getSourceRange());
208 C.EmitReport(report);
209 }
210 return 0;
211 }
212
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000213 SVal Val = State->getSVal(*AddrLoc);
214 return Val.getAsSymbol();
215}
216
Anna Zaks9ffbe242011-12-17 00:26:34 +0000217const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
218 CheckerContext &C) const {
219 assert(CE->getNumArgs() >= 2);
220 const ProgramState *State = C.getState();
221
222 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000223 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
224 isStdin(CE->getArg(0), C))
Anna Zaks9ffbe242011-12-17 00:26:34 +0000225 return State->set<TaintOnPreVisit>(PrevisitTaintArgs);
226 return 0;
227}
228
229// If any other arguments are tainted, mark state as tainted on pre-visit.
230const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
231 CheckerContext &C) const {
232 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
233 const ProgramState *State = C.getState();
234 const Expr *Arg = CE->getArg(i);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000235 if (State->isTainted(Arg, C.getLocationContext()) ||
236 State->isTainted(getPointedToSymbol(C, Arg)))
Anna Zaks9ffbe242011-12-17 00:26:34 +0000237 return State = State->set<TaintOnPreVisit>(PrevisitTaintRet);
238 }
239 return 0;
240}
241
242const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE,
243 CheckerContext &C) const {
244 const ProgramState *State = C.getState();
245
246 // Check if we know that the result needs to be tainted based on the
247 // pre-visit analysis.
248 if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000249 State = State->addTaint(CE, C.getLocationContext());
Anna Zaks9ffbe242011-12-17 00:26:34 +0000250 return State->set<TaintOnPreVisit>(PrevisitNone);
251 }
252
253 return 0;
254}
255
256const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
257 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000258 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000259 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000260 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000261 // All arguments except for the very first one should get taint.
262 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
263 // The arguments are pointer arguments. The data they are pointing at is
264 // tainted after the call.
265 const Expr* Arg = CE->getArg(i);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000266 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000267 if (Sym)
268 State = State->addTaint(Sym);
269 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000270 return State;
Anna Zaks1009ac72011-12-14 00:56:02 +0000271}
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000272
Anna Zaks1009ac72011-12-14 00:56:02 +0000273/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
274/// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000275const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE,
276 CheckerContext &C) const {
Anna Zaks1009ac72011-12-14 00:56:02 +0000277 const ProgramState *State = C.getState();
278 assert(CE->getNumArgs() >= 2);
279
Anna Zaks9ffbe242011-12-17 00:26:34 +0000280 // Fscanf is only tainted if the input file is tainted at pre visit, so
281 // check for that first.
282 if (State->get<TaintOnPreVisit>() == PrevisitNone)
283 return 0;
284
285 // Reset the taint state.
286 State = State->set<TaintOnPreVisit>(PrevisitNone);
Anna Zaks1009ac72011-12-14 00:56:02 +0000287
288 // All arguments except for the first two should get taint.
289 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
290 // The arguments are pointer arguments. The data they are pointing at is
291 // tainted after the call.
292 const Expr* Arg = CE->getArg(i);
293 SymbolRef Sym = getPointedToSymbol(C, Arg);
294 if (Sym)
295 State = State->addTaint(Sym);
296 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000297 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000298}
299
Anna Zaks9ffbe242011-12-17 00:26:34 +0000300const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
301 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000302 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000303}
304
Anna Zaksefd69892011-12-14 00:56:18 +0000305bool GenericTaintChecker::isStdin(const Expr *E,
306 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000307 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000308 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000309
Anna Zaksd3d85482011-12-16 18:28:50 +0000310 // stdin is a pointer, so it would be a region.
311 const MemRegion *MemReg = Val.getAsRegion();
312
313 // The region should be symbolic, we do not know it's value.
314 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
315 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000316 return false;
317
Anna Zaksd3d85482011-12-16 18:28:50 +0000318 // Get it's symbol and find the declaration region it's pointing to.
319 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
320 if (!Sm)
321 return false;
322 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
323 if (!DeclReg)
324 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000325
Anna Zaksd3d85482011-12-16 18:28:50 +0000326 // This region corresponds to a declaration, find out if it's a global/extern
327 // variable named stdin with the proper type.
328 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
329 D = D->getCanonicalDecl();
330 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
331 if (const PointerType * PtrTy =
332 dyn_cast<PointerType>(D->getType().getTypePtr()))
333 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
334 return true;
335 }
Anna Zaksefd69892011-12-14 00:56:18 +0000336 return false;
337}
338
Anna Zaks9f03b622012-01-07 02:33:10 +0000339static bool getPrintfFormatArgumentNum(const CallExpr *CE,
340 const CheckerContext &C,
341 unsigned int &ArgNum) {
342 // Find if the function contains a format string argument.
343 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
344 // vsnprintf, syslog, custom annotated functions.
345 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
346 if (!FDecl)
347 return false;
348 for (specific_attr_iterator<FormatAttr>
349 i = FDecl->specific_attr_begin<FormatAttr>(),
350 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
351
352 const FormatAttr *Format = *i;
353 ArgNum = Format->getFormatIdx() - 1;
354 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
355 return true;
356 }
357
358 // Or if a function is named setproctitle (this is a heuristic).
359 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
360 ArgNum = 0;
361 return true;
362 }
363
364 return false;
365}
366
367bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
368 CheckerContext &C) const{
369 // Check if the function contains a format string argument.
370 unsigned int ArgNum = 0;
371 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
372 return false;
373
374 // If either the format string content or the pointer itself are tainted, warn.
375 const ProgramState *State = C.getState();
376 const Expr *Arg = CE->getArg(ArgNum);
377 if (State->isTainted(getPointedToSymbol(C, Arg, false)) ||
378 State->isTainted(Arg, C.getLocationContext()))
379 if (ExplodedNode *N = C.addTransition()) {
380 initBugType();
381 BugReport *report = new BugReport(*BT,
382 "Tainted format string (CWE-134: Uncontrolled Format String)", N);
383 report->addRange(Arg->getSourceRange());
384 C.EmitReport(report);
385 return true;
386 }
387 return false;
388}
389
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000390void ento::registerGenericTaintChecker(CheckerManager &mgr) {
391 mgr.registerChecker<GenericTaintChecker>();
392}