blob: 12f5be416a21d2e9419f7edb6365d7c741fc8701 [file] [log] [blame]
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
//
//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23
24using namespace clang;
25using namespace ento;
26
27namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000028class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000029 check::PreStmt<CallExpr> > {
30public:
31 enum TaintOnPreVisitKind {
32 /// No taint propagates from pre-visit to post-visit.
33 PrevisitNone = 0,
34 /// Based on the pre-visit, the return argument of the call
35 /// should be tainted.
36 PrevisitTaintRet = 1,
37 /// Based on the pre-visit, the call can taint values through it's
38 /// pointer/reference arguments.
39 PrevisitTaintArgs = 2
40 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +000041
Anna Zaks9ffbe242011-12-17 00:26:34 +000042private:
Anna Zaks8f4caf52011-11-18 02:26:36 +000043 mutable llvm::OwningPtr<BugType> BT;
44 void initBugType() const;
45
Anna Zaks9f03b622012-01-07 02:33:10 +000046 /// Add/propagate taint on a post visit.
47 void taintPost(const CallExpr *CE, CheckerContext &C) const;
48 /// Add/propagate taint on a pre visit.
49 void taintPre(const CallExpr *CE, CheckerContext &C) const;
50
51 /// Catch taint related bugs. Check if tainted data is passed to a system
52 /// call etc.
53 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
Anna Zaks8f4caf52011-11-18 02:26:36 +000055 /// Given a pointer argument, get the symbol of the value it contains
56 /// (points to).
57 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000058 const Expr *Arg,
Anna Zaks8f4caf52011-11-18 02:26:36 +000059 bool IssueWarning = true) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000060
Anna Zaks9ffbe242011-12-17 00:26:34 +000061 /// Functions defining the attack surface.
62 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
63 CheckerContext &C) const;
64 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
65 const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const;
66 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
67 const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const;
68
69 /// Taint the scanned input if the file is tainted.
70 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
71 /// Taint if any of the arguments are tainted.
72 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000073
Anna Zaksd3d85482011-12-16 18:28:50 +000074 /// Check if the region the expression evaluates to is the standard input,
75 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000076 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000077
Anna Zaks9f03b622012-01-07 02:33:10 +000078 /// Check for CWE-134: Uncontrolled Format String.
79 bool checkUncontrolledFormatString(const CallExpr *CE,
80 CheckerContext &C) const;
81
Anna Zaksdf18c5a2011-11-16 19:58:13 +000082public:
Anna Zaks9ffbe242011-12-17 00:26:34 +000083 static void *getTag() { static int Tag; return &Tag; }
84
Anna Zaksdf18c5a2011-11-16 19:58:13 +000085 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000086 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000087
88 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
89
Anna Zaksdf18c5a2011-11-16 19:58:13 +000090};
91}
92
Anna Zaks9ffbe242011-12-17 00:26:34 +000093/// Definitions for the checker specific state.
94namespace { struct TaintOnPreVisit {};}
95namespace clang {
96namespace ento {
97 /// A flag which is used to pass information from call pre-visit instruction
98 /// to the call post-visit. The value is an unsigned, which takes on values
99 /// of the TaintOnPreVisitKind enumeration.
100 template<>
101 struct ProgramStateTrait<TaintOnPreVisit> :
102 public ProgramStatePartialTrait<unsigned> {
103 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
104 };
105}
106}
107
Anna Zaks8f4caf52011-11-18 02:26:36 +0000108inline void GenericTaintChecker::initBugType() const {
109 if (!BT)
Anna Zaks9f03b622012-01-07 02:33:10 +0000110 BT.reset(new BugType("Taint Analysis", "General"));
Anna Zaks8f4caf52011-11-18 02:26:36 +0000111}
112
Anna Zaks9ffbe242011-12-17 00:26:34 +0000113void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
114 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000115 // Check for errors first.
116 if (checkPre(CE, C))
117 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000118
Anna Zaks9f03b622012-01-07 02:33:10 +0000119 // Add taint second.
120 taintPre(CE, C);
121}
122
123void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
124 CheckerContext &C) const {
125 taintPost(CE, C);
126}
127
128void GenericTaintChecker::taintPre(const CallExpr *CE,
129 CheckerContext &C) const {
Anna Zaks9ffbe242011-12-17 00:26:34 +0000130 // Set the evaluation function by switching on the callee name.
131 StringRef Name = C.getCalleeName(CE);
132 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
133 .Case("fscanf", &GenericTaintChecker::preFscanf)
134 .Case("atoi", &GenericTaintChecker::preAnyArgs)
135 .Case("atol", &GenericTaintChecker::preAnyArgs)
136 .Case("atoll", &GenericTaintChecker::preAnyArgs)
137 .Default(0);
138
139 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000140 const ProgramState *State = 0;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000141 if (evalFunction)
142 State = (this->*evalFunction)(CE, C);
143 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000144 return;
145
Anna Zaks9ffbe242011-12-17 00:26:34 +0000146 C.addTransition(State);
147}
148
Anna Zaks9f03b622012-01-07 02:33:10 +0000149void GenericTaintChecker::taintPost(const CallExpr *CE,
150 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000151 // Define the attack surface.
152 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000153 StringRef Name = C.getCalleeName(CE);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000154 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000155 .Case("scanf", &GenericTaintChecker::postScanf)
156 .Case("fscanf", &GenericTaintChecker::postFscanf)
157 .Case("sscanf", &GenericTaintChecker::postFscanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000158 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000159 .Case("getchar", &GenericTaintChecker::postRetTaint)
160 .Case("getenv", &GenericTaintChecker::postRetTaint)
161 .Case("fopen", &GenericTaintChecker::postRetTaint)
162 .Case("fdopen", &GenericTaintChecker::postRetTaint)
163 .Case("freopen", &GenericTaintChecker::postRetTaint)
164 .Default(&GenericTaintChecker::postDefault);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000165
166 // If the callee isn't defined, it is not of security concern.
167 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000168 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000169 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000170 State = (this->*evalFunction)(CE, C);
171 if (!State)
172 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000173
Anna Zaks9ffbe242011-12-17 00:26:34 +0000174 assert(State->get<TaintOnPreVisit>() == PrevisitNone &&
175 "State has to be cleared.");
176 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000177}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000178
Anna Zaks9f03b622012-01-07 02:33:10 +0000179bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
180
181 if (checkUncontrolledFormatString(CE, C))
182 return true;
183
Anna Zaks9f03b622012-01-07 02:33:10 +0000184 return false;
185}
186
Anna Zaks8f4caf52011-11-18 02:26:36 +0000187SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
188 const Expr* Arg,
189 bool IssueWarning) const {
190 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000191 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000192 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000193 return 0;
194
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000195 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000196
197 if (!AddrLoc && !IssueWarning)
198 return 0;
199
200 // If the Expr is not a location, issue a warning.
201 if (!AddrLoc) {
202 assert(IssueWarning);
203 if (ExplodedNode *N = C.generateSink(State)) {
204 initBugType();
205 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
206 report->addRange(Arg->getSourceRange());
207 C.EmitReport(report);
208 }
209 return 0;
210 }
211
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000212 SVal Val = State->getSVal(*AddrLoc);
213 return Val.getAsSymbol();
214}
215
Anna Zaks9ffbe242011-12-17 00:26:34 +0000216const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
217 CheckerContext &C) const {
218 assert(CE->getNumArgs() >= 2);
219 const ProgramState *State = C.getState();
220
221 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000222 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
223 isStdin(CE->getArg(0), C))
Anna Zaks9ffbe242011-12-17 00:26:34 +0000224 return State->set<TaintOnPreVisit>(PrevisitTaintArgs);
225 return 0;
226}
227
228// If any other arguments are tainted, mark state as tainted on pre-visit.
229const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
230 CheckerContext &C) const {
231 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
232 const ProgramState *State = C.getState();
233 const Expr *Arg = CE->getArg(i);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000234 if (State->isTainted(Arg, C.getLocationContext()) ||
235 State->isTainted(getPointedToSymbol(C, Arg)))
Anna Zaks9ffbe242011-12-17 00:26:34 +0000236 return State = State->set<TaintOnPreVisit>(PrevisitTaintRet);
237 }
238 return 0;
239}
240
241const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE,
242 CheckerContext &C) const {
243 const ProgramState *State = C.getState();
244
245 // Check if we know that the result needs to be tainted based on the
246 // pre-visit analysis.
247 if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000248 State = State->addTaint(CE, C.getLocationContext());
Anna Zaks9ffbe242011-12-17 00:26:34 +0000249 return State->set<TaintOnPreVisit>(PrevisitNone);
250 }
251
252 return 0;
253}
254
255const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
256 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000257 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000258 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000259 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000260 // All arguments except for the very first one should get taint.
261 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
262 // The arguments are pointer arguments. The data they are pointing at is
263 // tainted after the call.
264 const Expr* Arg = CE->getArg(i);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000265 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000266 if (Sym)
267 State = State->addTaint(Sym);
268 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000269 return State;
Anna Zaks1009ac72011-12-14 00:56:02 +0000270}
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000271
Anna Zaks1009ac72011-12-14 00:56:02 +0000272/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
273/// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000274const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE,
275 CheckerContext &C) const {
Anna Zaks1009ac72011-12-14 00:56:02 +0000276 const ProgramState *State = C.getState();
277 assert(CE->getNumArgs() >= 2);
278
Anna Zaks9ffbe242011-12-17 00:26:34 +0000279 // Fscanf is only tainted if the input file is tainted at pre visit, so
280 // check for that first.
281 if (State->get<TaintOnPreVisit>() == PrevisitNone)
282 return 0;
283
284 // Reset the taint state.
285 State = State->set<TaintOnPreVisit>(PrevisitNone);
Anna Zaks1009ac72011-12-14 00:56:02 +0000286
287 // All arguments except for the first two should get taint.
288 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
289 // The arguments are pointer arguments. The data they are pointing at is
290 // tainted after the call.
291 const Expr* Arg = CE->getArg(i);
292 SymbolRef Sym = getPointedToSymbol(C, Arg);
293 if (Sym)
294 State = State->addTaint(Sym);
295 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000296 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000297}
298
Anna Zaks9ffbe242011-12-17 00:26:34 +0000299const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
300 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000301 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000302}
303
Anna Zaksefd69892011-12-14 00:56:18 +0000304bool GenericTaintChecker::isStdin(const Expr *E,
305 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000306 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000307 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000308
Anna Zaksd3d85482011-12-16 18:28:50 +0000309 // stdin is a pointer, so it would be a region.
310 const MemRegion *MemReg = Val.getAsRegion();
311
312 // The region should be symbolic, we do not know it's value.
313 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
314 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000315 return false;
316
Anna Zaksd3d85482011-12-16 18:28:50 +0000317 // Get it's symbol and find the declaration region it's pointing to.
318 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
319 if (!Sm)
320 return false;
321 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
322 if (!DeclReg)
323 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000324
Anna Zaksd3d85482011-12-16 18:28:50 +0000325 // This region corresponds to a declaration, find out if it's a global/extern
326 // variable named stdin with the proper type.
327 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
328 D = D->getCanonicalDecl();
329 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
330 if (const PointerType * PtrTy =
331 dyn_cast<PointerType>(D->getType().getTypePtr()))
332 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
333 return true;
334 }
Anna Zaksefd69892011-12-14 00:56:18 +0000335 return false;
336}
337
Anna Zaks9f03b622012-01-07 02:33:10 +0000338static bool getPrintfFormatArgumentNum(const CallExpr *CE,
339 const CheckerContext &C,
340 unsigned int &ArgNum) {
341 // Find if the function contains a format string argument.
342 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
343 // vsnprintf, syslog, custom annotated functions.
344 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
345 if (!FDecl)
346 return false;
347 for (specific_attr_iterator<FormatAttr>
348 i = FDecl->specific_attr_begin<FormatAttr>(),
349 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
350
351 const FormatAttr *Format = *i;
352 ArgNum = Format->getFormatIdx() - 1;
353 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
354 return true;
355 }
356
357 // Or if a function is named setproctitle (this is a heuristic).
358 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
359 ArgNum = 0;
360 return true;
361 }
362
363 return false;
364}
365
366bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
367 CheckerContext &C) const{
368 // Check if the function contains a format string argument.
369 unsigned int ArgNum = 0;
370 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
371 return false;
372
373 // If either the format string content or the pointer itself are tainted, warn.
374 const ProgramState *State = C.getState();
375 const Expr *Arg = CE->getArg(ArgNum);
376 if (State->isTainted(getPointedToSymbol(C, Arg, false)) ||
377 State->isTainted(Arg, C.getLocationContext()))
378 if (ExplodedNode *N = C.addTransition()) {
379 initBugType();
380 BugReport *report = new BugReport(*BT,
381 "Tainted format string (CWE-134: Uncontrolled Format String)", N);
382 report->addRange(Arg->getSourceRange());
383 C.EmitReport(report);
384 return true;
385 }
386 return false;
387}
388
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000389void ento::registerGenericTaintChecker(CheckerManager &mgr) {
390 mgr.registerChecker<GenericTaintChecker>();
391}