blob: f09d670c1ff7aa36635383642aad77ee76fe97ef [file] [log] [blame]
Anna Zaksdf18c5a2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23
24using namespace clang;
25using namespace ento;
26
27namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000028class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000029 check::PreStmt<CallExpr> > {
30public:
31 enum TaintOnPreVisitKind {
32 /// No taint propagates from pre-visit to post-visit.
33 PrevisitNone = 0,
34 /// Based on the pre-visit, the return argument of the call
35 /// should be tainted.
36 PrevisitTaintRet = 1,
37 /// Based on the pre-visit, the call can taint values through it's
38 /// pointer/reference arguments.
39 PrevisitTaintArgs = 2
40 };
Anna Zaksdf18c5a2011-11-16 19:58:13 +000041
Anna Zaks9ffbe242011-12-17 00:26:34 +000042private:
Anna Zaks8f4caf52011-11-18 02:26:36 +000043 mutable llvm::OwningPtr<BugType> BT;
44 void initBugType() const;
45
46 /// Given a pointer argument, get the symbol of the value it contains
47 /// (points to).
48 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000049 const Expr *Arg,
Anna Zaks8f4caf52011-11-18 02:26:36 +000050 bool IssueWarning = true) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000051
Anna Zaks9ffbe242011-12-17 00:26:34 +000052 /// Functions defining the attack surface.
53 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
54 CheckerContext &C) const;
55 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
56 const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const;
57 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
58 const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const;
59
60 /// Taint the scanned input if the file is tainted.
61 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
62 /// Taint if any of the arguments are tainted.
63 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000064
Anna Zaksd3d85482011-12-16 18:28:50 +000065 /// Check if the region the expression evaluates to is the standard input,
66 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000067 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000068
Anna Zaksdf18c5a2011-11-16 19:58:13 +000069public:
Anna Zaks9ffbe242011-12-17 00:26:34 +000070 static void *getTag() { static int Tag; return &Tag; }
71
Anna Zaksdf18c5a2011-11-16 19:58:13 +000072 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000073 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000074
75 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
76
Anna Zaksdf18c5a2011-11-16 19:58:13 +000077};
78}
79
Anna Zaks9ffbe242011-12-17 00:26:34 +000080/// Definitions for the checker specific state.
81namespace { struct TaintOnPreVisit {};}
82namespace clang {
83namespace ento {
84 /// A flag which is used to pass information from call pre-visit instruction
85 /// to the call post-visit. The value is an unsigned, which takes on values
86 /// of the TaintOnPreVisitKind enumeration.
87 template<>
88 struct ProgramStateTrait<TaintOnPreVisit> :
89 public ProgramStatePartialTrait<unsigned> {
90 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
91 };
92}
93}
94
Anna Zaks8f4caf52011-11-18 02:26:36 +000095inline void GenericTaintChecker::initBugType() const {
96 if (!BT)
97 BT.reset(new BugType("Tainted data checking", "General"));
98}
99
Anna Zaks9ffbe242011-12-17 00:26:34 +0000100void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
101 CheckerContext &C) const {
102 const ProgramState *State = C.getState();
103
104 // Set the evaluation function by switching on the callee name.
105 StringRef Name = C.getCalleeName(CE);
106 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
107 .Case("fscanf", &GenericTaintChecker::preFscanf)
108 .Case("atoi", &GenericTaintChecker::preAnyArgs)
109 .Case("atol", &GenericTaintChecker::preAnyArgs)
110 .Case("atoll", &GenericTaintChecker::preAnyArgs)
111 .Default(0);
112
113 // Check and evaluate the call.
114 if (evalFunction)
115 State = (this->*evalFunction)(CE, C);
116 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000117 return;
118
Anna Zaks9ffbe242011-12-17 00:26:34 +0000119 C.addTransition(State);
120}
121
122void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
123 CheckerContext &C) const {
124 const ProgramState *State = C.getState();
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000125
126 // Define the attack surface.
127 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000128 StringRef Name = C.getCalleeName(CE);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000129 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000130 .Case("scanf", &GenericTaintChecker::postScanf)
131 .Case("fscanf", &GenericTaintChecker::postFscanf)
132 .Case("sscanf", &GenericTaintChecker::postFscanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000133 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000134 .Case("getchar", &GenericTaintChecker::postRetTaint)
135 .Case("getenv", &GenericTaintChecker::postRetTaint)
136 .Case("fopen", &GenericTaintChecker::postRetTaint)
137 .Case("fdopen", &GenericTaintChecker::postRetTaint)
138 .Case("freopen", &GenericTaintChecker::postRetTaint)
139 .Default(&GenericTaintChecker::postDefault);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000140
141 // If the callee isn't defined, it is not of security concern.
142 // Check and evaluate the call.
143 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000144 State = (this->*evalFunction)(CE, C);
145 if (!State)
146 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000147
Anna Zaks9ffbe242011-12-17 00:26:34 +0000148 assert(State->get<TaintOnPreVisit>() == PrevisitNone &&
149 "State has to be cleared.");
150 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000151}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000152
153SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
154 const Expr* Arg,
155 bool IssueWarning) const {
156 const ProgramState *State = C.getState();
Anna Zaksefd69892011-12-14 00:56:18 +0000157 SVal AddrVal = State->getSVal(Arg->IgnoreParens());
Anna Zaksd3d85482011-12-16 18:28:50 +0000158 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000159 return 0;
160
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000161 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000162
163 if (!AddrLoc && !IssueWarning)
164 return 0;
165
166 // If the Expr is not a location, issue a warning.
167 if (!AddrLoc) {
168 assert(IssueWarning);
169 if (ExplodedNode *N = C.generateSink(State)) {
170 initBugType();
171 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
172 report->addRange(Arg->getSourceRange());
173 C.EmitReport(report);
174 }
175 return 0;
176 }
177
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000178 SVal Val = State->getSVal(*AddrLoc);
179 return Val.getAsSymbol();
180}
181
Anna Zaks9ffbe242011-12-17 00:26:34 +0000182const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
183 CheckerContext &C) const {
184 assert(CE->getNumArgs() >= 2);
185 const ProgramState *State = C.getState();
186
187 // Check is the file descriptor is tainted.
188 if (State->isTainted(CE->getArg(0)) || isStdin(CE->getArg(0), C))
189 return State->set<TaintOnPreVisit>(PrevisitTaintArgs);
190 return 0;
191}
192
193// If any other arguments are tainted, mark state as tainted on pre-visit.
194const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
195 CheckerContext &C) const {
196 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
197 const ProgramState *State = C.getState();
198 const Expr *Arg = CE->getArg(i);
199 if (State->isTainted(Arg) || State->isTainted(getPointedToSymbol(C, Arg)))
200 return State = State->set<TaintOnPreVisit>(PrevisitTaintRet);
201 }
202 return 0;
203}
204
205const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE,
206 CheckerContext &C) const {
207 const ProgramState *State = C.getState();
208
209 // Check if we know that the result needs to be tainted based on the
210 // pre-visit analysis.
211 if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) {
212 State = State->addTaint(CE);
213 return State->set<TaintOnPreVisit>(PrevisitNone);
214 }
215
216 return 0;
217}
218
219const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
220 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000221 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000222 assert(CE->getNumArgs() >= 2);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000223 SVal x = State->getSVal(CE->getArg(1));
224 // All arguments except for the very first one should get taint.
225 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
226 // The arguments are pointer arguments. The data they are pointing at is
227 // tainted after the call.
228 const Expr* Arg = CE->getArg(i);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000229 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000230 if (Sym)
231 State = State->addTaint(Sym);
232 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000233 return State;
Anna Zaks1009ac72011-12-14 00:56:02 +0000234}
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000235
Anna Zaks1009ac72011-12-14 00:56:02 +0000236/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
237/// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000238const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE,
239 CheckerContext &C) const {
Anna Zaks1009ac72011-12-14 00:56:02 +0000240 const ProgramState *State = C.getState();
241 assert(CE->getNumArgs() >= 2);
242
Anna Zaks9ffbe242011-12-17 00:26:34 +0000243 // Fscanf is only tainted if the input file is tainted at pre visit, so
244 // check for that first.
245 if (State->get<TaintOnPreVisit>() == PrevisitNone)
246 return 0;
247
248 // Reset the taint state.
249 State = State->set<TaintOnPreVisit>(PrevisitNone);
Anna Zaks1009ac72011-12-14 00:56:02 +0000250
251 // All arguments except for the first two should get taint.
252 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
253 // The arguments are pointer arguments. The data they are pointing at is
254 // tainted after the call.
255 const Expr* Arg = CE->getArg(i);
256 SymbolRef Sym = getPointedToSymbol(C, Arg);
257 if (Sym)
258 State = State->addTaint(Sym);
259 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000260 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000261}
262
Anna Zaks9ffbe242011-12-17 00:26:34 +0000263const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
264 CheckerContext &C) const {
265 return C.getState()->addTaint(CE);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000266}
267
Anna Zaksefd69892011-12-14 00:56:18 +0000268bool GenericTaintChecker::isStdin(const Expr *E,
269 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000270 const ProgramState *State = C.getState();
271 SVal Val = State->getSVal(E);
Anna Zaksefd69892011-12-14 00:56:18 +0000272
Anna Zaksd3d85482011-12-16 18:28:50 +0000273 // stdin is a pointer, so it would be a region.
274 const MemRegion *MemReg = Val.getAsRegion();
275
276 // The region should be symbolic, we do not know it's value.
277 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
278 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000279 return false;
280
Anna Zaksd3d85482011-12-16 18:28:50 +0000281 // Get it's symbol and find the declaration region it's pointing to.
282 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
283 if (!Sm)
284 return false;
285 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
286 if (!DeclReg)
287 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000288
Anna Zaksd3d85482011-12-16 18:28:50 +0000289 // This region corresponds to a declaration, find out if it's a global/extern
290 // variable named stdin with the proper type.
291 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
292 D = D->getCanonicalDecl();
293 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
294 if (const PointerType * PtrTy =
295 dyn_cast<PointerType>(D->getType().getTypePtr()))
296 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
297 return true;
298 }
Anna Zaksefd69892011-12-14 00:56:18 +0000299 return false;
300}
301
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000302void ento::registerGenericTaintChecker(CheckerManager &mgr) {
303 mgr.registerChecker<GenericTaintChecker>();
304}