blob: 37d80ade48adc0d8df1b2f0288c2a9b6ddcfb434 [file] [log] [blame]
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under constrained.
//
//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks9ffbe242011-12-17 00:26:34 +000021#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksdf18c5a2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks1fb826a2012-01-12 02:22:34 +000023#include <climits>
Anna Zaksdf18c5a2011-11-16 19:58:13 +000024
25using namespace clang;
26using namespace ento;
27
28namespace {
Anna Zaksefd69892011-12-14 00:56:18 +000029class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks9ffbe242011-12-17 00:26:34 +000030 check::PreStmt<CallExpr> > {
31public:
Anna Zaks8568ee72012-01-14 02:48:40 +000032 static void *getTag() { static int Tag; return &Tag; }
33
34 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
35 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
36
37 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000038
Anna Zaks9ffbe242011-12-17 00:26:34 +000039private:
Anna Zaks8568ee72012-01-14 02:48:40 +000040 static const unsigned ReturnValueIndex = UINT_MAX;
Anna Zaks022b3f42012-01-17 00:37:02 +000041 static const unsigned InvalidArgIndex = UINT_MAX - 1;
Anna Zaks8568ee72012-01-14 02:48:40 +000042
Anna Zaks8f4caf52011-11-18 02:26:36 +000043 mutable llvm::OwningPtr<BugType> BT;
44 void initBugType() const;
45
Anna Zaks1fb826a2012-01-12 02:22:34 +000046 /// \brief Catch taint related bugs. Check if tainted data is passed to a
47 /// system call etc.
Anna Zaks9f03b622012-01-07 02:33:10 +000048 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
49
Anna Zaks1fb826a2012-01-12 02:22:34 +000050 /// \brief Add taint sources on a pre-visit.
51 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
52
53 /// \brief Propagate taint generated at pre-visit.
54 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
55
56 /// \brief Add taint sources on a post visit.
57 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
58
59 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks8f4caf52011-11-18 02:26:36 +000060 /// (points to).
61 SymbolRef getPointedToSymbol(CheckerContext &C,
Anna Zaks9ffbe242011-12-17 00:26:34 +000062 const Expr *Arg,
Anna Zaks1fb826a2012-01-12 02:22:34 +000063 bool IssueWarning = false) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000064
Anna Zaks022b3f42012-01-17 00:37:02 +000065 inline bool isTaintedOrPointsToTainted(const Expr *E,
66 const ProgramState *State,
67 CheckerContext &C) const {
68 return (State->isTainted(E, C.getLocationContext()) ||
69 (E->getType().getTypePtr()->isPointerType() &&
70 State->isTainted(getPointedToSymbol(C, E))));
71 }
72
Anna Zaks9ffbe242011-12-17 00:26:34 +000073 /// Functions defining the attack surface.
74 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
75 CheckerContext &C) const;
76 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000077 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +000078
79 /// Taint the scanned input if the file is tainted.
80 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
81 /// Taint if any of the arguments are tainted.
82 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks1fb826a2012-01-12 02:22:34 +000083 const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
Anna Zaksdf18c5a2011-11-16 19:58:13 +000084
Anna Zaksd3d85482011-12-16 18:28:50 +000085 /// Check if the region the expression evaluates to is the standard input,
86 /// and thus, is tainted.
Anna Zaksefd69892011-12-14 00:56:18 +000087 bool isStdin(const Expr *E, CheckerContext &C) const;
Anna Zaksefd69892011-12-14 00:56:18 +000088
Anna Zaks9f03b622012-01-07 02:33:10 +000089 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks8568ee72012-01-14 02:48:40 +000090 static const char MsgUncontrolledFormatString[];
Anna Zaks9f03b622012-01-07 02:33:10 +000091 bool checkUncontrolledFormatString(const CallExpr *CE,
92 CheckerContext &C) const;
93
Anna Zaks8568ee72012-01-14 02:48:40 +000094 /// Check for:
95 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
96 /// CWE-78, "Failure to Sanitize Data into an OS Command"
97 static const char MsgSanitizeSystemArgs[];
98 bool checkSystemCall(const CallExpr *CE, StringRef Name,
99 CheckerContext &C) const;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000100
Anna Zaks8568ee72012-01-14 02:48:40 +0000101 /// Generate a report if the expression is tainted or points to tainted data.
102 bool generateReportIfTainted(const Expr *E, const char Msg[],
103 CheckerContext &C) const;
Anna Zaks022b3f42012-01-17 00:37:02 +0000104
105
106 typedef llvm::SmallVector<unsigned, 2> ArgVector;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000107
Anna Zaks022b3f42012-01-17 00:37:02 +0000108 /// \brief A struct used to specify taint propagation rules for a function.
109 ///
110 /// If any of the possible taint source arguments is tainted, all of the
111 /// destination arguments should also be tainted. Use InvalidArgIndex in the
112 /// src list to specify that all of the arguments can introduce taint. Use
113 /// InvalidArgIndex in the dst arguments to signify that all the non-const
114 /// pointer and reference arguments might be tainted on return. If
115 /// ReturnValueIndex is added to the dst list, the return value will be
116 /// tainted.
117 struct TaintPropagationRule {
118 /// List of arguments which can be taint sources and should be checked.
119 ArgVector SrcArgs;
120 /// List of arguments which should be tainted on function return.
121 ArgVector DstArgs;
122
123 TaintPropagationRule() {}
124
125 TaintPropagationRule(unsigned SArg, unsigned DArg) {
126 SrcArgs.push_back(SArg);
127 DstArgs.push_back(DArg);
128 }
129
130 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
131 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
132
133 inline bool isNull() { return SrcArgs.empty(); }
134 };
135
136 /// \brief Pre-process a function which propagates taint according to the
137 /// given taint rule.
138 const ProgramState *prePropagateTaint(const CallExpr *CE,
139 CheckerContext &C,
140 const TaintPropagationRule PR) const;
141
142
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000143};
Anna Zaks8568ee72012-01-14 02:48:40 +0000144// TODO: We probably could use TableGen here.
145const char GenericTaintChecker::MsgUncontrolledFormatString[] =
146 "Tainted format string (CWE-134: Uncontrolled Format String)";
147
148const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
149 "Tainted data passed to a system call "
150 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
151
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000152}
153
Anna Zaks1fb826a2012-01-12 02:22:34 +0000154/// A set which is used to pass information from call pre-visit instruction
155/// to the call post-visit. The values are unsigned integers, which are either
156/// ReturnValueIndex, or indexes of the pointer/reference argument, which
157/// points to data, which should be tainted on return.
158namespace { struct TaintArgsOnPostVisit{}; }
159namespace clang { namespace ento {
160template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
161 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
162 static void *GDMIndex() { return GenericTaintChecker::getTag(); }
163};
164}}
Anna Zaks9ffbe242011-12-17 00:26:34 +0000165
Anna Zaks8f4caf52011-11-18 02:26:36 +0000166inline void GenericTaintChecker::initBugType() const {
167 if (!BT)
Anna Zaks9f03b622012-01-07 02:33:10 +0000168 BT.reset(new BugType("Taint Analysis", "General"));
Anna Zaks8f4caf52011-11-18 02:26:36 +0000169}
170
Anna Zaks9ffbe242011-12-17 00:26:34 +0000171void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
172 CheckerContext &C) const {
Anna Zaks9f03b622012-01-07 02:33:10 +0000173 // Check for errors first.
174 if (checkPre(CE, C))
175 return;
Anna Zaks9ffbe242011-12-17 00:26:34 +0000176
Anna Zaks9f03b622012-01-07 02:33:10 +0000177 // Add taint second.
Anna Zaks1fb826a2012-01-12 02:22:34 +0000178 addSourcesPre(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000179}
180
181void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
182 CheckerContext &C) const {
Anna Zaks1fb826a2012-01-12 02:22:34 +0000183 if (propagateFromPre(CE, C))
184 return;
185 addSourcesPost(CE, C);
Anna Zaks9f03b622012-01-07 02:33:10 +0000186}
187
Anna Zaks1fb826a2012-01-12 02:22:34 +0000188void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
189 CheckerContext &C) const {
Anna Zaks9ffbe242011-12-17 00:26:34 +0000190 // Set the evaluation function by switching on the callee name.
191 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000192 if (Name.empty())
193 return;
Anna Zaks022b3f42012-01-17 00:37:02 +0000194
195 const ProgramState *State = 0;
196
197 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
198 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
199 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
200 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
201 .Default(TaintPropagationRule());
202
203 if (!Rule.isNull()) {
204 State = prePropagateTaint(CE, C, Rule);
205 if (!State)
206 return;
207 C.addTransition(State);
208 }
209
Anna Zaks9ffbe242011-12-17 00:26:34 +0000210 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000211 .Case("fscanf", &GenericTaintChecker::preFscanf)
212 .Cases("strcpy", "__builtin___strcpy_chk",
213 "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
214 .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
215 .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000216 .Default(0);
217
218 // Check and evaluate the call.
219 if (evalFunction)
220 State = (this->*evalFunction)(CE, C);
221 if (!State)
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000222 return;
223
Anna Zaks9ffbe242011-12-17 00:26:34 +0000224 C.addTransition(State);
225}
226
Anna Zaks1fb826a2012-01-12 02:22:34 +0000227bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
228 CheckerContext &C) const {
229 const ProgramState *State = C.getState();
230
231 // Depending on what was tainted at pre-visit, we determined a set of
232 // arguments which should be tainted after the function returns. These are
233 // stored in the state as TaintArgsOnPostVisit set.
234 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
235 for (llvm::ImmutableSet<unsigned>::iterator
236 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
237 unsigned ArgNum = *I;
238
239 // Special handling for the tainted return value.
240 if (ArgNum == ReturnValueIndex) {
241 State = State->addTaint(CE, C.getLocationContext());
242 continue;
243 }
244
245 // The arguments are pointer arguments. The data they are pointing at is
246 // tainted after the call.
247 const Expr* Arg = CE->getArg(ArgNum);
248 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
249 if (Sym)
250 State = State->addTaint(Sym);
251 }
252
253 // Clear up the taint info from the state.
254 State = State->remove<TaintArgsOnPostVisit>();
255
256 if (State != C.getState()) {
257 C.addTransition(State);
258 return true;
259 }
260 return false;
261}
262
263void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
264 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000265 // Define the attack surface.
266 // Set the evaluation function by switching on the callee name.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000267 StringRef Name = C.getCalleeName(CE);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000268 if (Name.empty())
269 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000270 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000271 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zaks1009ac72011-12-14 00:56:02 +0000272 // TODO: Add support for vfscanf & family.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000273 .Case("getchar", &GenericTaintChecker::postRetTaint)
274 .Case("getenv", &GenericTaintChecker::postRetTaint)
275 .Case("fopen", &GenericTaintChecker::postRetTaint)
276 .Case("fdopen", &GenericTaintChecker::postRetTaint)
277 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaks1fb826a2012-01-12 02:22:34 +0000278 .Default(0);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000279
280 // If the callee isn't defined, it is not of security concern.
281 // Check and evaluate the call.
Anna Zaks9f03b622012-01-07 02:33:10 +0000282 const ProgramState *State = 0;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000283 if (evalFunction)
Anna Zaks9ffbe242011-12-17 00:26:34 +0000284 State = (this->*evalFunction)(CE, C);
285 if (!State)
286 return;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000287
Anna Zaks9ffbe242011-12-17 00:26:34 +0000288 C.addTransition(State);
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000289}
Anna Zaks8f4caf52011-11-18 02:26:36 +0000290
Anna Zaks9f03b622012-01-07 02:33:10 +0000291bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
292
293 if (checkUncontrolledFormatString(CE, C))
294 return true;
295
Anna Zaks8568ee72012-01-14 02:48:40 +0000296 StringRef Name = C.getCalleeName(CE);
297 if (Name.empty())
298 return false;
299
300 if (checkSystemCall(CE, Name, C))
301 return true;
302
Anna Zaks9f03b622012-01-07 02:33:10 +0000303 return false;
304}
305
Anna Zaks8f4caf52011-11-18 02:26:36 +0000306SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
307 const Expr* Arg,
308 bool IssueWarning) const {
309 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000310 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zaksd3d85482011-12-16 18:28:50 +0000311 if (AddrVal.isUnknownOrUndef())
Anna Zakse3d250e2011-12-11 18:43:40 +0000312 return 0;
313
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000314 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
Anna Zaks8f4caf52011-11-18 02:26:36 +0000315
316 if (!AddrLoc && !IssueWarning)
317 return 0;
318
319 // If the Expr is not a location, issue a warning.
320 if (!AddrLoc) {
321 assert(IssueWarning);
322 if (ExplodedNode *N = C.generateSink(State)) {
323 initBugType();
324 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
325 report->addRange(Arg->getSourceRange());
326 C.EmitReport(report);
327 }
328 return 0;
329 }
330
Anna Zaks71d29092012-01-13 00:56:51 +0000331 const PointerType *ArgTy =
332 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
333 assert(ArgTy);
334 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000335 return Val.getAsSymbol();
336}
337
Anna Zaks022b3f42012-01-17 00:37:02 +0000338const ProgramState *
339GenericTaintChecker::prePropagateTaint(const CallExpr *CE,
340 CheckerContext &C,
341 const TaintPropagationRule PR) const {
342 const ProgramState *State = C.getState();
343
344 // Check for taint in arguments.
345 bool IsTainted = false;
346 for (ArgVector::const_iterator I = PR.SrcArgs.begin(),
347 E = PR.SrcArgs.end(); I != E; ++I) {
348 unsigned ArgNum = *I;
349
350 if (ArgNum == InvalidArgIndex) {
351 // Check if any of the arguments is tainted.
352 for (unsigned int i = 0; i < CE->getNumArgs(); ++i)
353 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
354 break;
355 break;
356 }
357
358 assert(ArgNum < CE->getNumArgs());
359 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
360 break;
361 }
362 if (!IsTainted)
363 return State;
364
365 // Mark the arguments which should be tainted after the function returns.
366 for (ArgVector::const_iterator I = PR.DstArgs.begin(),
367 E = PR.DstArgs.end(); I != E; ++I) {
368 unsigned ArgNum = *I;
369
370 // Should we mark all arguments as tainted?
371 if (ArgNum == InvalidArgIndex) {
372 // For all pointer and references that were passed in:
373 // If they are not pointing to const data, mark data as tainted.
374 // TODO: So far we are just going one level down; ideally we'd need to
375 // recurse here.
376 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
377 const Expr *Arg = CE->getArg(i);
378 // Process pointer argument.
379 const Type *ArgTy = Arg->getType().getTypePtr();
380 QualType PType = ArgTy->getPointeeType();
381 if ((!PType.isNull() && !PType.isConstQualified())
382 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
383 State = State->add<TaintArgsOnPostVisit>(i);
384 }
385 continue;
386 }
387
388 // Should mark the return value?
389 if (ArgNum == ReturnValueIndex) {
390 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
391 continue;
392 }
393
394 // Mark the given argument.
395 assert(ArgNum < CE->getNumArgs());
396 State = State->add<TaintArgsOnPostVisit>(ArgNum);
397 }
398
399 return State;
400}
401
402
Anna Zaks1fb826a2012-01-12 02:22:34 +0000403// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
404// and arg 1 should get taint.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000405const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
406 CheckerContext &C) const {
407 assert(CE->getNumArgs() >= 2);
408 const ProgramState *State = C.getState();
409
410 // Check is the file descriptor is tainted.
Ted Kremenek5eca4822012-01-06 22:09:28 +0000411 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaks1fb826a2012-01-12 02:22:34 +0000412 isStdin(CE->getArg(0), C)) {
413 // All arguments except for the first two should get taint.
414 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
415 State = State->add<TaintArgsOnPostVisit>(i);
416 return State;
417 }
418
Anna Zaks9ffbe242011-12-17 00:26:34 +0000419 return 0;
420}
421
Anna Zaks8568ee72012-01-14 02:48:40 +0000422// If any arguments are tainted, mark the return value as tainted on post-visit.
Anna Zaks9ffbe242011-12-17 00:26:34 +0000423const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
424 CheckerContext &C) const {
425 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
426 const ProgramState *State = C.getState();
427 const Expr *Arg = CE->getArg(i);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000428 if (State->isTainted(Arg, C.getLocationContext()) ||
429 State->isTainted(getPointedToSymbol(C, Arg)))
Anna Zaks1fb826a2012-01-12 02:22:34 +0000430 return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000431 }
432 return 0;
433}
434
Anna Zaks1fb826a2012-01-12 02:22:34 +0000435const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
436 CheckerContext &C) const {
437 assert(CE->getNumArgs() >= 2);
438 const Expr *FromArg = CE->getArg(1);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000439 const ProgramState *State = C.getState();
Anna Zaks1fb826a2012-01-12 02:22:34 +0000440 if (State->isTainted(FromArg, C.getLocationContext()) ||
441 State->isTainted(getPointedToSymbol(C, FromArg)))
442 return State = State->add<TaintArgsOnPostVisit>(0);
Anna Zaks9ffbe242011-12-17 00:26:34 +0000443 return 0;
444}
445
446const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
447 CheckerContext &C) const {
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000448 const ProgramState *State = C.getState();
Anna Zaks1009ac72011-12-14 00:56:02 +0000449 assert(CE->getNumArgs() >= 2);
Ted Kremenek5eca4822012-01-06 22:09:28 +0000450 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000451 // All arguments except for the very first one should get taint.
452 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
453 // The arguments are pointer arguments. The data they are pointing at is
454 // tainted after the call.
455 const Expr* Arg = CE->getArg(i);
Anna Zaks1fb826a2012-01-12 02:22:34 +0000456 SymbolRef Sym = getPointedToSymbol(C, Arg, true);
Anna Zaks1009ac72011-12-14 00:56:02 +0000457 if (Sym)
458 State = State->addTaint(Sym);
459 }
Anna Zaks9ffbe242011-12-17 00:26:34 +0000460 return State;
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000461}
462
Anna Zaks9ffbe242011-12-17 00:26:34 +0000463const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
464 CheckerContext &C) const {
Ted Kremenek5eca4822012-01-06 22:09:28 +0000465 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000466}
467
Anna Zaksefd69892011-12-14 00:56:18 +0000468bool GenericTaintChecker::isStdin(const Expr *E,
469 CheckerContext &C) const {
Anna Zaksd3d85482011-12-16 18:28:50 +0000470 const ProgramState *State = C.getState();
Ted Kremenek5eca4822012-01-06 22:09:28 +0000471 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaksefd69892011-12-14 00:56:18 +0000472
Anna Zaksd3d85482011-12-16 18:28:50 +0000473 // stdin is a pointer, so it would be a region.
474 const MemRegion *MemReg = Val.getAsRegion();
475
476 // The region should be symbolic, we do not know it's value.
477 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
478 if (!SymReg)
Anna Zaksefd69892011-12-14 00:56:18 +0000479 return false;
480
Anna Zaksd3d85482011-12-16 18:28:50 +0000481 // Get it's symbol and find the declaration region it's pointing to.
482 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
483 if (!Sm)
484 return false;
485 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
486 if (!DeclReg)
487 return false;
Anna Zaksefd69892011-12-14 00:56:18 +0000488
Anna Zaksd3d85482011-12-16 18:28:50 +0000489 // This region corresponds to a declaration, find out if it's a global/extern
490 // variable named stdin with the proper type.
491 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
492 D = D->getCanonicalDecl();
493 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
494 if (const PointerType * PtrTy =
495 dyn_cast<PointerType>(D->getType().getTypePtr()))
496 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
497 return true;
498 }
Anna Zaksefd69892011-12-14 00:56:18 +0000499 return false;
500}
501
Anna Zaks9f03b622012-01-07 02:33:10 +0000502static bool getPrintfFormatArgumentNum(const CallExpr *CE,
503 const CheckerContext &C,
504 unsigned int &ArgNum) {
505 // Find if the function contains a format string argument.
506 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
507 // vsnprintf, syslog, custom annotated functions.
508 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
509 if (!FDecl)
510 return false;
511 for (specific_attr_iterator<FormatAttr>
512 i = FDecl->specific_attr_begin<FormatAttr>(),
513 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
514
515 const FormatAttr *Format = *i;
516 ArgNum = Format->getFormatIdx() - 1;
517 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
518 return true;
519 }
520
521 // Or if a function is named setproctitle (this is a heuristic).
522 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
523 ArgNum = 0;
524 return true;
525 }
526
527 return false;
528}
529
Anna Zaks8568ee72012-01-14 02:48:40 +0000530bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
531 const char Msg[],
532 CheckerContext &C) const {
533 assert(E);
534
535 // Check for taint.
536 const ProgramState *State = C.getState();
537 if (!State->isTainted(getPointedToSymbol(C, E)) &&
538 !State->isTainted(E, C.getLocationContext()))
539 return false;
540
541 // Generate diagnostic.
542 if (ExplodedNode *N = C.addTransition()) {
543 initBugType();
544 BugReport *report = new BugReport(*BT, Msg, N);
545 report->addRange(E->getSourceRange());
546 C.EmitReport(report);
547 return true;
548 }
549 return false;
550}
551
Anna Zaks9f03b622012-01-07 02:33:10 +0000552bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
553 CheckerContext &C) const{
554 // Check if the function contains a format string argument.
555 unsigned int ArgNum = 0;
556 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
557 return false;
558
559 // If either the format string content or the pointer itself are tainted, warn.
Anna Zaks8568ee72012-01-14 02:48:40 +0000560 if (generateReportIfTainted(CE->getArg(ArgNum),
561 MsgUncontrolledFormatString, C))
562 return true;
563 return false;
564}
565
566bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
567 StringRef Name,
568 CheckerContext &C) const {
569 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
570 .Case("system", 0)
571 .Case("popen", 0)
572 .Default(UINT_MAX);
573
574 if (ArgNum == UINT_MAX)
575 return false;
576
577 if (generateReportIfTainted(CE->getArg(ArgNum),
578 MsgSanitizeSystemArgs, C))
579 return true;
580
Anna Zaks9f03b622012-01-07 02:33:10 +0000581 return false;
582}
583
Anna Zaksdf18c5a2011-11-16 19:58:13 +0000584void ento::registerGenericTaintChecker(CheckerManager &mgr) {
585 mgr.registerChecker<GenericTaintChecker>();
586}