blob: af334c414367574bc75adb0ff790de1836f5ec70 [file] [log] [blame]
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are underconstrained.
//
//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
Chandler Carruth3a022472012-12-04 09:13:33 +000018#include "clang/AST/Attr.h"
19#include "clang/Basic/Builtins.h"
20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000021#include "clang/StaticAnalyzer/Core/Checker.h"
22#include "clang/StaticAnalyzer/Core/CheckerManager.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks3b0ab202011-12-17 00:26:34 +000024#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksb3fa8d72012-01-12 02:22:34 +000025#include <climits>
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000026
27using namespace clang;
28using namespace ento;
29
30namespace {
Anna Zaks099fe3f2011-12-14 00:56:18 +000031class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks3b0ab202011-12-17 00:26:34 +000032 check::PreStmt<CallExpr> > {
33public:
Anna Zaks0244cd72012-01-14 02:48:40 +000034 static void *getTag() { static int Tag; return &Tag; }
35
36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks0244cd72012-01-14 02:48:40 +000037
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000039
Anna Zaks3b0ab202011-12-17 00:26:34 +000040private:
Anna Zaksbf740512012-01-24 19:32:25 +000041 static const unsigned InvalidArgIndex = UINT_MAX;
42 /// Denotes the return vale.
43 static const unsigned ReturnValueIndex = UINT_MAX - 1;
Anna Zaks0244cd72012-01-14 02:48:40 +000044
Ahmed Charlesb8984322014-03-07 20:03:18 +000045 mutable std::unique_ptr<BugType> BT;
Anna Zaks5d324e52012-01-18 02:45:07 +000046 inline void initBugType() const {
47 if (!BT)
Alexander Kornienko4aca9b12014-02-11 21:49:21 +000048 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
Anna Zaks5d324e52012-01-18 02:45:07 +000049 }
Anna Zaks457c6872011-11-18 02:26:36 +000050
Anna Zaksb3fa8d72012-01-12 02:22:34 +000051 /// \brief Catch taint related bugs. Check if tainted data is passed to a
52 /// system call etc.
Anna Zaks126a2ef2012-01-07 02:33:10 +000053 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
Anna Zaksb3fa8d72012-01-12 02:22:34 +000055 /// \brief Add taint sources on a pre-visit.
56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58 /// \brief Propagate taint generated at pre-visit.
59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61 /// \brief Add taint sources on a post visit.
62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
Anna Zaksbf740512012-01-24 19:32:25 +000064 /// Check if the region the expression evaluates to is the standard input,
65 /// and thus, is tainted.
66 static bool isStdin(const Expr *E, CheckerContext &C);
67
Anna Zaksb3fa8d72012-01-12 02:22:34 +000068 /// \brief Given a pointer argument, get the symbol of the value it contains
Anna Zaks457c6872011-11-18 02:26:36 +000069 /// (points to).
Anna Zaks7f6a6b72012-01-18 02:45:13 +000070 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000071
Anna Zaks3b0ab202011-12-17 00:26:34 +000072 /// Functions defining the attack surface.
Ted Kremenek49b1e382012-01-26 21:29:00 +000073 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
Anna Zaks3b0ab202011-12-17 00:26:34 +000074 CheckerContext &C) const;
Ted Kremenek49b1e382012-01-26 21:29:00 +000075 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks3b0ab202011-12-17 00:26:34 +000078
79 /// Taint the scanned input if the file is tainted.
Ted Kremenek49b1e382012-01-26 21:29:00 +000080 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000081
Anna Zaks126a2ef2012-01-07 02:33:10 +000082 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks0244cd72012-01-14 02:48:40 +000083 static const char MsgUncontrolledFormatString[];
Anna Zaks126a2ef2012-01-07 02:33:10 +000084 bool checkUncontrolledFormatString(const CallExpr *CE,
85 CheckerContext &C) const;
86
Anna Zaks0244cd72012-01-14 02:48:40 +000087 /// Check for:
88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89 /// CWE-78, "Failure to Sanitize Data into an OS Command"
90 static const char MsgSanitizeSystemArgs[];
91 bool checkSystemCall(const CallExpr *CE, StringRef Name,
92 CheckerContext &C) const;
Anna Zaks3b0ab202011-12-17 00:26:34 +000093
Anna Zaks560dbe92012-01-18 02:45:11 +000094 /// Check if tainted data is used as a buffer size ins strn.. functions,
95 /// and allocators.
96 static const char MsgTaintedBufferSize[];
97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98 CheckerContext &C) const;
99
Anna Zaks0244cd72012-01-14 02:48:40 +0000100 /// Generate a report if the expression is tainted or points to tainted data.
101 bool generateReportIfTainted(const Expr *E, const char Msg[],
102 CheckerContext &C) const;
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000103
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000104 /// The bug visitor prints a diagnostic message at the location where a given
105 /// variable was tainted.
106 class TaintBugVisitor
107 : public BugReporterVisitorImpl<TaintBugVisitor> {
108 private:
109 const SVal V;
110
111 public:
112 TaintBugVisitor(const SVal V) : V(V) {}
113 void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
114
115 std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
116 const ExplodedNode *PrevN,
117 BugReporterContext &BRC,
118 BugReport &BR) override;
119 };
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000120
Dmitri Gribenkof8579502013-01-12 19:30:44 +0000121 typedef SmallVector<unsigned, 2> ArgVector;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000122
Anna Zaks3666d2c2012-01-17 00:37:02 +0000123 /// \brief A struct used to specify taint propagation rules for a function.
124 ///
125 /// If any of the possible taint source arguments is tainted, all of the
126 /// destination arguments should also be tainted. Use InvalidArgIndex in the
127 /// src list to specify that all of the arguments can introduce taint. Use
128 /// InvalidArgIndex in the dst arguments to signify that all the non-const
129 /// pointer and reference arguments might be tainted on return. If
130 /// ReturnValueIndex is added to the dst list, the return value will be
131 /// tainted.
132 struct TaintPropagationRule {
133 /// List of arguments which can be taint sources and should be checked.
134 ArgVector SrcArgs;
135 /// List of arguments which should be tainted on function return.
136 ArgVector DstArgs;
Anna Zaks5d324e52012-01-18 02:45:07 +0000137 // TODO: Check if using other data structures would be more optimal.
Anna Zaks3666d2c2012-01-17 00:37:02 +0000138
139 TaintPropagationRule() {}
140
Anna Zaks5d324e52012-01-18 02:45:07 +0000141 TaintPropagationRule(unsigned SArg,
142 unsigned DArg, bool TaintRet = false) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000143 SrcArgs.push_back(SArg);
144 DstArgs.push_back(DArg);
Anna Zaks5d324e52012-01-18 02:45:07 +0000145 if (TaintRet)
146 DstArgs.push_back(ReturnValueIndex);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000147 }
148
Anna Zaks5d324e52012-01-18 02:45:07 +0000149 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
150 unsigned DArg, bool TaintRet = false) {
151 SrcArgs.push_back(SArg1);
152 SrcArgs.push_back(SArg2);
153 DstArgs.push_back(DArg);
154 if (TaintRet)
155 DstArgs.push_back(ReturnValueIndex);
156 }
157
158 /// Get the propagation rule for a given function.
159 static TaintPropagationRule
160 getTaintPropagationRule(const FunctionDecl *FDecl,
161 StringRef Name,
162 CheckerContext &C);
163
Anna Zaks3666d2c2012-01-17 00:37:02 +0000164 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
165 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
166
Anna Zaks5d324e52012-01-18 02:45:07 +0000167 inline bool isNull() const { return SrcArgs.empty(); }
168
169 inline bool isDestinationArgument(unsigned ArgNum) const {
170 return (std::find(DstArgs.begin(),
171 DstArgs.end(), ArgNum) != DstArgs.end());
172 }
Anna Zaks3666d2c2012-01-17 00:37:02 +0000173
Anna Zaksbf740512012-01-24 19:32:25 +0000174 static inline bool isTaintedOrPointsToTainted(const Expr *E,
Ted Kremenek49b1e382012-01-26 21:29:00 +0000175 ProgramStateRef State,
Anna Zaksbf740512012-01-24 19:32:25 +0000176 CheckerContext &C) {
177 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
178 (E->getType().getTypePtr()->isPointerType() &&
179 State->isTainted(getPointedToSymbol(C, E))));
180 }
181
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000182 /// \brief Pre-process a function which propagates taint according to the
183 /// taint rule.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000184 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000185
186 };
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000187};
Anna Zaks5d324e52012-01-18 02:45:07 +0000188
189const unsigned GenericTaintChecker::ReturnValueIndex;
190const unsigned GenericTaintChecker::InvalidArgIndex;
191
Anna Zaks0244cd72012-01-14 02:48:40 +0000192const char GenericTaintChecker::MsgUncontrolledFormatString[] =
Anna Zaks3705a1e2012-02-22 02:35:58 +0000193 "Untrusted data is used as a format string "
194 "(CWE-134: Uncontrolled Format String)";
Anna Zaks0244cd72012-01-14 02:48:40 +0000195
196const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
Anna Zaks3705a1e2012-02-22 02:35:58 +0000197 "Untrusted data is passed to a system call "
Anna Zaks0244cd72012-01-14 02:48:40 +0000198 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks560dbe92012-01-18 02:45:11 +0000199
200const char GenericTaintChecker::MsgTaintedBufferSize[] =
Anna Zaks3705a1e2012-02-22 02:35:58 +0000201 "Untrusted data is used to specify the buffer size "
Anna Zaks560dbe92012-01-18 02:45:11 +0000202 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
203 "character data and the null terminator)";
204
205} // end of anonymous namespace
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000206
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000207/// A set which is used to pass information from call pre-visit instruction
208/// to the call post-visit. The values are unsigned integers, which are either
209/// ReturnValueIndex, or indexes of the pointer/reference argument, which
210/// points to data, which should be tainted on return.
Jordan Rose0c153cb2012-11-02 01:54:06 +0000211REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000212
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000213std::shared_ptr<PathDiagnosticPiece>
214GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N,
215 const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) {
216
217 // Find the ExplodedNode where the taint was first introduced
218 if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V))
219 return nullptr;
220
221 const Stmt *S = PathDiagnosticLocation::getStmt(N);
222 if (!S)
223 return nullptr;
224
225 const LocationContext *NCtx = N->getLocationContext();
226 PathDiagnosticLocation L =
227 PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
228 if (!L.isValid() || !L.asLocation().isValid())
229 return nullptr;
230
231 return std::make_shared<PathDiagnosticEventPiece>(
232 L, "Taint originated here");
233}
234
Anna Zaks5d324e52012-01-18 02:45:07 +0000235GenericTaintChecker::TaintPropagationRule
236GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
237 const FunctionDecl *FDecl,
238 StringRef Name,
239 CheckerContext &C) {
Enrico Pertoso4432d872015-06-03 09:10:58 +0000240 // TODO: Currently, we might lose precision here: we always mark a return
Anna Zaksbf740512012-01-24 19:32:25 +0000241 // value as tainted even if it's just a pointer, pointing to tainted data.
242
Anna Zaks5d324e52012-01-18 02:45:07 +0000243 // Check for exact name match for functions without builtin substitutes.
244 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
245 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
246 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
247 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaksbf740512012-01-24 19:32:25 +0000248 .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
249 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
250 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
251 .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
252 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
253 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
254 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
255 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks3b754b22012-01-20 00:11:19 +0000256 .Case("read", TaintPropagationRule(0, 2, 1, true))
257 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
Anna Zaksbf740512012-01-24 19:32:25 +0000258 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
259 .Case("fgets", TaintPropagationRule(2, 0, true))
260 .Case("getline", TaintPropagationRule(2, 0))
261 .Case("getdelim", TaintPropagationRule(3, 0))
262 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks5d324e52012-01-18 02:45:07 +0000263 .Default(TaintPropagationRule());
264
265 if (!Rule.isNull())
266 return Rule;
267
268 // Check if it's one of the memory setting/copying functions.
269 // This check is specialized but faster then calling isCLibraryFunction.
270 unsigned BId = 0;
271 if ( (BId = FDecl->getMemoryFunctionKind()) )
272 switch(BId) {
273 case Builtin::BImemcpy:
274 case Builtin::BImemmove:
275 case Builtin::BIstrncpy:
276 case Builtin::BIstrncat:
277 return TaintPropagationRule(1, 2, 0, true);
Anna Zaks5d324e52012-01-18 02:45:07 +0000278 case Builtin::BIstrlcpy:
279 case Builtin::BIstrlcat:
280 return TaintPropagationRule(1, 2, 0, false);
Anna Zaks5d324e52012-01-18 02:45:07 +0000281 case Builtin::BIstrndup:
282 return TaintPropagationRule(0, 1, ReturnValueIndex);
Anna Zaks5d324e52012-01-18 02:45:07 +0000283
284 default:
285 break;
286 };
287
288 // Process all other functions which could be defined as builtins.
289 if (Rule.isNull()) {
290 if (C.isCLibraryFunction(FDecl, "snprintf") ||
291 C.isCLibraryFunction(FDecl, "sprintf"))
292 return TaintPropagationRule(InvalidArgIndex, 0, true);
293 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
294 C.isCLibraryFunction(FDecl, "stpcpy") ||
295 C.isCLibraryFunction(FDecl, "strcat"))
296 return TaintPropagationRule(1, 0, true);
297 else if (C.isCLibraryFunction(FDecl, "bcopy"))
298 return TaintPropagationRule(0, 2, 1, false);
299 else if (C.isCLibraryFunction(FDecl, "strdup") ||
300 C.isCLibraryFunction(FDecl, "strdupa"))
301 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks560dbe92012-01-18 02:45:11 +0000302 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
303 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks5d324e52012-01-18 02:45:07 +0000304 }
305
306 // Skipping the following functions, since they might be used for cleansing
307 // or smart memory copy:
Benjamin Kramer474261a2012-06-02 10:20:41 +0000308 // - memccpy - copying until hitting a special character.
Anna Zaks5d324e52012-01-18 02:45:07 +0000309
310 return TaintPropagationRule();
Anna Zaks457c6872011-11-18 02:26:36 +0000311}
312
Anna Zaks3b0ab202011-12-17 00:26:34 +0000313void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
314 CheckerContext &C) const {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000315 // Check for errors first.
316 if (checkPre(CE, C))
317 return;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000318
Anna Zaks126a2ef2012-01-07 02:33:10 +0000319 // Add taint second.
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000320 addSourcesPre(CE, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000321}
322
323void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
324 CheckerContext &C) const {
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000325 if (propagateFromPre(CE, C))
326 return;
327 addSourcesPost(CE, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000328}
329
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000330void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
331 CheckerContext &C) const {
Craig Topper0dbb7832014-05-27 02:45:47 +0000332 ProgramStateRef State = nullptr;
Anna Zaks5d324e52012-01-18 02:45:07 +0000333 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose6cd16c52012-07-10 23:13:01 +0000334 if (!FDecl || FDecl->getKind() != Decl::Function)
335 return;
336
Anna Zaks5d324e52012-01-18 02:45:07 +0000337 StringRef Name = C.getCalleeName(FDecl);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000338 if (Name.empty())
339 return;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000340
Anna Zaks5d324e52012-01-18 02:45:07 +0000341 // First, try generating a propagation rule for this function.
342 TaintPropagationRule Rule =
343 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000344 if (!Rule.isNull()) {
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000345 State = Rule.process(CE, C);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000346 if (!State)
347 return;
348 C.addTransition(State);
Anna Zaks5d324e52012-01-18 02:45:07 +0000349 return;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000350 }
351
Anna Zaks5d324e52012-01-18 02:45:07 +0000352 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks3b0ab202011-12-17 00:26:34 +0000353 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000354 .Case("fscanf", &GenericTaintChecker::preFscanf)
Craig Topper0dbb7832014-05-27 02:45:47 +0000355 .Default(nullptr);
Anna Zaks3b0ab202011-12-17 00:26:34 +0000356 // Check and evaluate the call.
357 if (evalFunction)
358 State = (this->*evalFunction)(CE, C);
359 if (!State)
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000360 return;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000361 C.addTransition(State);
Anna Zaks5d324e52012-01-18 02:45:07 +0000362
Anna Zaks3b0ab202011-12-17 00:26:34 +0000363}
364
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000365bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
366 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000367 ProgramStateRef State = C.getState();
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000368
369 // Depending on what was tainted at pre-visit, we determined a set of
370 // arguments which should be tainted after the function returns. These are
371 // stored in the state as TaintArgsOnPostVisit set.
Jordan Rose0c153cb2012-11-02 01:54:06 +0000372 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
Anna Zaksbf740512012-01-24 19:32:25 +0000373 if (TaintArgs.isEmpty())
374 return false;
375
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000376 for (llvm::ImmutableSet<unsigned>::iterator
377 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
378 unsigned ArgNum = *I;
379
380 // Special handling for the tainted return value.
381 if (ArgNum == ReturnValueIndex) {
382 State = State->addTaint(CE, C.getLocationContext());
383 continue;
384 }
385
386 // The arguments are pointer arguments. The data they are pointing at is
387 // tainted after the call.
Anna Zaksb508d292012-04-10 23:41:11 +0000388 if (CE->getNumArgs() < (ArgNum + 1))
389 return false;
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000390 const Expr* Arg = CE->getArg(ArgNum);
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000391 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000392 if (Sym)
393 State = State->addTaint(Sym);
394 }
395
396 // Clear up the taint info from the state.
397 State = State->remove<TaintArgsOnPostVisit>();
398
399 if (State != C.getState()) {
400 C.addTransition(State);
401 return true;
402 }
403 return false;
404}
405
406void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
407 CheckerContext &C) const {
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000408 // Define the attack surface.
409 // Set the evaluation function by switching on the callee name.
Jordan Rose6cd16c52012-07-10 23:13:01 +0000410 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
411 if (!FDecl || FDecl->getKind() != Decl::Function)
412 return;
413
414 StringRef Name = C.getCalleeName(FDecl);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000415 if (Name.empty())
416 return;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000417 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000418 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zakseefc0e92011-12-14 00:56:02 +0000419 // TODO: Add support for vfscanf & family.
Anna Zaks3b0ab202011-12-17 00:26:34 +0000420 .Case("getchar", &GenericTaintChecker::postRetTaint)
Anna Zaksbf740512012-01-24 19:32:25 +0000421 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000422 .Case("getenv", &GenericTaintChecker::postRetTaint)
423 .Case("fopen", &GenericTaintChecker::postRetTaint)
424 .Case("fdopen", &GenericTaintChecker::postRetTaint)
425 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaksbf740512012-01-24 19:32:25 +0000426 .Case("getch", &GenericTaintChecker::postRetTaint)
427 .Case("wgetch", &GenericTaintChecker::postRetTaint)
Anna Zaks3b754b22012-01-20 00:11:19 +0000428 .Case("socket", &GenericTaintChecker::postSocket)
Craig Topper0dbb7832014-05-27 02:45:47 +0000429 .Default(nullptr);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000430
431 // If the callee isn't defined, it is not of security concern.
432 // Check and evaluate the call.
Craig Topper0dbb7832014-05-27 02:45:47 +0000433 ProgramStateRef State = nullptr;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000434 if (evalFunction)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000435 State = (this->*evalFunction)(CE, C);
436 if (!State)
437 return;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000438
Anna Zaks3b0ab202011-12-17 00:26:34 +0000439 C.addTransition(State);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000440}
Anna Zaks457c6872011-11-18 02:26:36 +0000441
Anna Zaks126a2ef2012-01-07 02:33:10 +0000442bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
443
444 if (checkUncontrolledFormatString(CE, C))
445 return true;
446
Anna Zaks560dbe92012-01-18 02:45:11 +0000447 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose6cd16c52012-07-10 23:13:01 +0000448 if (!FDecl || FDecl->getKind() != Decl::Function)
449 return false;
450
Anna Zaks560dbe92012-01-18 02:45:11 +0000451 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks0244cd72012-01-14 02:48:40 +0000452 if (Name.empty())
453 return false;
454
455 if (checkSystemCall(CE, Name, C))
456 return true;
457
Anna Zaks560dbe92012-01-18 02:45:11 +0000458 if (checkTaintedBufferSize(CE, FDecl, C))
459 return true;
460
Anna Zaks126a2ef2012-01-07 02:33:10 +0000461 return false;
462}
463
Anna Zaks457c6872011-11-18 02:26:36 +0000464SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000465 const Expr* Arg) {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000466 ProgramStateRef State = C.getState();
Ted Kremenek632e3b72012-01-06 22:09:28 +0000467 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
Anna Zakse48ee502011-12-16 18:28:50 +0000468 if (AddrVal.isUnknownOrUndef())
Craig Topper0dbb7832014-05-27 02:45:47 +0000469 return nullptr;
Anna Zaks7c96b7d2011-12-11 18:43:40 +0000470
David Blaikie05785d12013-02-20 22:23:23 +0000471 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000472 if (!AddrLoc)
Craig Topper0dbb7832014-05-27 02:45:47 +0000473 return nullptr;
Anna Zaks457c6872011-11-18 02:26:36 +0000474
Anna Zaksa31f6b92012-01-13 00:56:51 +0000475 const PointerType *ArgTy =
476 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
Anna Zaks97bef562012-01-21 06:59:01 +0000477 SVal Val = State->getSVal(*AddrLoc,
478 ArgTy ? ArgTy->getPointeeType(): QualType());
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000479 return Val.getAsSymbol();
480}
481
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000482ProgramStateRef
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000483GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
484 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000485 ProgramStateRef State = C.getState();
Anna Zaks3666d2c2012-01-17 00:37:02 +0000486
487 // Check for taint in arguments.
488 bool IsTainted = false;
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000489 for (ArgVector::const_iterator I = SrcArgs.begin(),
490 E = SrcArgs.end(); I != E; ++I) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000491 unsigned ArgNum = *I;
492
493 if (ArgNum == InvalidArgIndex) {
Anna Zaks5d324e52012-01-18 02:45:07 +0000494 // Check if any of the arguments is tainted, but skip the
495 // destination arguments.
496 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000497 if (isDestinationArgument(i))
Anna Zaks5d324e52012-01-18 02:45:07 +0000498 continue;
Anna Zaksbf740512012-01-24 19:32:25 +0000499 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
Anna Zaks3666d2c2012-01-17 00:37:02 +0000500 break;
Anna Zaks5d324e52012-01-18 02:45:07 +0000501 }
Anna Zaks3666d2c2012-01-17 00:37:02 +0000502 break;
503 }
504
Anna Zaksb508d292012-04-10 23:41:11 +0000505 if (CE->getNumArgs() < (ArgNum + 1))
506 return State;
Anna Zaksbf740512012-01-24 19:32:25 +0000507 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
Anna Zaks3666d2c2012-01-17 00:37:02 +0000508 break;
509 }
510 if (!IsTainted)
511 return State;
512
513 // Mark the arguments which should be tainted after the function returns.
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000514 for (ArgVector::const_iterator I = DstArgs.begin(),
515 E = DstArgs.end(); I != E; ++I) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000516 unsigned ArgNum = *I;
517
518 // Should we mark all arguments as tainted?
519 if (ArgNum == InvalidArgIndex) {
520 // For all pointer and references that were passed in:
521 // If they are not pointing to const data, mark data as tainted.
522 // TODO: So far we are just going one level down; ideally we'd need to
523 // recurse here.
524 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
525 const Expr *Arg = CE->getArg(i);
526 // Process pointer argument.
527 const Type *ArgTy = Arg->getType().getTypePtr();
528 QualType PType = ArgTy->getPointeeType();
529 if ((!PType.isNull() && !PType.isConstQualified())
530 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
531 State = State->add<TaintArgsOnPostVisit>(i);
532 }
533 continue;
534 }
535
536 // Should mark the return value?
537 if (ArgNum == ReturnValueIndex) {
538 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
539 continue;
540 }
541
542 // Mark the given argument.
543 assert(ArgNum < CE->getNumArgs());
544 State = State->add<TaintArgsOnPostVisit>(ArgNum);
545 }
546
547 return State;
548}
549
550
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000551// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
552// and arg 1 should get taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000553ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
Anna Zaks3b0ab202011-12-17 00:26:34 +0000554 CheckerContext &C) const {
555 assert(CE->getNumArgs() >= 2);
Ted Kremenek49b1e382012-01-26 21:29:00 +0000556 ProgramStateRef State = C.getState();
Anna Zaks3b0ab202011-12-17 00:26:34 +0000557
558 // Check is the file descriptor is tainted.
Ted Kremenek632e3b72012-01-06 22:09:28 +0000559 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000560 isStdin(CE->getArg(0), C)) {
561 // All arguments except for the first two should get taint.
562 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
563 State = State->add<TaintArgsOnPostVisit>(i);
564 return State;
565 }
566
Craig Topper0dbb7832014-05-27 02:45:47 +0000567 return nullptr;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000568}
569
Anna Zaks3b754b22012-01-20 00:11:19 +0000570
571// If argument 0(protocol domain) is network, the return value should get taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000572ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
Anna Zaksb508d292012-04-10 23:41:11 +0000573 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000574 ProgramStateRef State = C.getState();
Anna Zaksb508d292012-04-10 23:41:11 +0000575 if (CE->getNumArgs() < 3)
576 return State;
Anna Zaks3b754b22012-01-20 00:11:19 +0000577
578 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
579 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
580 // White list the internal communication protocols.
581 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
582 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
583 return State;
584 State = State->addTaint(CE, C.getLocationContext());
585 return State;
586}
587
Ted Kremenek49b1e382012-01-26 21:29:00 +0000588ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
Anna Zaks3b0ab202011-12-17 00:26:34 +0000589 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000590 ProgramStateRef State = C.getState();
Anna Zaksb508d292012-04-10 23:41:11 +0000591 if (CE->getNumArgs() < 2)
592 return State;
593
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000594 // All arguments except for the very first one should get taint.
595 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
596 // The arguments are pointer arguments. The data they are pointing at is
597 // tainted after the call.
598 const Expr* Arg = CE->getArg(i);
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000599 SymbolRef Sym = getPointedToSymbol(C, Arg);
Anna Zakseefc0e92011-12-14 00:56:02 +0000600 if (Sym)
601 State = State->addTaint(Sym);
602 }
Anna Zaks3b0ab202011-12-17 00:26:34 +0000603 return State;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000604}
605
Ted Kremenek49b1e382012-01-26 21:29:00 +0000606ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
Anna Zaksb508d292012-04-10 23:41:11 +0000607 CheckerContext &C) const {
Ted Kremenek632e3b72012-01-06 22:09:28 +0000608 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000609}
610
Anna Zaksbf740512012-01-24 19:32:25 +0000611bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000612 ProgramStateRef State = C.getState();
Ted Kremenek632e3b72012-01-06 22:09:28 +0000613 SVal Val = State->getSVal(E, C.getLocationContext());
Anna Zaks099fe3f2011-12-14 00:56:18 +0000614
Anna Zakse48ee502011-12-16 18:28:50 +0000615 // stdin is a pointer, so it would be a region.
616 const MemRegion *MemReg = Val.getAsRegion();
617
618 // The region should be symbolic, we do not know it's value.
619 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
620 if (!SymReg)
Anna Zaks099fe3f2011-12-14 00:56:18 +0000621 return false;
622
Anna Zakse48ee502011-12-16 18:28:50 +0000623 // Get it's symbol and find the declaration region it's pointing to.
624 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
625 if (!Sm)
626 return false;
627 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
628 if (!DeclReg)
629 return false;
Anna Zaks099fe3f2011-12-14 00:56:18 +0000630
Anna Zakse48ee502011-12-16 18:28:50 +0000631 // This region corresponds to a declaration, find out if it's a global/extern
632 // variable named stdin with the proper type.
633 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
634 D = D->getCanonicalDecl();
635 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
636 if (const PointerType * PtrTy =
637 dyn_cast<PointerType>(D->getType().getTypePtr()))
638 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
639 return true;
640 }
Anna Zaks099fe3f2011-12-14 00:56:18 +0000641 return false;
642}
643
Anna Zaks126a2ef2012-01-07 02:33:10 +0000644static bool getPrintfFormatArgumentNum(const CallExpr *CE,
645 const CheckerContext &C,
646 unsigned int &ArgNum) {
647 // Find if the function contains a format string argument.
648 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
649 // vsnprintf, syslog, custom annotated functions.
650 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
651 if (!FDecl)
652 return false;
Aaron Ballmanbe22bcb2014-03-10 17:08:28 +0000653 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000654 ArgNum = Format->getFormatIdx() - 1;
Aaron Ballmanf58070b2013-09-03 21:02:22 +0000655 if ((Format->getType()->getName() == "printf") &&
656 CE->getNumArgs() > ArgNum)
Anna Zaks126a2ef2012-01-07 02:33:10 +0000657 return true;
658 }
659
660 // Or if a function is named setproctitle (this is a heuristic).
661 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
662 ArgNum = 0;
663 return true;
664 }
665
666 return false;
667}
668
Anna Zaks0244cd72012-01-14 02:48:40 +0000669bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
670 const char Msg[],
671 CheckerContext &C) const {
672 assert(E);
673
674 // Check for taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000675 ProgramStateRef State = C.getState();
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000676 const SymbolRef PointedToSym = getPointedToSymbol(C, E);
677 SVal TaintedSVal;
678 if (State->isTainted(PointedToSym))
679 TaintedSVal = nonloc::SymbolVal(PointedToSym);
680 else if (State->isTainted(E, C.getLocationContext()))
681 TaintedSVal = C.getSVal(E);
682 else
Anna Zaks0244cd72012-01-14 02:48:40 +0000683 return false;
684
685 // Generate diagnostic.
Devin Coughline39bd402015-09-16 22:03:05 +0000686 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
Anna Zaks0244cd72012-01-14 02:48:40 +0000687 initBugType();
Aaron Ballman8d3a7a52015-06-23 13:15:32 +0000688 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
Anna Zaks0244cd72012-01-14 02:48:40 +0000689 report->addRange(E->getSourceRange());
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000690 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
Aaron Ballman8d3a7a52015-06-23 13:15:32 +0000691 C.emitReport(std::move(report));
Anna Zaks0244cd72012-01-14 02:48:40 +0000692 return true;
693 }
694 return false;
695}
696
Anna Zaks126a2ef2012-01-07 02:33:10 +0000697bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
698 CheckerContext &C) const{
699 // Check if the function contains a format string argument.
700 unsigned int ArgNum = 0;
701 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
702 return false;
703
704 // If either the format string content or the pointer itself are tainted, warn.
Alexander Kornienko9c104902015-12-28 13:06:58 +0000705 return generateReportIfTainted(CE->getArg(ArgNum),
706 MsgUncontrolledFormatString, C);
Anna Zaks0244cd72012-01-14 02:48:40 +0000707}
708
709bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
710 StringRef Name,
711 CheckerContext &C) const {
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000712 // TODO: It might make sense to run this check on demand. In some cases,
713 // we should check if the environment has been cleansed here. We also might
Anna Zaksbf740512012-01-24 19:32:25 +0000714 // need to know if the user was reset before these calls(seteuid).
Anna Zaks0244cd72012-01-14 02:48:40 +0000715 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
716 .Case("system", 0)
717 .Case("popen", 0)
Anna Zaks3b754b22012-01-20 00:11:19 +0000718 .Case("execl", 0)
719 .Case("execle", 0)
720 .Case("execlp", 0)
721 .Case("execv", 0)
722 .Case("execvp", 0)
723 .Case("execvP", 0)
Anna Zaksbf740512012-01-24 19:32:25 +0000724 .Case("execve", 0)
725 .Case("dlopen", 0)
Anna Zaks0244cd72012-01-14 02:48:40 +0000726 .Default(UINT_MAX);
727
Anna Zaksb508d292012-04-10 23:41:11 +0000728 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
Anna Zaks0244cd72012-01-14 02:48:40 +0000729 return false;
730
Alexander Kornienko9c104902015-12-28 13:06:58 +0000731 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000732}
733
Anna Zaks560dbe92012-01-18 02:45:11 +0000734// TODO: Should this check be a part of the CString checker?
735// If yes, should taint be a global setting?
736bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
737 const FunctionDecl *FDecl,
738 CheckerContext &C) const {
739 // If the function has a buffer size argument, set ArgNum.
740 unsigned ArgNum = InvalidArgIndex;
741 unsigned BId = 0;
742 if ( (BId = FDecl->getMemoryFunctionKind()) )
743 switch(BId) {
744 case Builtin::BImemcpy:
745 case Builtin::BImemmove:
746 case Builtin::BIstrncpy:
747 ArgNum = 2;
748 break;
749 case Builtin::BIstrndup:
750 ArgNum = 1;
751 break;
752 default:
753 break;
754 };
755
756 if (ArgNum == InvalidArgIndex) {
757 if (C.isCLibraryFunction(FDecl, "malloc") ||
758 C.isCLibraryFunction(FDecl, "calloc") ||
759 C.isCLibraryFunction(FDecl, "alloca"))
760 ArgNum = 0;
761 else if (C.isCLibraryFunction(FDecl, "memccpy"))
762 ArgNum = 3;
763 else if (C.isCLibraryFunction(FDecl, "realloc"))
764 ArgNum = 1;
765 else if (C.isCLibraryFunction(FDecl, "bcopy"))
766 ArgNum = 2;
767 }
768
Alexander Kornienko9c104902015-12-28 13:06:58 +0000769 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
770 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
Anna Zaks560dbe92012-01-18 02:45:11 +0000771}
772
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000773void ento::registerGenericTaintChecker(CheckerManager &mgr) {
774 mgr.registerChecker<GenericTaintChecker>();
775}