blob: bf321f01b3fddb70d65b73ee4b059b4857de9cba [file] [log] [blame]
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
//
//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
Chandler Carruth3a022472012-12-04 09:13:33 +000018#include "clang/AST/Attr.h"
19#include "clang/Basic/Builtins.h"
20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000021#include "clang/StaticAnalyzer/Core/Checker.h"
22#include "clang/StaticAnalyzer/Core/CheckerManager.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks3b0ab202011-12-17 00:26:34 +000024#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksb3fa8d72012-01-12 02:22:34 +000025#include <climits>
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000026
27using namespace clang;
28using namespace ento;
29
30namespace {
Anna Zaks099fe3f2011-12-14 00:56:18 +000031class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
Anna Zaks3b0ab202011-12-17 00:26:34 +000032 check::PreStmt<CallExpr> > {
33public:
Anna Zaks0244cd72012-01-14 02:48:40 +000034 static void *getTag() { static int Tag; return &Tag; }
35
36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks0244cd72012-01-14 02:48:40 +000037
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000039
Anna Zaks3b0ab202011-12-17 00:26:34 +000040private:
Anna Zaksbf740512012-01-24 19:32:25 +000041 static const unsigned InvalidArgIndex = UINT_MAX;
42 /// Denotes the return vale.
43 static const unsigned ReturnValueIndex = UINT_MAX - 1;
Anna Zaks0244cd72012-01-14 02:48:40 +000044
Ahmed Charlesb8984322014-03-07 20:03:18 +000045 mutable std::unique_ptr<BugType> BT;
Anna Zaks5d324e52012-01-18 02:45:07 +000046 inline void initBugType() const {
47 if (!BT)
Alexander Kornienko4aca9b12014-02-11 21:49:21 +000048 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
Anna Zaks5d324e52012-01-18 02:45:07 +000049 }
Anna Zaks457c6872011-11-18 02:26:36 +000050
Anna Zaksb3fa8d72012-01-12 02:22:34 +000051 /// \brief Catch taint related bugs. Check if tainted data is passed to a
52 /// system call etc.
Anna Zaks126a2ef2012-01-07 02:33:10 +000053 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
Anna Zaksb3fa8d72012-01-12 02:22:34 +000055 /// \brief Add taint sources on a pre-visit.
56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58 /// \brief Propagate taint generated at pre-visit.
59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61 /// \brief Add taint sources on a post visit.
62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
Anna Zaksbf740512012-01-24 19:32:25 +000064 /// Check if the region the expression evaluates to is the standard input,
65 /// and thus, is tainted.
66 static bool isStdin(const Expr *E, CheckerContext &C);
67
Artem Dergacheveed7a312017-05-29 15:42:56 +000068 /// \brief Given a pointer argument, return the value it points to.
69 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000070
Anna Zaks3b0ab202011-12-17 00:26:34 +000071 /// Functions defining the attack surface.
Ted Kremenek49b1e382012-01-26 21:29:00 +000072 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
Anna Zaks3b0ab202011-12-17 00:26:34 +000073 CheckerContext &C) const;
Ted Kremenek49b1e382012-01-26 21:29:00 +000074 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
75 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
76 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks3b0ab202011-12-17 00:26:34 +000077
78 /// Taint the scanned input if the file is tainted.
Ted Kremenek49b1e382012-01-26 21:29:00 +000079 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000080
Anna Zaks126a2ef2012-01-07 02:33:10 +000081 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks0244cd72012-01-14 02:48:40 +000082 static const char MsgUncontrolledFormatString[];
Anna Zaks126a2ef2012-01-07 02:33:10 +000083 bool checkUncontrolledFormatString(const CallExpr *CE,
84 CheckerContext &C) const;
85
Anna Zaks0244cd72012-01-14 02:48:40 +000086 /// Check for:
87 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
88 /// CWE-78, "Failure to Sanitize Data into an OS Command"
89 static const char MsgSanitizeSystemArgs[];
90 bool checkSystemCall(const CallExpr *CE, StringRef Name,
91 CheckerContext &C) const;
Anna Zaks3b0ab202011-12-17 00:26:34 +000092
Anna Zaks560dbe92012-01-18 02:45:11 +000093 /// Check if tainted data is used as a buffer size ins strn.. functions,
94 /// and allocators.
95 static const char MsgTaintedBufferSize[];
96 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
97 CheckerContext &C) const;
98
Anna Zaks0244cd72012-01-14 02:48:40 +000099 /// Generate a report if the expression is tainted or points to tainted data.
100 bool generateReportIfTainted(const Expr *E, const char Msg[],
101 CheckerContext &C) const;
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000102
Dmitri Gribenkof8579502013-01-12 19:30:44 +0000103 typedef SmallVector<unsigned, 2> ArgVector;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000104
Anna Zaks3666d2c2012-01-17 00:37:02 +0000105 /// \brief A struct used to specify taint propagation rules for a function.
106 ///
107 /// If any of the possible taint source arguments is tainted, all of the
108 /// destination arguments should also be tainted. Use InvalidArgIndex in the
109 /// src list to specify that all of the arguments can introduce taint. Use
110 /// InvalidArgIndex in the dst arguments to signify that all the non-const
111 /// pointer and reference arguments might be tainted on return. If
112 /// ReturnValueIndex is added to the dst list, the return value will be
113 /// tainted.
114 struct TaintPropagationRule {
115 /// List of arguments which can be taint sources and should be checked.
116 ArgVector SrcArgs;
117 /// List of arguments which should be tainted on function return.
118 ArgVector DstArgs;
Anna Zaks5d324e52012-01-18 02:45:07 +0000119 // TODO: Check if using other data structures would be more optimal.
Anna Zaks3666d2c2012-01-17 00:37:02 +0000120
121 TaintPropagationRule() {}
122
Anna Zaks5d324e52012-01-18 02:45:07 +0000123 TaintPropagationRule(unsigned SArg,
124 unsigned DArg, bool TaintRet = false) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000125 SrcArgs.push_back(SArg);
126 DstArgs.push_back(DArg);
Anna Zaks5d324e52012-01-18 02:45:07 +0000127 if (TaintRet)
128 DstArgs.push_back(ReturnValueIndex);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000129 }
130
Anna Zaks5d324e52012-01-18 02:45:07 +0000131 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
132 unsigned DArg, bool TaintRet = false) {
133 SrcArgs.push_back(SArg1);
134 SrcArgs.push_back(SArg2);
135 DstArgs.push_back(DArg);
136 if (TaintRet)
137 DstArgs.push_back(ReturnValueIndex);
138 }
139
140 /// Get the propagation rule for a given function.
141 static TaintPropagationRule
142 getTaintPropagationRule(const FunctionDecl *FDecl,
143 StringRef Name,
144 CheckerContext &C);
145
Anna Zaks3666d2c2012-01-17 00:37:02 +0000146 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
147 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
148
Anna Zaks5d324e52012-01-18 02:45:07 +0000149 inline bool isNull() const { return SrcArgs.empty(); }
150
151 inline bool isDestinationArgument(unsigned ArgNum) const {
152 return (std::find(DstArgs.begin(),
153 DstArgs.end(), ArgNum) != DstArgs.end());
154 }
Anna Zaks3666d2c2012-01-17 00:37:02 +0000155
Anna Zaksbf740512012-01-24 19:32:25 +0000156 static inline bool isTaintedOrPointsToTainted(const Expr *E,
Ted Kremenek49b1e382012-01-26 21:29:00 +0000157 ProgramStateRef State,
Anna Zaksbf740512012-01-24 19:32:25 +0000158 CheckerContext &C) {
Artem Dergacheveed7a312017-05-29 15:42:56 +0000159 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
160 return true;
161
162 if (!E->getType().getTypePtr()->isPointerType())
163 return false;
164
165 Optional<SVal> V = getPointedToSVal(C, E);
166 return (V && State->isTainted(*V));
Anna Zaksbf740512012-01-24 19:32:25 +0000167 }
168
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000169 /// \brief Pre-process a function which propagates taint according to the
170 /// taint rule.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000171 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000172
173 };
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000174};
Anna Zaks5d324e52012-01-18 02:45:07 +0000175
176const unsigned GenericTaintChecker::ReturnValueIndex;
177const unsigned GenericTaintChecker::InvalidArgIndex;
178
Anna Zaks0244cd72012-01-14 02:48:40 +0000179const char GenericTaintChecker::MsgUncontrolledFormatString[] =
Anna Zaks3705a1e2012-02-22 02:35:58 +0000180 "Untrusted data is used as a format string "
181 "(CWE-134: Uncontrolled Format String)";
Anna Zaks0244cd72012-01-14 02:48:40 +0000182
183const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
Anna Zaks3705a1e2012-02-22 02:35:58 +0000184 "Untrusted data is passed to a system call "
Anna Zaks0244cd72012-01-14 02:48:40 +0000185 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks560dbe92012-01-18 02:45:11 +0000186
187const char GenericTaintChecker::MsgTaintedBufferSize[] =
Anna Zaks3705a1e2012-02-22 02:35:58 +0000188 "Untrusted data is used to specify the buffer size "
Anna Zaks560dbe92012-01-18 02:45:11 +0000189 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
190 "character data and the null terminator)";
191
192} // end of anonymous namespace
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000193
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000194/// A set which is used to pass information from call pre-visit instruction
195/// to the call post-visit. The values are unsigned integers, which are either
196/// ReturnValueIndex, or indexes of the pointer/reference argument, which
197/// points to data, which should be tainted on return.
Jordan Rose0c153cb2012-11-02 01:54:06 +0000198REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000199
Anna Zaks5d324e52012-01-18 02:45:07 +0000200GenericTaintChecker::TaintPropagationRule
201GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
202 const FunctionDecl *FDecl,
203 StringRef Name,
204 CheckerContext &C) {
Enrico Pertoso4432d872015-06-03 09:10:58 +0000205 // TODO: Currently, we might lose precision here: we always mark a return
Anna Zaksbf740512012-01-24 19:32:25 +0000206 // value as tainted even if it's just a pointer, pointing to tainted data.
207
Anna Zaks5d324e52012-01-18 02:45:07 +0000208 // Check for exact name match for functions without builtin substitutes.
209 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
210 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
211 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
212 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaksbf740512012-01-24 19:32:25 +0000213 .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
214 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
215 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
216 .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
217 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
218 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
219 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
220 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks3b754b22012-01-20 00:11:19 +0000221 .Case("read", TaintPropagationRule(0, 2, 1, true))
222 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
Anna Zaksbf740512012-01-24 19:32:25 +0000223 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
224 .Case("fgets", TaintPropagationRule(2, 0, true))
225 .Case("getline", TaintPropagationRule(2, 0))
226 .Case("getdelim", TaintPropagationRule(3, 0))
227 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
Anna Zaks5d324e52012-01-18 02:45:07 +0000228 .Default(TaintPropagationRule());
229
230 if (!Rule.isNull())
231 return Rule;
232
233 // Check if it's one of the memory setting/copying functions.
234 // This check is specialized but faster then calling isCLibraryFunction.
235 unsigned BId = 0;
236 if ( (BId = FDecl->getMemoryFunctionKind()) )
237 switch(BId) {
238 case Builtin::BImemcpy:
239 case Builtin::BImemmove:
240 case Builtin::BIstrncpy:
241 case Builtin::BIstrncat:
242 return TaintPropagationRule(1, 2, 0, true);
Anna Zaks5d324e52012-01-18 02:45:07 +0000243 case Builtin::BIstrlcpy:
244 case Builtin::BIstrlcat:
245 return TaintPropagationRule(1, 2, 0, false);
Anna Zaks5d324e52012-01-18 02:45:07 +0000246 case Builtin::BIstrndup:
247 return TaintPropagationRule(0, 1, ReturnValueIndex);
Anna Zaks5d324e52012-01-18 02:45:07 +0000248
249 default:
250 break;
251 };
252
253 // Process all other functions which could be defined as builtins.
254 if (Rule.isNull()) {
255 if (C.isCLibraryFunction(FDecl, "snprintf") ||
256 C.isCLibraryFunction(FDecl, "sprintf"))
257 return TaintPropagationRule(InvalidArgIndex, 0, true);
258 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
259 C.isCLibraryFunction(FDecl, "stpcpy") ||
260 C.isCLibraryFunction(FDecl, "strcat"))
261 return TaintPropagationRule(1, 0, true);
262 else if (C.isCLibraryFunction(FDecl, "bcopy"))
263 return TaintPropagationRule(0, 2, 1, false);
264 else if (C.isCLibraryFunction(FDecl, "strdup") ||
265 C.isCLibraryFunction(FDecl, "strdupa"))
266 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks560dbe92012-01-18 02:45:11 +0000267 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
268 return TaintPropagationRule(0, ReturnValueIndex);
Anna Zaks5d324e52012-01-18 02:45:07 +0000269 }
270
271 // Skipping the following functions, since they might be used for cleansing
272 // or smart memory copy:
Benjamin Kramer474261a2012-06-02 10:20:41 +0000273 // - memccpy - copying until hitting a special character.
Anna Zaks5d324e52012-01-18 02:45:07 +0000274
275 return TaintPropagationRule();
Anna Zaks457c6872011-11-18 02:26:36 +0000276}
277
Anna Zaks3b0ab202011-12-17 00:26:34 +0000278void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
279 CheckerContext &C) const {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000280 // Check for errors first.
281 if (checkPre(CE, C))
282 return;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000283
Anna Zaks126a2ef2012-01-07 02:33:10 +0000284 // Add taint second.
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000285 addSourcesPre(CE, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000286}
287
288void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
289 CheckerContext &C) const {
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000290 if (propagateFromPre(CE, C))
291 return;
292 addSourcesPost(CE, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000293}
294
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000295void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
296 CheckerContext &C) const {
Craig Topper0dbb7832014-05-27 02:45:47 +0000297 ProgramStateRef State = nullptr;
Anna Zaks5d324e52012-01-18 02:45:07 +0000298 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose6cd16c52012-07-10 23:13:01 +0000299 if (!FDecl || FDecl->getKind() != Decl::Function)
300 return;
301
Anna Zaks5d324e52012-01-18 02:45:07 +0000302 StringRef Name = C.getCalleeName(FDecl);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000303 if (Name.empty())
304 return;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000305
Anna Zaks5d324e52012-01-18 02:45:07 +0000306 // First, try generating a propagation rule for this function.
307 TaintPropagationRule Rule =
308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000309 if (!Rule.isNull()) {
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000310 State = Rule.process(CE, C);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000311 if (!State)
312 return;
313 C.addTransition(State);
Anna Zaks5d324e52012-01-18 02:45:07 +0000314 return;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000315 }
316
Anna Zaks5d324e52012-01-18 02:45:07 +0000317 // Otherwise, check if we have custom pre-processing implemented.
Anna Zaks3b0ab202011-12-17 00:26:34 +0000318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000319 .Case("fscanf", &GenericTaintChecker::preFscanf)
Craig Topper0dbb7832014-05-27 02:45:47 +0000320 .Default(nullptr);
Anna Zaks3b0ab202011-12-17 00:26:34 +0000321 // Check and evaluate the call.
322 if (evalFunction)
323 State = (this->*evalFunction)(CE, C);
324 if (!State)
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000325 return;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000326 C.addTransition(State);
Anna Zaks5d324e52012-01-18 02:45:07 +0000327
Anna Zaks3b0ab202011-12-17 00:26:34 +0000328}
329
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000330bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
331 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000332 ProgramStateRef State = C.getState();
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000333
334 // Depending on what was tainted at pre-visit, we determined a set of
335 // arguments which should be tainted after the function returns. These are
336 // stored in the state as TaintArgsOnPostVisit set.
Jordan Rose0c153cb2012-11-02 01:54:06 +0000337 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
Anna Zaksbf740512012-01-24 19:32:25 +0000338 if (TaintArgs.isEmpty())
339 return false;
340
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000341 for (llvm::ImmutableSet<unsigned>::iterator
342 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
343 unsigned ArgNum = *I;
344
345 // Special handling for the tainted return value.
346 if (ArgNum == ReturnValueIndex) {
347 State = State->addTaint(CE, C.getLocationContext());
348 continue;
349 }
350
351 // The arguments are pointer arguments. The data they are pointing at is
352 // tainted after the call.
Anna Zaksb508d292012-04-10 23:41:11 +0000353 if (CE->getNumArgs() < (ArgNum + 1))
354 return false;
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000355 const Expr* Arg = CE->getArg(ArgNum);
Artem Dergacheveed7a312017-05-29 15:42:56 +0000356 Optional<SVal> V = getPointedToSVal(C, Arg);
357 if (V)
358 State = State->addTaint(*V);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000359 }
360
361 // Clear up the taint info from the state.
362 State = State->remove<TaintArgsOnPostVisit>();
363
364 if (State != C.getState()) {
365 C.addTransition(State);
366 return true;
367 }
368 return false;
369}
370
371void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
372 CheckerContext &C) const {
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000373 // Define the attack surface.
374 // Set the evaluation function by switching on the callee name.
Jordan Rose6cd16c52012-07-10 23:13:01 +0000375 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
376 if (!FDecl || FDecl->getKind() != Decl::Function)
377 return;
378
379 StringRef Name = C.getCalleeName(FDecl);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000380 if (Name.empty())
381 return;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000382 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000383 .Case("scanf", &GenericTaintChecker::postScanf)
Anna Zakseefc0e92011-12-14 00:56:02 +0000384 // TODO: Add support for vfscanf & family.
Anna Zaks3b0ab202011-12-17 00:26:34 +0000385 .Case("getchar", &GenericTaintChecker::postRetTaint)
Anna Zaksbf740512012-01-24 19:32:25 +0000386 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000387 .Case("getenv", &GenericTaintChecker::postRetTaint)
388 .Case("fopen", &GenericTaintChecker::postRetTaint)
389 .Case("fdopen", &GenericTaintChecker::postRetTaint)
390 .Case("freopen", &GenericTaintChecker::postRetTaint)
Anna Zaksbf740512012-01-24 19:32:25 +0000391 .Case("getch", &GenericTaintChecker::postRetTaint)
392 .Case("wgetch", &GenericTaintChecker::postRetTaint)
Anna Zaks3b754b22012-01-20 00:11:19 +0000393 .Case("socket", &GenericTaintChecker::postSocket)
Craig Topper0dbb7832014-05-27 02:45:47 +0000394 .Default(nullptr);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000395
396 // If the callee isn't defined, it is not of security concern.
397 // Check and evaluate the call.
Craig Topper0dbb7832014-05-27 02:45:47 +0000398 ProgramStateRef State = nullptr;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000399 if (evalFunction)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000400 State = (this->*evalFunction)(CE, C);
401 if (!State)
402 return;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000403
Anna Zaks3b0ab202011-12-17 00:26:34 +0000404 C.addTransition(State);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000405}
Anna Zaks457c6872011-11-18 02:26:36 +0000406
Anna Zaks126a2ef2012-01-07 02:33:10 +0000407bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
408
409 if (checkUncontrolledFormatString(CE, C))
410 return true;
411
Anna Zaks560dbe92012-01-18 02:45:11 +0000412 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose6cd16c52012-07-10 23:13:01 +0000413 if (!FDecl || FDecl->getKind() != Decl::Function)
414 return false;
415
Anna Zaks560dbe92012-01-18 02:45:11 +0000416 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks0244cd72012-01-14 02:48:40 +0000417 if (Name.empty())
418 return false;
419
420 if (checkSystemCall(CE, Name, C))
421 return true;
422
Anna Zaks560dbe92012-01-18 02:45:11 +0000423 if (checkTaintedBufferSize(CE, FDecl, C))
424 return true;
425
Anna Zaks126a2ef2012-01-07 02:33:10 +0000426 return false;
427}
428
Artem Dergacheveed7a312017-05-29 15:42:56 +0000429Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
Artem Dergachev3ef5deb2017-12-12 02:27:55 +0000430 const Expr *Arg) {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000431 ProgramStateRef State = C.getState();
George Karpenkovd703ec92018-01-17 20:27:29 +0000432 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
Anna Zakse48ee502011-12-16 18:28:50 +0000433 if (AddrVal.isUnknownOrUndef())
Artem Dergacheveed7a312017-05-29 15:42:56 +0000434 return None;
Anna Zaks7c96b7d2011-12-11 18:43:40 +0000435
David Blaikie05785d12013-02-20 22:23:23 +0000436 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000437 if (!AddrLoc)
Artem Dergacheveed7a312017-05-29 15:42:56 +0000438 return None;
Anna Zaks457c6872011-11-18 02:26:36 +0000439
Artem Dergachev3ef5deb2017-12-12 02:27:55 +0000440 QualType ArgTy = Arg->getType().getCanonicalType();
441 if (!ArgTy->isPointerType())
442 return None;
443
444 QualType ValTy = ArgTy->getPointeeType();
445
446 // Do not dereference void pointers. Treat them as byte pointers instead.
447 // FIXME: we might want to consider more than just the first byte.
448 if (ValTy->isVoidType())
449 ValTy = C.getASTContext().CharTy;
450
451 return State->getSVal(*AddrLoc, ValTy);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000452}
453
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000454ProgramStateRef
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000455GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
456 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000457 ProgramStateRef State = C.getState();
Anna Zaks3666d2c2012-01-17 00:37:02 +0000458
459 // Check for taint in arguments.
460 bool IsTainted = false;
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000461 for (ArgVector::const_iterator I = SrcArgs.begin(),
462 E = SrcArgs.end(); I != E; ++I) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000463 unsigned ArgNum = *I;
464
465 if (ArgNum == InvalidArgIndex) {
Anna Zaks5d324e52012-01-18 02:45:07 +0000466 // Check if any of the arguments is tainted, but skip the
467 // destination arguments.
468 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000469 if (isDestinationArgument(i))
Anna Zaks5d324e52012-01-18 02:45:07 +0000470 continue;
Anna Zaksbf740512012-01-24 19:32:25 +0000471 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
Anna Zaks3666d2c2012-01-17 00:37:02 +0000472 break;
Anna Zaks5d324e52012-01-18 02:45:07 +0000473 }
Anna Zaks3666d2c2012-01-17 00:37:02 +0000474 break;
475 }
476
Anna Zaksb508d292012-04-10 23:41:11 +0000477 if (CE->getNumArgs() < (ArgNum + 1))
478 return State;
Anna Zaksbf740512012-01-24 19:32:25 +0000479 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
Anna Zaks3666d2c2012-01-17 00:37:02 +0000480 break;
481 }
482 if (!IsTainted)
483 return State;
484
485 // Mark the arguments which should be tainted after the function returns.
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000486 for (ArgVector::const_iterator I = DstArgs.begin(),
487 E = DstArgs.end(); I != E; ++I) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000488 unsigned ArgNum = *I;
489
490 // Should we mark all arguments as tainted?
491 if (ArgNum == InvalidArgIndex) {
492 // For all pointer and references that were passed in:
493 // If they are not pointing to const data, mark data as tainted.
494 // TODO: So far we are just going one level down; ideally we'd need to
495 // recurse here.
496 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
497 const Expr *Arg = CE->getArg(i);
498 // Process pointer argument.
499 const Type *ArgTy = Arg->getType().getTypePtr();
500 QualType PType = ArgTy->getPointeeType();
501 if ((!PType.isNull() && !PType.isConstQualified())
502 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
503 State = State->add<TaintArgsOnPostVisit>(i);
504 }
505 continue;
506 }
507
508 // Should mark the return value?
509 if (ArgNum == ReturnValueIndex) {
510 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
511 continue;
512 }
513
514 // Mark the given argument.
515 assert(ArgNum < CE->getNumArgs());
516 State = State->add<TaintArgsOnPostVisit>(ArgNum);
517 }
518
519 return State;
520}
521
522
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000523// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
524// and arg 1 should get taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000525ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
Anna Zaks3b0ab202011-12-17 00:26:34 +0000526 CheckerContext &C) const {
527 assert(CE->getNumArgs() >= 2);
Ted Kremenek49b1e382012-01-26 21:29:00 +0000528 ProgramStateRef State = C.getState();
Anna Zaks3b0ab202011-12-17 00:26:34 +0000529
530 // Check is the file descriptor is tainted.
Ted Kremenek632e3b72012-01-06 22:09:28 +0000531 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000532 isStdin(CE->getArg(0), C)) {
533 // All arguments except for the first two should get taint.
534 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
535 State = State->add<TaintArgsOnPostVisit>(i);
536 return State;
537 }
538
Craig Topper0dbb7832014-05-27 02:45:47 +0000539 return nullptr;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000540}
541
Anna Zaks3b754b22012-01-20 00:11:19 +0000542
543// If argument 0(protocol domain) is network, the return value should get taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000544ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
Anna Zaksb508d292012-04-10 23:41:11 +0000545 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000546 ProgramStateRef State = C.getState();
Anna Zaksb508d292012-04-10 23:41:11 +0000547 if (CE->getNumArgs() < 3)
548 return State;
Anna Zaks3b754b22012-01-20 00:11:19 +0000549
550 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
551 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
552 // White list the internal communication protocols.
553 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
554 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
555 return State;
556 State = State->addTaint(CE, C.getLocationContext());
557 return State;
558}
559
Ted Kremenek49b1e382012-01-26 21:29:00 +0000560ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
Anna Zaks3b0ab202011-12-17 00:26:34 +0000561 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000562 ProgramStateRef State = C.getState();
Anna Zaksb508d292012-04-10 23:41:11 +0000563 if (CE->getNumArgs() < 2)
564 return State;
565
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000566 // All arguments except for the very first one should get taint.
567 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
568 // The arguments are pointer arguments. The data they are pointing at is
569 // tainted after the call.
570 const Expr* Arg = CE->getArg(i);
Artem Dergacheveed7a312017-05-29 15:42:56 +0000571 Optional<SVal> V = getPointedToSVal(C, Arg);
572 if (V)
573 State = State->addTaint(*V);
Anna Zakseefc0e92011-12-14 00:56:02 +0000574 }
Anna Zaks3b0ab202011-12-17 00:26:34 +0000575 return State;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000576}
577
Ted Kremenek49b1e382012-01-26 21:29:00 +0000578ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
Anna Zaksb508d292012-04-10 23:41:11 +0000579 CheckerContext &C) const {
Ted Kremenek632e3b72012-01-06 22:09:28 +0000580 return C.getState()->addTaint(CE, C.getLocationContext());
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000581}
582
Anna Zaksbf740512012-01-24 19:32:25 +0000583bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000584 ProgramStateRef State = C.getState();
George Karpenkovd703ec92018-01-17 20:27:29 +0000585 SVal Val = C.getSVal(E);
Anna Zaks099fe3f2011-12-14 00:56:18 +0000586
Anna Zakse48ee502011-12-16 18:28:50 +0000587 // stdin is a pointer, so it would be a region.
588 const MemRegion *MemReg = Val.getAsRegion();
589
590 // The region should be symbolic, we do not know it's value.
591 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
592 if (!SymReg)
Anna Zaks099fe3f2011-12-14 00:56:18 +0000593 return false;
594
Anna Zakse48ee502011-12-16 18:28:50 +0000595 // Get it's symbol and find the declaration region it's pointing to.
596 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
597 if (!Sm)
598 return false;
599 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
600 if (!DeclReg)
601 return false;
Anna Zaks099fe3f2011-12-14 00:56:18 +0000602
Anna Zakse48ee502011-12-16 18:28:50 +0000603 // This region corresponds to a declaration, find out if it's a global/extern
604 // variable named stdin with the proper type.
605 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
606 D = D->getCanonicalDecl();
607 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
608 if (const PointerType * PtrTy =
609 dyn_cast<PointerType>(D->getType().getTypePtr()))
Henry Wongcb2ad242018-03-05 15:41:15 +0000610 if (PtrTy->getPointeeType().getCanonicalType() ==
611 C.getASTContext().getFILEType().getCanonicalType())
Anna Zakse48ee502011-12-16 18:28:50 +0000612 return true;
613 }
Anna Zaks099fe3f2011-12-14 00:56:18 +0000614 return false;
615}
616
Anna Zaks126a2ef2012-01-07 02:33:10 +0000617static bool getPrintfFormatArgumentNum(const CallExpr *CE,
618 const CheckerContext &C,
619 unsigned int &ArgNum) {
620 // Find if the function contains a format string argument.
621 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
622 // vsnprintf, syslog, custom annotated functions.
623 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
624 if (!FDecl)
625 return false;
Aaron Ballmanbe22bcb2014-03-10 17:08:28 +0000626 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000627 ArgNum = Format->getFormatIdx() - 1;
Aaron Ballmanf58070b2013-09-03 21:02:22 +0000628 if ((Format->getType()->getName() == "printf") &&
629 CE->getNumArgs() > ArgNum)
Anna Zaks126a2ef2012-01-07 02:33:10 +0000630 return true;
631 }
632
633 // Or if a function is named setproctitle (this is a heuristic).
634 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
635 ArgNum = 0;
636 return true;
637 }
638
639 return false;
640}
641
Anna Zaks0244cd72012-01-14 02:48:40 +0000642bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
643 const char Msg[],
644 CheckerContext &C) const {
645 assert(E);
646
647 // Check for taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000648 ProgramStateRef State = C.getState();
Artem Dergacheveed7a312017-05-29 15:42:56 +0000649 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000650 SVal TaintedSVal;
Artem Dergacheveed7a312017-05-29 15:42:56 +0000651 if (PointedToSVal && State->isTainted(*PointedToSVal))
652 TaintedSVal = *PointedToSVal;
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000653 else if (State->isTainted(E, C.getLocationContext()))
654 TaintedSVal = C.getSVal(E);
655 else
Anna Zaks0244cd72012-01-14 02:48:40 +0000656 return false;
657
658 // Generate diagnostic.
Devin Coughline39bd402015-09-16 22:03:05 +0000659 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
Anna Zaks0244cd72012-01-14 02:48:40 +0000660 initBugType();
Aaron Ballman8d3a7a52015-06-23 13:15:32 +0000661 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
Anna Zaks0244cd72012-01-14 02:48:40 +0000662 report->addRange(E->getSourceRange());
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000663 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
Aaron Ballman8d3a7a52015-06-23 13:15:32 +0000664 C.emitReport(std::move(report));
Anna Zaks0244cd72012-01-14 02:48:40 +0000665 return true;
666 }
667 return false;
668}
669
Anna Zaks126a2ef2012-01-07 02:33:10 +0000670bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
671 CheckerContext &C) const{
672 // Check if the function contains a format string argument.
673 unsigned int ArgNum = 0;
674 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
675 return false;
676
677 // If either the format string content or the pointer itself are tainted, warn.
Alexander Kornienko9c104902015-12-28 13:06:58 +0000678 return generateReportIfTainted(CE->getArg(ArgNum),
679 MsgUncontrolledFormatString, C);
Anna Zaks0244cd72012-01-14 02:48:40 +0000680}
681
682bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
683 StringRef Name,
684 CheckerContext &C) const {
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000685 // TODO: It might make sense to run this check on demand. In some cases,
686 // we should check if the environment has been cleansed here. We also might
Anna Zaksbf740512012-01-24 19:32:25 +0000687 // need to know if the user was reset before these calls(seteuid).
Anna Zaks0244cd72012-01-14 02:48:40 +0000688 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
689 .Case("system", 0)
690 .Case("popen", 0)
Anna Zaks3b754b22012-01-20 00:11:19 +0000691 .Case("execl", 0)
692 .Case("execle", 0)
693 .Case("execlp", 0)
694 .Case("execv", 0)
695 .Case("execvp", 0)
696 .Case("execvP", 0)
Anna Zaksbf740512012-01-24 19:32:25 +0000697 .Case("execve", 0)
698 .Case("dlopen", 0)
Anna Zaks0244cd72012-01-14 02:48:40 +0000699 .Default(UINT_MAX);
700
Anna Zaksb508d292012-04-10 23:41:11 +0000701 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
Anna Zaks0244cd72012-01-14 02:48:40 +0000702 return false;
703
Alexander Kornienko9c104902015-12-28 13:06:58 +0000704 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000705}
706
Anna Zaks560dbe92012-01-18 02:45:11 +0000707// TODO: Should this check be a part of the CString checker?
708// If yes, should taint be a global setting?
709bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
710 const FunctionDecl *FDecl,
711 CheckerContext &C) const {
712 // If the function has a buffer size argument, set ArgNum.
713 unsigned ArgNum = InvalidArgIndex;
714 unsigned BId = 0;
715 if ( (BId = FDecl->getMemoryFunctionKind()) )
716 switch(BId) {
717 case Builtin::BImemcpy:
718 case Builtin::BImemmove:
719 case Builtin::BIstrncpy:
720 ArgNum = 2;
721 break;
722 case Builtin::BIstrndup:
723 ArgNum = 1;
724 break;
725 default:
726 break;
727 };
728
729 if (ArgNum == InvalidArgIndex) {
730 if (C.isCLibraryFunction(FDecl, "malloc") ||
731 C.isCLibraryFunction(FDecl, "calloc") ||
732 C.isCLibraryFunction(FDecl, "alloca"))
733 ArgNum = 0;
734 else if (C.isCLibraryFunction(FDecl, "memccpy"))
735 ArgNum = 3;
736 else if (C.isCLibraryFunction(FDecl, "realloc"))
737 ArgNum = 1;
738 else if (C.isCLibraryFunction(FDecl, "bcopy"))
739 ArgNum = 2;
740 }
741
Alexander Kornienko9c104902015-12-28 13:06:58 +0000742 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
743 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
Anna Zaks560dbe92012-01-18 02:45:11 +0000744}
745
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000746void ento::registerGenericTaintChecker(CheckerManager &mgr) {
747 mgr.registerChecker<GenericTaintChecker>();
748}