blob: d3ab980332365df5fc39e06f7bd3822978ca8cbd [file] [log] [blame]
Anna Zaks5c5bf9b2011-11-16 19:58:13 +00001//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Anna Zaks5c5bf9b2011-11-16 19:58:13 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This checker defines the attack surface for generic taint propagation.
10//
11// The taint information produced by it might be useful to other checkers. For
12// example, checkers should report errors which involve tainted data more
13// aggressively, even if the involved symbols are under constrained.
14//
15//===----------------------------------------------------------------------===//
Artem Dergachev44551cf2019-03-29 22:49:30 +000016
17#include "Taint.h"
Kristof Umann76a21502018-12-15 16:23:51 +000018#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
Chandler Carruth3a022472012-12-04 09:13:33 +000019#include "clang/AST/Attr.h"
20#include "clang/Basic/Builtins.h"
21#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000022#include "clang/StaticAnalyzer/Core/Checker.h"
23#include "clang/StaticAnalyzer/Core/CheckerManager.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
Anna Zaks3b0ab202011-12-17 00:26:34 +000025#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
Anna Zaksb3fa8d72012-01-12 02:22:34 +000026#include <climits>
Artem Dergachev2a5fb122019-01-30 00:06:43 +000027#include <initializer_list>
28#include <utility>
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000029
30using namespace clang;
31using namespace ento;
Artem Dergachev44551cf2019-03-29 22:49:30 +000032using namespace taint;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000033
34namespace {
Artem Dergachevb68cb542018-12-19 23:35:08 +000035class GenericTaintChecker
36 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
Anna Zaks3b0ab202011-12-17 00:26:34 +000037public:
Artem Dergachevb68cb542018-12-19 23:35:08 +000038 static void *getTag() {
39 static int Tag;
40 return &Tag;
41 }
Anna Zaks0244cd72012-01-14 02:48:40 +000042
43 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks0244cd72012-01-14 02:48:40 +000044
45 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000046
Artem Dergachev44551cf2019-03-29 22:49:30 +000047 void printState(raw_ostream &Out, ProgramStateRef State,
48 const char *NL, const char *Sep) const override;
49
Anna Zaks3b0ab202011-12-17 00:26:34 +000050private:
Anna Zaksbf740512012-01-24 19:32:25 +000051 static const unsigned InvalidArgIndex = UINT_MAX;
52 /// Denotes the return vale.
53 static const unsigned ReturnValueIndex = UINT_MAX - 1;
Anna Zaks0244cd72012-01-14 02:48:40 +000054
Ahmed Charlesb8984322014-03-07 20:03:18 +000055 mutable std::unique_ptr<BugType> BT;
Artem Dergachev2a5fb122019-01-30 00:06:43 +000056 void initBugType() const {
Anna Zaks5d324e52012-01-18 02:45:07 +000057 if (!BT)
Alexander Kornienko4aca9b12014-02-11 21:49:21 +000058 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
Anna Zaks5d324e52012-01-18 02:45:07 +000059 }
Anna Zaks457c6872011-11-18 02:26:36 +000060
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000061 /// Catch taint related bugs. Check if tainted data is passed to a
Anna Zaksb3fa8d72012-01-12 02:22:34 +000062 /// system call etc.
Anna Zaks126a2ef2012-01-07 02:33:10 +000063 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
64
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000065 /// Add taint sources on a pre-visit.
Anna Zaksb3fa8d72012-01-12 02:22:34 +000066 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
67
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000068 /// Propagate taint generated at pre-visit.
Anna Zaksb3fa8d72012-01-12 02:22:34 +000069 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
70
Anna Zaksbf740512012-01-24 19:32:25 +000071 /// Check if the region the expression evaluates to is the standard input,
72 /// and thus, is tainted.
73 static bool isStdin(const Expr *E, CheckerContext &C);
74
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000075 /// Given a pointer argument, return the value it points to.
Artem Dergacheveed7a312017-05-29 15:42:56 +000076 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +000077
Anna Zaks126a2ef2012-01-07 02:33:10 +000078 /// Check for CWE-134: Uncontrolled Format String.
Anna Zaks0244cd72012-01-14 02:48:40 +000079 static const char MsgUncontrolledFormatString[];
Anna Zaks126a2ef2012-01-07 02:33:10 +000080 bool checkUncontrolledFormatString(const CallExpr *CE,
81 CheckerContext &C) const;
82
Anna Zaks0244cd72012-01-14 02:48:40 +000083 /// Check for:
84 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
85 /// CWE-78, "Failure to Sanitize Data into an OS Command"
86 static const char MsgSanitizeSystemArgs[];
87 bool checkSystemCall(const CallExpr *CE, StringRef Name,
88 CheckerContext &C) const;
Anna Zaks3b0ab202011-12-17 00:26:34 +000089
Anna Zaks560dbe92012-01-18 02:45:11 +000090 /// Check if tainted data is used as a buffer size ins strn.. functions,
91 /// and allocators.
92 static const char MsgTaintedBufferSize[];
93 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
94 CheckerContext &C) const;
95
Anna Zaks0244cd72012-01-14 02:48:40 +000096 /// Generate a report if the expression is tainted or points to tainted data.
97 bool generateReportIfTainted(const Expr *E, const char Msg[],
98 CheckerContext &C) const;
Ted Kremenek3a0678e2015-09-08 03:50:52 +000099
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000100 using ArgVector = SmallVector<unsigned, 2>;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000101
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000102 /// A struct used to specify taint propagation rules for a function.
Anna Zaks3666d2c2012-01-17 00:37:02 +0000103 ///
104 /// If any of the possible taint source arguments is tainted, all of the
105 /// destination arguments should also be tainted. Use InvalidArgIndex in the
106 /// src list to specify that all of the arguments can introduce taint. Use
107 /// InvalidArgIndex in the dst arguments to signify that all the non-const
108 /// pointer and reference arguments might be tainted on return. If
109 /// ReturnValueIndex is added to the dst list, the return value will be
110 /// tainted.
111 struct TaintPropagationRule {
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000112 enum class VariadicType { None, Src, Dst };
113
Kristof Umann28273492019-03-08 15:47:56 +0000114 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
115 CheckerContext &C);
116
Anna Zaks3666d2c2012-01-17 00:37:02 +0000117 /// List of arguments which can be taint sources and should be checked.
118 ArgVector SrcArgs;
119 /// List of arguments which should be tainted on function return.
120 ArgVector DstArgs;
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000121 /// Index for the first variadic parameter if exist.
122 unsigned VariadicIndex;
123 /// Show when a function has variadic parameters. If it has, it marks all
124 /// of them as source or destination.
125 VariadicType VarType;
Kristof Umann28273492019-03-08 15:47:56 +0000126 /// Special function for tainted source determination. If defined, it can
127 /// override the default behavior.
128 PropagationFuncType PropagationFunc;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000129
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000130 TaintPropagationRule()
Kristof Umann28273492019-03-08 15:47:56 +0000131 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
132 PropagationFunc(nullptr) {}
Anna Zaks3666d2c2012-01-17 00:37:02 +0000133
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000134 TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135 std::initializer_list<unsigned> &&Dst,
136 VariadicType Var = VariadicType::None,
Kristof Umann28273492019-03-08 15:47:56 +0000137 unsigned VarIndex = InvalidArgIndex,
138 PropagationFuncType Func = nullptr)
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000139 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
Kristof Umann28273492019-03-08 15:47:56 +0000140 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
Anna Zaks5d324e52012-01-18 02:45:07 +0000141
142 /// Get the propagation rule for a given function.
143 static TaintPropagationRule
Artem Dergachevb68cb542018-12-19 23:35:08 +0000144 getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
145 CheckerContext &C);
Anna Zaks5d324e52012-01-18 02:45:07 +0000146
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000147 void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
148 void addDstArg(unsigned A) { DstArgs.push_back(A); }
Anna Zaks3666d2c2012-01-17 00:37:02 +0000149
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000150 bool isNull() const {
151 return SrcArgs.empty() && DstArgs.empty() &&
152 VariadicType::None == VarType;
Anna Zaks5d324e52012-01-18 02:45:07 +0000153 }
Anna Zaks3666d2c2012-01-17 00:37:02 +0000154
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000155 bool isDestinationArgument(unsigned ArgNum) const {
156 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
157 }
158
159 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
160 CheckerContext &C) {
Artem Dergachev44551cf2019-03-29 22:49:30 +0000161 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
Artem Dergacheveed7a312017-05-29 15:42:56 +0000162 return true;
163
164 if (!E->getType().getTypePtr()->isPointerType())
165 return false;
166
167 Optional<SVal> V = getPointedToSVal(C, E);
Artem Dergachev44551cf2019-03-29 22:49:30 +0000168 return (V && isTainted(State, *V));
Anna Zaksbf740512012-01-24 19:32:25 +0000169 }
170
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000171 /// Pre-process a function which propagates taint according to the
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000172 /// taint rule.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000173 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
Kristof Umann28273492019-03-08 15:47:56 +0000174
175 // Functions for custom taintedness propagation.
176 static bool postSocket(bool IsTainted, const CallExpr *CE,
177 CheckerContext &C);
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000178 };
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000179};
Anna Zaks5d324e52012-01-18 02:45:07 +0000180
181const unsigned GenericTaintChecker::ReturnValueIndex;
182const unsigned GenericTaintChecker::InvalidArgIndex;
183
Anna Zaks0244cd72012-01-14 02:48:40 +0000184const char GenericTaintChecker::MsgUncontrolledFormatString[] =
Artem Dergachevb68cb542018-12-19 23:35:08 +0000185 "Untrusted data is used as a format string "
186 "(CWE-134: Uncontrolled Format String)";
Anna Zaks0244cd72012-01-14 02:48:40 +0000187
188const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
Artem Dergachevb68cb542018-12-19 23:35:08 +0000189 "Untrusted data is passed to a system call "
190 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
Anna Zaks560dbe92012-01-18 02:45:11 +0000191
192const char GenericTaintChecker::MsgTaintedBufferSize[] =
Artem Dergachevb68cb542018-12-19 23:35:08 +0000193 "Untrusted data is used to specify the buffer size "
194 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000195 "for character data and the null terminator)";
Anna Zaks560dbe92012-01-18 02:45:11 +0000196
197} // end of anonymous namespace
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000198
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000199/// A set which is used to pass information from call pre-visit instruction
200/// to the call post-visit. The values are unsigned integers, which are either
201/// ReturnValueIndex, or indexes of the pointer/reference argument, which
202/// points to data, which should be tainted on return.
Jordan Rose0c153cb2012-11-02 01:54:06 +0000203REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
Anna Zaks3b0ab202011-12-17 00:26:34 +0000204
Anna Zaks5d324e52012-01-18 02:45:07 +0000205GenericTaintChecker::TaintPropagationRule
206GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
Artem Dergachevb68cb542018-12-19 23:35:08 +0000207 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
Enrico Pertoso4432d872015-06-03 09:10:58 +0000208 // TODO: Currently, we might lose precision here: we always mark a return
Anna Zaksbf740512012-01-24 19:32:25 +0000209 // value as tainted even if it's just a pointer, pointing to tainted data.
210
Anna Zaks5d324e52012-01-18 02:45:07 +0000211 // Check for exact name match for functions without builtin substitutes.
Artem Dergachevb68cb542018-12-19 23:35:08 +0000212 TaintPropagationRule Rule =
213 llvm::StringSwitch<TaintPropagationRule>(Name)
Kristof Umann28273492019-03-08 15:47:56 +0000214 // Source functions
215 // TODO: Add support for vfscanf & family.
216 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
217 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
218 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
219 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
220 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
221 .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
222 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
223 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
225 .Case("socket",
226 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
227 InvalidArgIndex,
228 &TaintPropagationRule::postSocket))
229 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
230 // Propagating functions
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000231 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
232 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
233 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000234 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
Kristof Umann28273492019-03-08 15:47:56 +0000235 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
236 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
237 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
238 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000239 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
Kristof Umann28273492019-03-08 15:47:56 +0000240 .Case("getdelim", TaintPropagationRule({3}, {0}))
241 .Case("getline", TaintPropagationRule({2}, {0}))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000242 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000243 .Case("pread",
244 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
Kristof Umann28273492019-03-08 15:47:56 +0000245 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
246 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
247 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
248 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
249 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
Artem Dergachevb68cb542018-12-19 23:35:08 +0000250 .Default(TaintPropagationRule());
Anna Zaks5d324e52012-01-18 02:45:07 +0000251
252 if (!Rule.isNull())
253 return Rule;
254
255 // Check if it's one of the memory setting/copying functions.
256 // This check is specialized but faster then calling isCLibraryFunction.
257 unsigned BId = 0;
Artem Dergachevb68cb542018-12-19 23:35:08 +0000258 if ((BId = FDecl->getMemoryFunctionKind()))
259 switch (BId) {
Anna Zaks5d324e52012-01-18 02:45:07 +0000260 case Builtin::BImemcpy:
261 case Builtin::BImemmove:
262 case Builtin::BIstrncpy:
263 case Builtin::BIstrncat:
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000264 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
Anna Zaks5d324e52012-01-18 02:45:07 +0000265 case Builtin::BIstrlcpy:
266 case Builtin::BIstrlcat:
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000267 return TaintPropagationRule({1, 2}, {0});
Anna Zaks5d324e52012-01-18 02:45:07 +0000268 case Builtin::BIstrndup:
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000269 return TaintPropagationRule({0, 1}, {ReturnValueIndex});
Anna Zaks5d324e52012-01-18 02:45:07 +0000270
271 default:
272 break;
273 };
274
275 // Process all other functions which could be defined as builtins.
276 if (Rule.isNull()) {
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000277 if (C.isCLibraryFunction(FDecl, "snprintf"))
278 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
279 3);
280 else if (C.isCLibraryFunction(FDecl, "sprintf"))
281 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
282 2);
Anna Zaks5d324e52012-01-18 02:45:07 +0000283 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
284 C.isCLibraryFunction(FDecl, "stpcpy") ||
285 C.isCLibraryFunction(FDecl, "strcat"))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000286 return TaintPropagationRule({1}, {0, ReturnValueIndex});
Anna Zaks5d324e52012-01-18 02:45:07 +0000287 else if (C.isCLibraryFunction(FDecl, "bcopy"))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000288 return TaintPropagationRule({0, 2}, {1});
Anna Zaks5d324e52012-01-18 02:45:07 +0000289 else if (C.isCLibraryFunction(FDecl, "strdup") ||
290 C.isCLibraryFunction(FDecl, "strdupa"))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000291 return TaintPropagationRule({0}, {ReturnValueIndex});
Anna Zaks560dbe92012-01-18 02:45:11 +0000292 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000293 return TaintPropagationRule({0}, {ReturnValueIndex});
Anna Zaks5d324e52012-01-18 02:45:07 +0000294 }
295
296 // Skipping the following functions, since they might be used for cleansing
297 // or smart memory copy:
Benjamin Kramer474261a2012-06-02 10:20:41 +0000298 // - memccpy - copying until hitting a special character.
Anna Zaks5d324e52012-01-18 02:45:07 +0000299
300 return TaintPropagationRule();
Anna Zaks457c6872011-11-18 02:26:36 +0000301}
302
Anna Zaks3b0ab202011-12-17 00:26:34 +0000303void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
304 CheckerContext &C) const {
Kristof Umann28273492019-03-08 15:47:56 +0000305 // Check for taintedness related errors first: system call, uncontrolled
306 // format string, tainted buffer size.
Anna Zaks126a2ef2012-01-07 02:33:10 +0000307 if (checkPre(CE, C))
308 return;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000309
Kristof Umann28273492019-03-08 15:47:56 +0000310 // Marks the function's arguments and/or return value tainted if it present in
311 // the list.
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000312 addSourcesPre(CE, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000313}
314
315void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
316 CheckerContext &C) const {
Kristof Umann28273492019-03-08 15:47:56 +0000317 // Set the marked values as tainted. The return value only accessible from
318 // checkPostStmt.
319 propagateFromPre(CE, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000320}
321
Artem Dergachev44551cf2019-03-29 22:49:30 +0000322void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
323 const char *NL, const char *Sep) const {
324 printTaint(State, Out, NL, Sep);
325}
326
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000327void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
328 CheckerContext &C) const {
Craig Topper0dbb7832014-05-27 02:45:47 +0000329 ProgramStateRef State = nullptr;
Anna Zaks5d324e52012-01-18 02:45:07 +0000330 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose6cd16c52012-07-10 23:13:01 +0000331 if (!FDecl || FDecl->getKind() != Decl::Function)
332 return;
333
Anna Zaks5d324e52012-01-18 02:45:07 +0000334 StringRef Name = C.getCalleeName(FDecl);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000335 if (Name.empty())
336 return;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000337
Anna Zaks5d324e52012-01-18 02:45:07 +0000338 // First, try generating a propagation rule for this function.
339 TaintPropagationRule Rule =
Artem Dergachevb68cb542018-12-19 23:35:08 +0000340 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000341 if (!Rule.isNull()) {
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000342 State = Rule.process(CE, C);
Anna Zaks3666d2c2012-01-17 00:37:02 +0000343 if (!State)
344 return;
345 C.addTransition(State);
Anna Zaks5d324e52012-01-18 02:45:07 +0000346 return;
Anna Zaks3666d2c2012-01-17 00:37:02 +0000347 }
348
Anna Zaks3b0ab202011-12-17 00:26:34 +0000349 if (!State)
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000350 return;
Anna Zaks3b0ab202011-12-17 00:26:34 +0000351 C.addTransition(State);
352}
353
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000354bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
355 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000356 ProgramStateRef State = C.getState();
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000357
358 // Depending on what was tainted at pre-visit, we determined a set of
359 // arguments which should be tainted after the function returns. These are
360 // stored in the state as TaintArgsOnPostVisit set.
Jordan Rose0c153cb2012-11-02 01:54:06 +0000361 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
Anna Zaksbf740512012-01-24 19:32:25 +0000362 if (TaintArgs.isEmpty())
363 return false;
364
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000365 for (unsigned ArgNum : TaintArgs) {
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000366 // Special handling for the tainted return value.
367 if (ArgNum == ReturnValueIndex) {
Artem Dergachev44551cf2019-03-29 22:49:30 +0000368 State = addTaint(State, CE, C.getLocationContext());
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000369 continue;
370 }
371
372 // The arguments are pointer arguments. The data they are pointing at is
373 // tainted after the call.
Anna Zaksb508d292012-04-10 23:41:11 +0000374 if (CE->getNumArgs() < (ArgNum + 1))
375 return false;
Artem Dergachevb68cb542018-12-19 23:35:08 +0000376 const Expr *Arg = CE->getArg(ArgNum);
Artem Dergacheveed7a312017-05-29 15:42:56 +0000377 Optional<SVal> V = getPointedToSVal(C, Arg);
378 if (V)
Artem Dergachev44551cf2019-03-29 22:49:30 +0000379 State = addTaint(State, *V);
Anna Zaksb3fa8d72012-01-12 02:22:34 +0000380 }
381
382 // Clear up the taint info from the state.
383 State = State->remove<TaintArgsOnPostVisit>();
384
385 if (State != C.getState()) {
386 C.addTransition(State);
387 return true;
388 }
389 return false;
390}
391
Artem Dergachevb68cb542018-12-19 23:35:08 +0000392bool GenericTaintChecker::checkPre(const CallExpr *CE,
393 CheckerContext &C) const {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000394
395 if (checkUncontrolledFormatString(CE, C))
396 return true;
397
Anna Zaks560dbe92012-01-18 02:45:11 +0000398 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
Jordan Rose6cd16c52012-07-10 23:13:01 +0000399 if (!FDecl || FDecl->getKind() != Decl::Function)
400 return false;
401
Anna Zaks560dbe92012-01-18 02:45:11 +0000402 StringRef Name = C.getCalleeName(FDecl);
Anna Zaks0244cd72012-01-14 02:48:40 +0000403 if (Name.empty())
404 return false;
405
406 if (checkSystemCall(CE, Name, C))
407 return true;
408
Anna Zaks560dbe92012-01-18 02:45:11 +0000409 if (checkTaintedBufferSize(CE, FDecl, C))
410 return true;
411
Anna Zaks126a2ef2012-01-07 02:33:10 +0000412 return false;
413}
414
Artem Dergacheveed7a312017-05-29 15:42:56 +0000415Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
Artem Dergachev3ef5deb2017-12-12 02:27:55 +0000416 const Expr *Arg) {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000417 ProgramStateRef State = C.getState();
George Karpenkovd703ec92018-01-17 20:27:29 +0000418 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
Anna Zakse48ee502011-12-16 18:28:50 +0000419 if (AddrVal.isUnknownOrUndef())
Artem Dergacheveed7a312017-05-29 15:42:56 +0000420 return None;
Anna Zaks7c96b7d2011-12-11 18:43:40 +0000421
David Blaikie05785d12013-02-20 22:23:23 +0000422 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000423 if (!AddrLoc)
Artem Dergacheveed7a312017-05-29 15:42:56 +0000424 return None;
Anna Zaks457c6872011-11-18 02:26:36 +0000425
Artem Dergachev3ef5deb2017-12-12 02:27:55 +0000426 QualType ArgTy = Arg->getType().getCanonicalType();
427 if (!ArgTy->isPointerType())
428 return None;
429
430 QualType ValTy = ArgTy->getPointeeType();
431
432 // Do not dereference void pointers. Treat them as byte pointers instead.
433 // FIXME: we might want to consider more than just the first byte.
434 if (ValTy->isVoidType())
435 ValTy = C.getASTContext().CharTy;
436
437 return State->getSVal(*AddrLoc, ValTy);
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000438}
439
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000440ProgramStateRef
Anna Zaks7f6a6b72012-01-18 02:45:13 +0000441GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
442 CheckerContext &C) const {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000443 ProgramStateRef State = C.getState();
Anna Zaks3666d2c2012-01-17 00:37:02 +0000444
445 // Check for taint in arguments.
Kristof Umann85547832019-03-05 12:42:59 +0000446 bool IsTainted = true;
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000447 for (unsigned ArgNum : SrcArgs) {
448 if (ArgNum >= CE->getNumArgs())
Anna Zaksb508d292012-04-10 23:41:11 +0000449 return State;
Anna Zaksbf740512012-01-24 19:32:25 +0000450 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
Anna Zaks3666d2c2012-01-17 00:37:02 +0000451 break;
452 }
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000453
454 // Check for taint in variadic arguments.
455 if (!IsTainted && VariadicType::Src == VarType) {
456 // Check if any of the arguments is tainted
457 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
458 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
459 break;
460 }
461 }
462
Kristof Umann28273492019-03-08 15:47:56 +0000463 if (PropagationFunc)
464 IsTainted = PropagationFunc(IsTainted, CE, C);
465
Anna Zaks3666d2c2012-01-17 00:37:02 +0000466 if (!IsTainted)
467 return State;
468
469 // Mark the arguments which should be tainted after the function returns.
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000470 for (unsigned ArgNum : DstArgs) {
Anna Zaks3666d2c2012-01-17 00:37:02 +0000471 // Should mark the return value?
472 if (ArgNum == ReturnValueIndex) {
473 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
474 continue;
475 }
476
477 // Mark the given argument.
478 assert(ArgNum < CE->getNumArgs());
479 State = State->add<TaintArgsOnPostVisit>(ArgNum);
480 }
481
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000482 // Mark all variadic arguments tainted if present.
483 if (VariadicType::Dst == VarType) {
484 // For all pointer and references that were passed in:
485 // If they are not pointing to const data, mark data as tainted.
486 // TODO: So far we are just going one level down; ideally we'd need to
487 // recurse here.
488 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
489 const Expr *Arg = CE->getArg(i);
490 // Process pointer argument.
491 const Type *ArgTy = Arg->getType().getTypePtr();
492 QualType PType = ArgTy->getPointeeType();
493 if ((!PType.isNull() && !PType.isConstQualified()) ||
494 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
495 State = State->add<TaintArgsOnPostVisit>(i);
496 }
497 }
498
Anna Zaks3666d2c2012-01-17 00:37:02 +0000499 return State;
500}
501
Anna Zaks3b754b22012-01-20 00:11:19 +0000502// If argument 0(protocol domain) is network, the return value should get taint.
Kristof Umann28273492019-03-08 15:47:56 +0000503bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
504 const CallExpr *CE,
505 CheckerContext &C) {
Anna Zaks3b754b22012-01-20 00:11:19 +0000506 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
507 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
508 // White list the internal communication protocols.
509 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
510 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
Kristof Umann28273492019-03-08 15:47:56 +0000511 return false;
Anna Zaks3b754b22012-01-20 00:11:19 +0000512
Kristof Umann28273492019-03-08 15:47:56 +0000513 return true;
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000514}
515
Anna Zaksbf740512012-01-24 19:32:25 +0000516bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
Ted Kremenek49b1e382012-01-26 21:29:00 +0000517 ProgramStateRef State = C.getState();
George Karpenkovd703ec92018-01-17 20:27:29 +0000518 SVal Val = C.getSVal(E);
Anna Zaks099fe3f2011-12-14 00:56:18 +0000519
Anna Zakse48ee502011-12-16 18:28:50 +0000520 // stdin is a pointer, so it would be a region.
521 const MemRegion *MemReg = Val.getAsRegion();
522
523 // The region should be symbolic, we do not know it's value.
524 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
525 if (!SymReg)
Anna Zaks099fe3f2011-12-14 00:56:18 +0000526 return false;
527
Anna Zakse48ee502011-12-16 18:28:50 +0000528 // Get it's symbol and find the declaration region it's pointing to.
Artem Dergachevb68cb542018-12-19 23:35:08 +0000529 const SymbolRegionValue *Sm =
530 dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
Anna Zakse48ee502011-12-16 18:28:50 +0000531 if (!Sm)
532 return false;
533 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
534 if (!DeclReg)
535 return false;
Anna Zaks099fe3f2011-12-14 00:56:18 +0000536
Anna Zakse48ee502011-12-16 18:28:50 +0000537 // This region corresponds to a declaration, find out if it's a global/extern
538 // variable named stdin with the proper type.
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000539 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
Anna Zakse48ee502011-12-16 18:28:50 +0000540 D = D->getCanonicalDecl();
Artem Dergachev2a5fb122019-01-30 00:06:43 +0000541 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
542 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
543 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
544 C.getASTContext().getFILEType().getCanonicalType())
545 return true;
546 }
Anna Zakse48ee502011-12-16 18:28:50 +0000547 }
Anna Zaks099fe3f2011-12-14 00:56:18 +0000548 return false;
549}
550
Anna Zaks126a2ef2012-01-07 02:33:10 +0000551static bool getPrintfFormatArgumentNum(const CallExpr *CE,
552 const CheckerContext &C,
553 unsigned int &ArgNum) {
554 // Find if the function contains a format string argument.
555 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
556 // vsnprintf, syslog, custom annotated functions.
557 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
558 if (!FDecl)
559 return false;
Aaron Ballmanbe22bcb2014-03-10 17:08:28 +0000560 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000561 ArgNum = Format->getFormatIdx() - 1;
Artem Dergachevb68cb542018-12-19 23:35:08 +0000562 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
Anna Zaks126a2ef2012-01-07 02:33:10 +0000563 return true;
564 }
565
566 // Or if a function is named setproctitle (this is a heuristic).
567 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
568 ArgNum = 0;
569 return true;
570 }
571
572 return false;
573}
574
Anna Zaks0244cd72012-01-14 02:48:40 +0000575bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
576 const char Msg[],
577 CheckerContext &C) const {
578 assert(E);
579
580 // Check for taint.
Ted Kremenek49b1e382012-01-26 21:29:00 +0000581 ProgramStateRef State = C.getState();
Artem Dergacheveed7a312017-05-29 15:42:56 +0000582 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000583 SVal TaintedSVal;
Artem Dergachev44551cf2019-03-29 22:49:30 +0000584 if (PointedToSVal && isTainted(State, *PointedToSVal))
Artem Dergacheveed7a312017-05-29 15:42:56 +0000585 TaintedSVal = *PointedToSVal;
Artem Dergachev44551cf2019-03-29 22:49:30 +0000586 else if (isTainted(State, E, C.getLocationContext()))
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000587 TaintedSVal = C.getSVal(E);
588 else
Anna Zaks0244cd72012-01-14 02:48:40 +0000589 return false;
590
591 // Generate diagnostic.
Devin Coughline39bd402015-09-16 22:03:05 +0000592 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
Anna Zaks0244cd72012-01-14 02:48:40 +0000593 initBugType();
Aaron Ballman8d3a7a52015-06-23 13:15:32 +0000594 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
Anna Zaks0244cd72012-01-14 02:48:40 +0000595 report->addRange(E->getSourceRange());
Anna Zaksd4e43ae2017-03-09 00:01:07 +0000596 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
Aaron Ballman8d3a7a52015-06-23 13:15:32 +0000597 C.emitReport(std::move(report));
Anna Zaks0244cd72012-01-14 02:48:40 +0000598 return true;
599 }
600 return false;
601}
602
Artem Dergachevb68cb542018-12-19 23:35:08 +0000603bool GenericTaintChecker::checkUncontrolledFormatString(
604 const CallExpr *CE, CheckerContext &C) const {
Anna Zaks126a2ef2012-01-07 02:33:10 +0000605 // Check if the function contains a format string argument.
606 unsigned int ArgNum = 0;
607 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
608 return false;
609
Artem Dergachevb68cb542018-12-19 23:35:08 +0000610 // If either the format string content or the pointer itself are tainted,
611 // warn.
Alexander Kornienko9c104902015-12-28 13:06:58 +0000612 return generateReportIfTainted(CE->getArg(ArgNum),
613 MsgUncontrolledFormatString, C);
Anna Zaks0244cd72012-01-14 02:48:40 +0000614}
615
Artem Dergachevb68cb542018-12-19 23:35:08 +0000616bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
Anna Zaks0244cd72012-01-14 02:48:40 +0000617 CheckerContext &C) const {
Ted Kremenek3a0678e2015-09-08 03:50:52 +0000618 // TODO: It might make sense to run this check on demand. In some cases,
619 // we should check if the environment has been cleansed here. We also might
Anna Zaksbf740512012-01-24 19:32:25 +0000620 // need to know if the user was reset before these calls(seteuid).
Anna Zaks0244cd72012-01-14 02:48:40 +0000621 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
Artem Dergachevb68cb542018-12-19 23:35:08 +0000622 .Case("system", 0)
623 .Case("popen", 0)
624 .Case("execl", 0)
625 .Case("execle", 0)
626 .Case("execlp", 0)
627 .Case("execv", 0)
628 .Case("execvp", 0)
629 .Case("execvP", 0)
630 .Case("execve", 0)
631 .Case("dlopen", 0)
632 .Default(UINT_MAX);
Anna Zaks0244cd72012-01-14 02:48:40 +0000633
Anna Zaksb508d292012-04-10 23:41:11 +0000634 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
Anna Zaks0244cd72012-01-14 02:48:40 +0000635 return false;
636
Alexander Kornienko9c104902015-12-28 13:06:58 +0000637 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
Anna Zaks126a2ef2012-01-07 02:33:10 +0000638}
639
Anna Zaks560dbe92012-01-18 02:45:11 +0000640// TODO: Should this check be a part of the CString checker?
641// If yes, should taint be a global setting?
642bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
643 const FunctionDecl *FDecl,
644 CheckerContext &C) const {
645 // If the function has a buffer size argument, set ArgNum.
646 unsigned ArgNum = InvalidArgIndex;
647 unsigned BId = 0;
Artem Dergachevb68cb542018-12-19 23:35:08 +0000648 if ((BId = FDecl->getMemoryFunctionKind()))
649 switch (BId) {
Anna Zaks560dbe92012-01-18 02:45:11 +0000650 case Builtin::BImemcpy:
651 case Builtin::BImemmove:
652 case Builtin::BIstrncpy:
653 ArgNum = 2;
654 break;
655 case Builtin::BIstrndup:
656 ArgNum = 1;
657 break;
658 default:
659 break;
660 };
661
662 if (ArgNum == InvalidArgIndex) {
663 if (C.isCLibraryFunction(FDecl, "malloc") ||
664 C.isCLibraryFunction(FDecl, "calloc") ||
665 C.isCLibraryFunction(FDecl, "alloca"))
666 ArgNum = 0;
667 else if (C.isCLibraryFunction(FDecl, "memccpy"))
668 ArgNum = 3;
669 else if (C.isCLibraryFunction(FDecl, "realloc"))
670 ArgNum = 1;
671 else if (C.isCLibraryFunction(FDecl, "bcopy"))
672 ArgNum = 2;
673 }
674
Alexander Kornienko9c104902015-12-28 13:06:58 +0000675 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
676 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
Anna Zaks560dbe92012-01-18 02:45:11 +0000677}
678
Anna Zaks5c5bf9b2011-11-16 19:58:13 +0000679void ento::registerGenericTaintChecker(CheckerManager &mgr) {
680 mgr.registerChecker<GenericTaintChecker>();
681}
Kristof Umann058a7a42019-01-26 14:23:08 +0000682
683bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
684 return true;
685}