blob: 3c20baa93cf77943aecac1639fafdcd909257662 [file] [log] [blame]
//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This defines CStringChecker, which is an assortment of checks on calls
// to functions in <string.h>.
//
//===----------------------------------------------------------------------===//
#include "GRExprEngineExperimentalChecks.h"
#include "clang/Checker/BugReporter/BugType.h"
#include "clang/Checker/PathSensitive/CheckerVisitor.h"
#include "llvm/ADT/StringSwitch.h"
using namespace clang;
namespace {
// CStringChecker evaluates calls to a handful of byte string functions (see
// EvalCallExpr for the recognized names) and reports null, out-of-bounds and
// overlapping buffer arguments passed to them.
class CStringChecker : public CheckerVisitor<CStringChecker> {
  // Bug types are created lazily, the first time each kind of report is
  // emitted. Each diagnostic has its own BugType: if the null-pointer and
  // out-of-bounds checks shared one, whichever fired second would be reported
  // under the other's category and description.
  BugType *BT_Null;
  BugType *BT_Bounds;
  BugType *BT_Overlap;
public:
  CStringChecker()
  : BT_Null(0), BT_Bounds(0), BT_Overlap(0) {}

  // Returns the address of a function-local static, which serves as this
  // checker's tag.
  static void *getTag() { static int tag; return &tag; }

  // Entry point: dispatches recognized calls to the Eval* methods below.
  // Returns true when the callee was recognized, false otherwise.
  bool EvalCallExpr(CheckerContext &C, const CallExpr *CE);

  // Signature shared by the per-function evaluators. Each returns the state
  // that EvalCallExpr should transition to, or NULL when no transition
  // should be added.
  typedef const GRState *(CStringChecker::*FnCheck)(CheckerContext &,
                                                    const CallExpr *);

  const GRState *EvalMemcpy(CheckerContext &C, const CallExpr *CE);
  const GRState *EvalMemmove(CheckerContext &C, const CallExpr *CE);
  const GRState *EvalMemcmp(CheckerContext &C, const CallExpr *CE);
  const GRState *EvalBcopy(CheckerContext &C, const CallExpr *CE);

  // Utility methods. Each returns the (possibly refined) state on success
  // and NULL when the check failed.
  const GRState *CheckNonNull(CheckerContext &C, const GRState *state,
                              const Stmt *S, SVal l);
  const GRState *CheckLocation(CheckerContext &C, const GRState *state,
                               const Stmt *S, SVal l);
  const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
                                   const Expr *Size,
                                   const Expr *FirstBuf,
                                   const Expr *SecondBuf = NULL);
  const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
                              const Expr *First, const Expr *Second,
                              const Expr *Size);
  void EmitOverlapBug(CheckerContext &C, const GRState *state,
                      const Stmt *First, const Stmt *Second);
};
} //end anonymous namespace

// Creates a CStringChecker and registers it with the given engine.
void clang::RegisterCStringChecker(GRExprEngine &Eng) {
  Eng.registerCheck(new CStringChecker());
}

// Checks that the value 'l' (the value of statement S) is not a null pointer.
//
// Returns 'state' unchanged when 'l' is unknown or undefined, the state in
// which 'l' is assumed non-null when that is feasible, and NULL when 'l' is
// definitely null (a report is emitted if a sink node could be generated).
const GRState *CStringChecker::CheckNonNull(CheckerContext &C,
                                            const GRState *state,
                                            const Stmt *S, SVal l) {
  // FIXME: This method just checks, of course, that the value is non-null.
  // It should maybe be refactored and combined with AttrNonNullChecker.
  if (l.isUnknownOrUndef())
    return state;

  ValueManager &ValMgr = C.getValueManager();
  SValuator &SV = ValMgr.getSValuator();

  Loc Null = ValMgr.makeNull();
  DefinedOrUnknownSVal LocIsNull = SV.EvalEQ(state, cast<Loc>(l), Null);

  const GRState *stateIsNull, *stateIsNonNull;
  llvm::tie(stateIsNull, stateIsNonNull) = state->Assume(LocIsNull);

  if (stateIsNull && !stateIsNonNull) {
    ExplodedNode *N = C.GenerateSink(stateIsNull);
    if (!N)
      return NULL;

    // Use a dedicated bug type so this report never picks up the
    // out-of-bounds description created by CheckLocation (and vice versa).
    if (!BT_Null)
      BT_Null = new BuiltinBug("API",
        "Null pointer argument in call to byte string function");

    // Generate a report for this bug.
    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null);
    EnhancedBugReport *report = new EnhancedBugReport(*BT,
                                                      BT->getDescription(), N);

    report->addRange(S->getSourceRange());
    report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
    C.EmitReport(report);
    return NULL;
  }

  // From here on, assume that the value is non-null.
  assert(stateIsNonNull);
  return stateIsNonNull;
}
// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
//
// Checks that the location 'l' (computed for statement S) lies within the
// bounds of the region it indexes into.
//
// Returns 'state' unchanged when 'l' is not an element region, the state in
// which the index is assumed in bounds when that is feasible, and NULL when
// the index is definitely out of bounds (a report is emitted if a sink node
// could be generated).
const GRState *CStringChecker::CheckLocation(CheckerContext &C,
                                             const GRState *state,
                                             const Stmt *S, SVal l) {
  // Check for out of bound array element access.
  const MemRegion *R = l.getAsRegion();
  if (!R)
    return state;

  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
  if (!ER)
    return state;

  assert(ER->getValueType(C.getASTContext()) == C.getASTContext().CharTy &&
    "CheckLocation should only be called with char* ElementRegions");

  // Get the size of the array.
  const SubRegion *Super = cast<SubRegion>(ER->getSuperRegion());
  ValueManager &ValMgr = C.getValueManager();
  SVal Extent = ValMgr.convertToArrayIndex(Super->getExtent(ValMgr));
  DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);

  // Get the index of the accessed element. Store a copy rather than binding
  // a reference to the cast result: getIndex() appears to return by value, in
  // which case a reference would refer to a temporary that is destroyed at
  // the end of this statement.
  DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());

  const GRState *StInBound = state->AssumeInBound(Idx, Size, true);
  const GRState *StOutBound = state->AssumeInBound(Idx, Size, false);
  if (StOutBound && !StInBound) {
    ExplodedNode *N = C.GenerateSink(StOutBound);
    if (!N)
      return NULL;

    if (!BT_Bounds)
      BT_Bounds = new BuiltinBug("Out-of-bound array access",
        "Byte string function accesses out-of-bound array element "
        "(buffer overflow)");

    // FIXME: It would be nice to eventually make this diagnostic more clear,
    // e.g., by referencing the original declaration or by saying *why* this
    // reference is outside the range.

    // Generate a report for this bug.
    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds);
    RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);

    report->addRange(S->getSourceRange());
    C.EmitReport(report);
    return NULL;
  }

  // Array bound check succeeded. From this point forward the array bound
  // should always succeed.
  return StInBound;
}
// Checks that accessing 'Size' bytes of FirstBuf (and of SecondBuf, when one
// is given) is valid: each buffer must be non-null, and the byte at offset
// size-1 must be within bounds.
//
// Returns the refined state when the checks pass or cannot be performed, and
// NULL when one of them failed.
const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
                                                 const GRState *state,
                                                 const Expr *Size,
                                                 const Expr *FirstBuf,
                                                 const Expr *SecondBuf) {
  ValueManager &VM = C.getValueManager();
  SValuator &SV = VM.getSValuator();
  ASTContext &Ctx = C.getASTContext();

  QualType SizeTy = Ctx.getSizeType();
  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);

  // Get the access length and make sure it is known.
  SVal LengthVal = state->getSVal(Size);
  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
  if (!Length)
    return state;

  // If the length is zero, it doesn't matter what the two buffers are.
  DefinedOrUnknownSVal Zero = VM.makeZeroVal(SizeTy);
  DefinedOrUnknownSVal LengthIsZero = SV.EvalEQ(state, *Length, Zero);

  const GRState *stateZeroLength, *stateNonZeroLength;
  llvm::tie(stateZeroLength, stateNonZeroLength) = state->Assume(LengthIsZero);
  if (stateZeroLength && !stateNonZeroLength)
    return stateZeroLength;

  // FIXME: At this point all we know is it's *possible* for the length to be
  // nonzero; we don't know it for sure. Unfortunately, that means the next few
  // tests are incorrect for the edge cases in which a buffer is null or invalid
  // but the size argument was set to zero in some way that we couldn't track.
  // What we should really do is bifurcate the state here, but that doesn't
  // match the way CheckBufferAccess is being used.

  // From here on, we're going to pretend that even if the length is zero, the
  // buffer access rules still apply. That means the buffer must be non-NULL,
  // and the value at buffer[size-1] must be valid.

  // Check that the first buffer is non-null.
  SVal BufVal = state->getSVal(FirstBuf);
  state = CheckNonNull(C, state, FirstBuf, BufVal);
  if (!state)
    return NULL;

  // Compute the offset of the last element to be accessed: size-1. If the
  // result is not a NonLoc, the bounds checks below are skipped rather than
  // forcing the value through an asserting cast.
  NonLoc One = cast<NonLoc>(VM.makeIntVal(1, SizeTy));
  SVal LastOffsetVal = SV.EvalBinOpNN(state, BinaryOperator::Sub,
                                      *Length, One, SizeTy);
  NonLoc *LastOffset = dyn_cast<NonLoc>(&LastOffsetVal);

  // Check that the first buffer is sufficiently long. CheckNonNull lets
  // unknown buffer values through, so the cast result may not be a Loc; in
  // that case there is nothing to bounds-check.
  if (LastOffset) {
    SVal BufStart = SV.EvalCast(BufVal, PtrTy, FirstBuf->getType());
    if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
      SVal BufEnd = SV.EvalBinOpLN(state, BinaryOperator::Add,
                                   *BufLoc, *LastOffset, PtrTy);
      state = CheckLocation(C, state, FirstBuf, BufEnd);

      // If the buffer isn't large enough, abort.
      if (!state)
        return NULL;
    }
  }

  // If there's a second buffer, check it as well.
  if (SecondBuf) {
    BufVal = state->getSVal(SecondBuf);
    state = CheckNonNull(C, state, SecondBuf, BufVal);
    if (!state)
      return NULL;

    if (LastOffset) {
      SVal BufStart = SV.EvalCast(BufVal, PtrTy, SecondBuf->getType());
      if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
        SVal BufEnd = SV.EvalBinOpLN(state, BinaryOperator::Add,
                                     *BufLoc, *LastOffset, PtrTy);
        state = CheckLocation(C, state, SecondBuf, BufEnd);
      }
    }
  }

  // Large enough or not, return this state!
  return state;
}
// Performs a simple overlap test on two buffer arguments: the buffers are
// considered overlapping if they start at the same address, or if the one
// that starts first extends (by 'Size' bytes) past the start of the other.
//
// Returns NULL after calling EmitOverlapBug when an overlap is certain.
// Otherwise returns a state in which the buffers are assumed not to overlap,
// or the current state when the test cannot be carried out.
const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
                                            const GRState *state,
                                            const Expr *First,
                                            const Expr *Second,
                                            const Expr *Size) {
  ValueManager &ValMgr = state->getStateManager().getValueManager();
  SValuator &Evaluator = ValMgr.getSValuator();
  ASTContext &ASTCtx = ValMgr.getContext();

  // Both buffer values have to be known locations.
  SVal FirstVal = state->getSVal(First);
  SVal SecondVal = state->getSVal(Second);
  Loc *LowLoc = dyn_cast<Loc>(&FirstVal);
  Loc *HighLoc = dyn_cast<Loc>(&SecondVal);
  if (!LowLoc || !HighLoc)
    return state;

  // Identical start addresses are automatically an overlap.
  const GRState *stYes, *stNo;
  DefinedOrUnknownSVal SameStart = Evaluator.EvalEQ(state, *LowLoc, *HighLoc);
  llvm::tie(stYes, stNo) = state->Assume(SameStart);
  if (stYes && !stNo) {
    EmitOverlapBug(C, stYes, First, Second);
    return NULL;
  }

  // Continue under the assumption that the start addresses differ.
  assert(stNo);
  state = stNo;

  // Work out which buffer starts first.
  QualType CmpTy = ASTCtx.IntTy;
  SVal IsReversed = Evaluator.EvalBinOpLL(state, BinaryOperator::GT,
                                          *LowLoc, *HighLoc, CmpTy);
  DefinedOrUnknownSVal *IsReversedTest =
    dyn_cast<DefinedOrUnknownSVal>(&IsReversed);
  if (!IsReversedTest)
    return state;

  llvm::tie(stYes, stNo) = state->Assume(*IsReversedTest);

  // When either order is possible, the test can't be performed.
  if (stYes && stNo)
    return state;

  // Track the expressions alongside the locations so the two stay paired.
  const Expr *LowExpr = First;
  const Expr *HighExpr = Second;
  if (stYes) {
    // The first argument is known to start after the second: exchange them so
    // that the "low" buffer is the one that starts first.
    Loc *SwapLoc = LowLoc;
    LowLoc = HighLoc;
    HighLoc = SwapLoc;

    LowExpr = Second;
    HighExpr = First;
  }

  // The length must be known as well.
  SVal LengthVal = state->getSVal(Size);
  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
  if (!Length)
    return state;

  // View the low buffer's start as a char*; give up if the cast does not
  // produce a location.
  QualType CharPtrTy = ASTCtx.getPointerType(ASTCtx.CharTy);
  SVal LowStart = Evaluator.EvalCast(*LowLoc, CharPtrTy, LowExpr->getType());
  Loc *LowStartLoc = dyn_cast<Loc>(&LowStart);
  if (!LowStartLoc)
    return state;

  // Advance by the length to get the end of the low buffer; give up if that
  // does not produce a location either.
  SVal LowEnd = Evaluator.EvalBinOpLN(state, BinaryOperator::Add,
                                      *LowStartLoc, *Length, CharPtrTy);
  Loc *LowEndLoc = dyn_cast<Loc>(&LowEnd);
  if (!LowEndLoc)
    return state;

  // Does the low buffer run past the start of the high buffer?
  SVal RunsPast = Evaluator.EvalBinOpLL(state, BinaryOperator::GT,
                                        *LowEndLoc, *HighLoc, CmpTy);
  DefinedOrUnknownSVal *RunsPastTest =
    dyn_cast<DefinedOrUnknownSVal>(&RunsPast);
  if (!RunsPastTest)
    return state;

  llvm::tie(stYes, stNo) = state->Assume(*RunsPastTest);
  if (stYes && !stNo) {
    // Definite overlap.
    EmitOverlapBug(C, stYes, LowExpr, HighExpr);
    return NULL;
  }

  // Continue under the assumption that the buffers don't overlap.
  assert(stNo);
  return stNo;
}
// Generates a sink node for 'state' and, if that succeeds, emits an
// overlapping-buffers report that highlights the source ranges of both
// arguments.
void CStringChecker::EmitOverlapBug(CheckerContext &C, const GRState *state,
                                    const Stmt *First, const Stmt *Second) {
  ExplodedNode *Sink = C.GenerateSink(state);
  if (Sink) {
    // The bug type is created on first use.
    if (!BT_Overlap)
      BT_Overlap = new BugType("Unix API", "Improper arguments");

    RangedBugReport *Report =
      new RangedBugReport(*BT_Overlap,
                          "Arguments must not be overlapping buffers", Sink);
    Report->addRange(First->getSourceRange());
    Report->addRange(Second->getSourceRange());

    C.EmitReport(Report);
  }
}
// Evaluates a call to
//   void *memcpy(void *restrict dst, const void *restrict src, size_t n);
//
// memcpy() is evaluated exactly like memmove(), followed by the additional
// requirement that the two buffers do not overlap. Returns NULL if either
// step fails.
const GRState *
CStringChecker::EvalMemcpy(CheckerContext &C, const CallExpr *CE) {
  if (const GRState *afterMove = EvalMemmove(C, CE))
    return CheckOverlap(C, afterMove,
                        CE->getArg(0), CE->getArg(1), CE->getArg(2));

  return NULL;
}
// Evaluates a call to
//   void *memmove(void *dst, const void *src, size_t n);
//
// Both buffers are checked for an n-byte access. On success the call
// expression is bound to the value of the destination argument; on failure
// NULL is returned.
const GRState *
CStringChecker::EvalMemmove(CheckerContext &C, const CallExpr *CE) {
  const Expr *DstArg = CE->getArg(0);
  const Expr *SrcArg = CE->getArg(1);
  const Expr *LenArg = CE->getArg(2);

  // Make sure the accesses stay in bounds.
  const GRState *checked =
    CheckBufferAccess(C, C.getState(), LenArg, DstArg, SrcArg);
  if (checked) {
    // memmove returns the address of the destination buffer.
    return checked->BindExpr(CE, checked->getSVal(DstArg));
  }

  return NULL;
}
// Evaluates a call to
//   int memcmp(const void *s1, const void *s2, size_t n);
//
// The result is bound to zero when the size is known to be zero or when both
// arguments are known to be the same buffer; otherwise it is bound to a
// freshly conjured symbol. Returns NULL when a buffer check fails.
const GRState *
CStringChecker::EvalMemcmp(CheckerContext &C, const CallExpr *CE) {
  const Expr *Left = CE->getArg(0);
  const Expr *Right = CE->getArg(1);
  const Expr *Size = CE->getArg(2);

  const GRState *state = C.getState();
  ValueManager &ValMgr = C.getValueManager();
  SValuator &SV = ValMgr.getSValuator();

  const GRState *stateTrue, *stateFalse;

  // If we know the size argument is 0, we know the result is 0, and we don't
  // have to check either of the buffers. (Another checker will have already
  // made sure the size isn't undefined, so we can cast it safely.)
  DefinedOrUnknownSVal SizeV = cast<DefinedOrUnknownSVal>(state->getSVal(Size));
  DefinedOrUnknownSVal Zero = ValMgr.makeZeroVal(Size->getType());

  DefinedOrUnknownSVal SizeIsZero = SV.EvalEQ(state, SizeV, Zero);
  llvm::tie(stateTrue, stateFalse) = state->Assume(SizeIsZero);

  // FIXME: This should really cause a bifurcation of the state, but that would
  // require changing the contract to allow the various Eval* methods to add
  // transitions themselves. Currently that isn't the case because some of these
  // functions are "basically" like another function, but with one or two
  // additional restrictions (like memcpy and memmove).

  if (stateTrue && !stateFalse)
    return stateTrue->BindExpr(CE, ValMgr.makeZeroVal(CE->getType()));

  // At this point, we still don't know that the size is nonzero, only that it
  // might be.

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LBuf = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
  DefinedOrUnknownSVal RBuf = cast<DefinedOrUnknownSVal>(state->getSVal(Right));

  // See if they are the same.
  DefinedOrUnknownSVal SameBuf = SV.EvalEQ(state, LBuf, RBuf);
  llvm::tie(stateTrue, stateFalse) = state->Assume(SameBuf);

  // FIXME: This should also bifurcate the state (as above).

  // If the two arguments are known to be the same buffer, we know the result is
  // zero, and we only need to check one size.
  if (stateTrue && !stateFalse) {
    state = CheckBufferAccess(C, stateTrue, Size, Left);

    // CheckBufferAccess returns NULL when the buffer is null or too small;
    // there is no state to bind the result in, so propagate the failure.
    if (!state)
      return NULL;

    return state->BindExpr(CE, ValMgr.makeZeroVal(CE->getType()));
  }

  // At this point, we don't know if the arguments are the same or not -- we
  // only know that they *might* be different. We can't make any assumptions.

  // The return value is the comparison result, which we don't know.
  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
  SVal RetVal = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
  state = state->BindExpr(CE, RetVal);

  // Check that the accesses will stay in bounds.
  return CheckBufferAccess(C, state, Size, Left, Right);
}
// Evaluates a call to
//   void bcopy(const void *src, void *dst, size_t n);
//
// Only the buffer accesses are checked; nothing is bound to the call
// expression.
const GRState *
CStringChecker::EvalBcopy(CheckerContext &C, const CallExpr *CE) {
  const Expr *SrcArg = CE->getArg(0);
  const Expr *DstArg = CE->getArg(1);
  const Expr *LenArg = CE->getArg(2);

  return CheckBufferAccess(C, C.getState(), LenArg, SrcArg, DstArg);
}
// Dispatches a call to the matching Eval* method, based on the callee's name.
//
// Returns false when the callee cannot be resolved to a function declaration
// with a simple identifier, or when its name is not one of the functions
// handled here. Returns true otherwise; a transition is added only when the
// evaluator produced a state.
bool CStringChecker::EvalCallExpr(CheckerContext &C, const CallExpr *CE) {
  // Get the callee. All the functions we care about are C functions
  // with simple identifiers.
  const GRState *state = C.getState();
  const Expr *Callee = CE->getCallee();
  const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();

  if (!FD)
    return false;

  // Declarations without a simple identifier (overloaded operators, for
  // example) can't be any of the functions handled here, and getName() must
  // not be called on them.
  if (!FD->getIdentifier())
    return false;

  // Get the name of the callee. If it's a builtin, strip off the prefix.
  llvm::StringRef Name = FD->getName();
  if (Name.startswith("__builtin_"))
    Name = Name.substr(10);

  FnCheck EvalFunction = llvm::StringSwitch<FnCheck>(Name)
    .Cases("memcpy", "__memcpy_chk", &CStringChecker::EvalMemcpy)
    .Cases("memcmp", "bcmp", &CStringChecker::EvalMemcmp)
    .Cases("memmove", "__memmove_chk", &CStringChecker::EvalMemmove)
    .Case("bcopy", &CStringChecker::EvalBcopy)
    .Default(NULL);

  if (!EvalFunction)
    // The callee isn't a string function. Let another checker handle it.
    return false;

  const GRState *NewState = (this->*EvalFunction)(C, CE);

  // A NULL state means the evaluator found nothing to transition to.
  if (NewState)
    C.addTransition(NewState);
  return true;
}