Perform array bounds checking in more situations and properly handle special
case situations with the unary operators & and *. Also extend the array bounds
checking to work with pointer arithmetic; the pointer arithemtic checking can
be turned on using -Warray-bounds-pointer-arithmetic.

The changes to where CheckArrayAccess gets called is based on some trial &
error and a bunch of digging through source code and gdb backtraces in order
to have the check performed under as many situations as possible (such as for
variable initializers, arguments to function calls, and within conditional in
addition to the simpler cases of the operands to binary and unary operator)
while not being called--and triggering warnings--more than once for a given
ArraySubscriptExpr.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@136997 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index c54d250..8dfcfe7 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -3370,6 +3370,11 @@
   if (E->isTypeDependent() || E->isValueDependent())
     return;
 
+  // Check for array bounds violations in cases where the check isn't triggered
+  // elsewhere for other Expr types (like BinaryOperators), e.g. when an
+  // ArraySubscriptExpr is on the RHS of a variable initialization.
+  CheckArrayAccess(E);
+
   // This is not the right CC for (e.g.) a variable initialization.
   AnalyzeImplicitConversions(*this, E, CC);
 }
@@ -3474,6 +3479,15 @@
     << TRange << Op->getSourceRange();
 }
 
+static const Type* getElementType(const Expr *BaseExpr) {
+  const Type* EltType = BaseExpr->getType().getTypePtr();
+  if (EltType->isAnyPointerType())
+    return EltType->getPointeeType().getTypePtr();
+  else if (EltType->isArrayType())
+    return EltType->getBaseElementTypeUnsafe();
+  return EltType;
+}
+
 /// \brief Check whether this array fits the idiom of a size-one tail padded
 /// array member of a struct.
 ///
@@ -3510,19 +3524,21 @@
   return false;
 }
 
-static void CheckArrayAccess_Check(Sema &S,
-                                   const clang::ArraySubscriptExpr *E) {
-  const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts();
+void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
+                            bool isSubscript, bool AllowOnePastEnd) {
+  const Type* EffectiveType = getElementType(BaseExpr);
+  BaseExpr = BaseExpr->IgnoreParenCasts();
+  IndexExpr = IndexExpr->IgnoreParenCasts();
+
   const ConstantArrayType *ArrayTy =
-    S.Context.getAsConstantArrayType(BaseExpr->getType());
+    Context.getAsConstantArrayType(BaseExpr->getType());
   if (!ArrayTy)
     return;
 
-  const Expr *IndexExpr = E->getIdx();
   if (IndexExpr->isValueDependent())
     return;
   llvm::APSInt index;
-  if (!IndexExpr->isIntegerConstantExpr(index, S.Context))
+  if (!IndexExpr->isIntegerConstantExpr(index, Context))
     return;
 
   const NamedDecl *ND = NULL;
@@ -3535,47 +3551,94 @@
     llvm::APInt size = ArrayTy->getSize();
     if (!size.isStrictlyPositive())
       return;
+
+    const Type* BaseType = getElementType(BaseExpr);
+    if (!isSubscript && BaseType != EffectiveType) {
+      // Make sure we're comparing apples to apples when comparing index to size
+      uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
+      uint64_t array_typesize = Context.getTypeSize(BaseType);
+      if (ptrarith_typesize != array_typesize) {
+        // There's a cast to a different size type involved
+        uint64_t ratio = array_typesize / ptrarith_typesize;
+        // TODO: Be smarter about handling cases where array_typesize is not a
+        // multiple of ptrarith_typesize
+        if (ptrarith_typesize * ratio == array_typesize)
+          size *= llvm::APInt(size.getBitWidth(), ratio);
+      }
+    }
+
     if (size.getBitWidth() > index.getBitWidth())
       index = index.sext(size.getBitWidth());
     else if (size.getBitWidth() < index.getBitWidth())
       size = size.sext(index.getBitWidth());
 
-    // Don't warn for valid indexes
-    if (index.slt(size))
+    // For array subscripting the index must be less than size, but for pointer
+    // arithmetic also allow the index (offset) to be equal to size since
+    // computing the next address after the end of the array is legal and
+    // commonly done e.g. in C++ iterators and range-based for loops.
+    if (AllowOnePastEnd ? index.sle(size) : index.slt(size))
       return;
 
     // Also don't warn for arrays of size 1 which are members of some
     // structure. These are often used to approximate flexible arrays in C89
     // code.
-    if (IsTailPaddedMemberArray(S, size, ND))
+    if (IsTailPaddedMemberArray(*this, size, ND))
       return;
 
-    S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,
-                          S.PDiag(diag::warn_array_index_exceeds_bounds)
-                            << index.toString(10, true)
-                            << size.toString(10, true)
-                            << (unsigned)size.ugt(1)
-                            << IndexExpr->getSourceRange());
+    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
+    if (isSubscript)
+      DiagID = diag::warn_array_index_exceeds_bounds;
+
+    DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,
+                        PDiag(DiagID) << index.toString(10, true)
+                          << size.toString(10, true)
+                          << (unsigned)size.getLimitedValue(~0U)
+                          << IndexExpr->getSourceRange());
   } else {
-    S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,
-                          S.PDiag(diag::warn_array_index_precedes_bounds)
-                            << index.toString(10, true)
-                            << IndexExpr->getSourceRange());
+    unsigned DiagID = diag::warn_array_index_precedes_bounds;
+    if (!isSubscript) {
+      DiagID = diag::warn_ptr_arith_precedes_bounds;
+      if (index.isNegative()) index = -index;
+    }
+
+    DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,
+                        PDiag(DiagID) << index.toString(10, true)
+                          << IndexExpr->getSourceRange());
   }
 
   if (ND)
-    S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,
-                          S.PDiag(diag::note_array_index_out_of_bounds)
-                            << ND->getDeclName());
+    DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,
+                        PDiag(diag::note_array_index_out_of_bounds)
+                          << ND->getDeclName());
 }
 
 void Sema::CheckArrayAccess(const Expr *expr) {
-  while (true) {
-    expr = expr->IgnoreParens();
+  int AllowOnePastEnd = 0;
+  while (expr) {
+    expr = expr->IgnoreParenImpCasts();
     switch (expr->getStmtClass()) {
-      case Stmt::ArraySubscriptExprClass:
-        CheckArrayAccess_Check(*this, cast<ArraySubscriptExpr>(expr));
+      case Stmt::ArraySubscriptExprClass: {
+        const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(expr);
+        CheckArrayAccess(ASE->getBase(), ASE->getIdx(), true,
+                         AllowOnePastEnd > 0);
         return;
+      }
+      case Stmt::UnaryOperatorClass: {
+        // Only unwrap the * and & unary operators
+        const UnaryOperator *UO = cast<UnaryOperator>(expr);
+        expr = UO->getSubExpr();
+        switch (UO->getOpcode()) {
+          case UO_AddrOf:
+            AllowOnePastEnd++;
+            break;
+          case UO_Deref:
+            AllowOnePastEnd--;
+            break;
+          default:
+            return;
+        }
+        break;
+      }
       case Stmt::ConditionalOperatorClass: {
         const ConditionalOperator *cond = cast<ConditionalOperator>(expr);
         if (const Expr *lhs = cond->getLHS())