[esan] Add handling of large stack size rlimits

Summary:
Adds detection of large stack size rlimits (1 TB or larger, or unlimited),
which result in an mmap location that our shadow mapping does not support.
We lower the limit and re-exec the application in this situation.  Adds a
test of this behavior.

Adds general detection of mmap regions outside of our app regions.  In the
future we want to try to adaptively handle these but for now we abort.

Moves the existing Linux-specific mmap code into a platform-specific file
where the new rlimit code lives.

Reviewers: eugenis

Subscribers: vitalybuka, zhaoqin, kcc, aizatsky, llvm-commits, kubabrecka

Differential Revision: http://reviews.llvm.org/D20745

llvm-svn: 271079
diff --git a/compiler-rt/lib/esan/CMakeLists.txt b/compiler-rt/lib/esan/CMakeLists.txt
index 3bdf32b..24bed26 100644
--- a/compiler-rt/lib/esan/CMakeLists.txt
+++ b/compiler-rt/lib/esan/CMakeLists.txt
@@ -12,6 +12,7 @@
   esan_flags.cpp
   esan_interface.cpp
   esan_interceptors.cpp
+  esan_linux.cpp
   cache_frag.cpp
   working_set.cpp)
 
diff --git a/compiler-rt/lib/esan/esan.cpp b/compiler-rt/lib/esan/esan.cpp
index b2110bb..2711e4b 100644
--- a/compiler-rt/lib/esan/esan.cpp
+++ b/compiler-rt/lib/esan/esan.cpp
@@ -124,6 +124,8 @@
 #endif
 
 static void initializeShadow() {
+  verifyAddressSpace();
+
   DCHECK(verifyShadowScheme());
 
   Mapping.initialize(ShadowScale[WhichTool]);
diff --git a/compiler-rt/lib/esan/esan.h b/compiler-rt/lib/esan/esan.h
index 2dc8d19..5be2240 100644
--- a/compiler-rt/lib/esan/esan.h
+++ b/compiler-rt/lib/esan/esan.h
@@ -26,6 +26,7 @@
 #ifndef ESAN_H
 #define ESAN_H
 
+#include "interception/interception.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "esan_interface_internal.h"
 
@@ -44,6 +45,11 @@
 void processRangeAccess(uptr PC, uptr Addr, int Size, bool IsWrite);
 void initializeInterceptors();
 
+// Platform-dependent routines.
+void verifyAddressSpace();
+bool fixMmapAddr(void **Addr, SIZE_T Size, int Flags);
+uptr checkMmapResult(uptr Addr, SIZE_T Size);
+
 } // namespace __esan
 
 #endif // ESAN_H
diff --git a/compiler-rt/lib/esan/esan_interceptors.cpp b/compiler-rt/lib/esan/esan_interceptors.cpp
index 8c927c6..a9489d2 100644
--- a/compiler-rt/lib/esan/esan_interceptors.cpp
+++ b/compiler-rt/lib/esan/esan_interceptors.cpp
@@ -21,19 +21,6 @@
 
 using namespace __esan; // NOLINT
 
-// FIXME: if this gets more complex as more platforms are added we may
-// want to split pieces into separate platform-specific files.
-#if SANITIZER_LINUX
-// Sanitizer runtimes in general want to avoid including system headers.
-// We define the few constants we need here:
-const int EINVAL = 22; // from /usr/include/asm-generic/errno-base.h
-const int MAP_FIXED = 0x10; // from /usr/include/sys/mman.h
-extern "C" int *__errno_location();
-#define errno (*__errno_location())
-#else
-#error Other platforms are not yet supported.
-#endif
-
 #define CUR_PC() (StackTrace::GetCurrentPc())
 
 //===----------------------------------------------------------------------===//
@@ -343,35 +330,12 @@
 // These are candidates for sharing with all sanitizers if shadow memory
 // support is also standardized.
 
-static bool fixMmapAddr(void **addr, SIZE_T sz, int flags) {
-  if (*addr) {
-    uptr AppStart, AppEnd;
-    bool SingleApp = false;
-    for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
-      if ((uptr)*addr >= AppStart && (uptr)*addr + sz - 1 <= AppEnd) {
-        SingleApp = true;
-        break;
-      }
-    }
-    if (!SingleApp) {
-      VPrintf(1, "mmap conflict: [%p-%p) is not in an app region\n",
-              *addr, (uptr)*addr + sz);
-      if (flags & MAP_FIXED) {
-        errno = EINVAL;
-        return false;
-      } else {
-        *addr = 0;
-      }
-    }
-  }
-  return true;
-}
-
 INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags,
                  int fd, OFF_T off) {
   if (!fixMmapAddr(&addr, sz, flags))
     return (void *)-1;
-  return REAL(mmap)(addr, sz, prot, flags, fd, off);
+  void *result = REAL(mmap)(addr, sz, prot, flags, fd, off);
+  return (void *)checkMmapResult((uptr)result, sz);
 }
 
 #if SANITIZER_LINUX
@@ -379,7 +343,8 @@
                  int fd, OFF64_T off) {
   if (!fixMmapAddr(&addr, sz, flags))
     return (void *)-1;
-  return REAL(mmap64)(addr, sz, prot, flags, fd, off);
+  void *result = REAL(mmap64)(addr, sz, prot, flags, fd, off);
+  return (void *)checkMmapResult((uptr)result, sz);
 }
 #define ESAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
 #else
diff --git a/compiler-rt/lib/esan/esan_linux.cpp b/compiler-rt/lib/esan/esan_linux.cpp
new file mode 100644
index 0000000..aa961b6
--- /dev/null
+++ b/compiler-rt/lib/esan/esan_linux.cpp
@@ -0,0 +1,83 @@
+//===-- esan_linux.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Linux-specific code for the Esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_FREEBSD || SANITIZER_LINUX
+
+#include "esan.h"
+#include "esan_shadow.h"
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include <sys/mman.h>
+#include <errno.h>
+
+namespace __esan {
+
+void verifyAddressSpace() {
+#if SANITIZER_LINUX && defined(__x86_64__)
+  // The kernel determines its mmap base from the stack size limit.
+  // Our Linux 64-bit shadow mapping assumes the stack limit is less than a
+  // terabyte, which keeps the mmap region above 0x7e00'.
+  uptr StackLimit = GetStackSizeLimitInBytes();
+  if (StackSizeIsUnlimited() || StackLimit > MaxStackSize) {
+    VReport(1, "The stack size limit is beyond the maximum supported.\n"
+            "Re-execing with a stack size below 1TB.\n");
+    SetStackSizeLimitInBytes(MaxStackSize);
+    ReExec();
+  }
+#endif
+}
+
+static bool liesWithinSingleAppRegion(uptr Start, SIZE_T Size) {
+  uptr AppStart, AppEnd;
+  for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+    if (Start >= AppStart && Start + Size - 1 <= AppEnd) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool fixMmapAddr(void **Addr, SIZE_T Size, int Flags) {
+  if (*Addr) {
+    if (!liesWithinSingleAppRegion((uptr)*Addr, Size)) {
+      VPrintf(1, "mmap conflict: [%p-%p) is not in an app region\n",
+              *Addr, (uptr)*Addr + Size);
+      if (Flags & MAP_FIXED) {
+        errno = EINVAL;
+        return false;
+      } else {
+        *Addr = 0;
+      }
+    }
+  }
+  return true;
+}
+
+uptr checkMmapResult(uptr Addr, SIZE_T Size) {
+  if ((void *)Addr == MAP_FAILED)
+    return Addr;
+  if (!liesWithinSingleAppRegion(Addr, Size)) {
+    // FIXME: attempt to dynamically add this as an app region if it
+    // fits our shadow criteria.
+    // We could also try to remap somewhere else.
+    Printf("ERROR: unsupported mapping at [%p-%p)\n", Addr, Addr+Size);
+    Die();
+  }
+  return Addr;
+}
+
+} // namespace __esan
+
+#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
diff --git a/compiler-rt/lib/esan/esan_shadow.h b/compiler-rt/lib/esan/esan_shadow.h
index 4507c3d..f8f154e 100644
--- a/compiler-rt/lib/esan/esan_shadow.h
+++ b/compiler-rt/lib/esan/esan_shadow.h
@@ -40,6 +40,10 @@
 // library region to distinguish the vsyscall's shadow, considering this gap to
 // be an invalid app region.
 // We disallow application memory outside of those 5 regions.
+// Our regions assume that the stack rlimit is less than a terabyte (otherwise
+// the Linux kernel's default mmap region drops below 0x7e00'), which we enforce
+// at init time (we can support larger and unlimited sizes for shadow
+// scaledowns, but it is difficult for 1:1 mappings).
 //
 // Our shadow memory is scaled from a 1:1 mapping and supports a scale
 // specified at library initialization time that can be any power-of-2
@@ -103,6 +107,10 @@
 };
 static const u32 NumAppRegions = sizeof(AppRegions)/sizeof(AppRegions[0]);
 
+// See the comment above: we do not currently support a stack size rlimit
+// equal to or larger than 1TB.
+static const uptr MaxStackSize = (1ULL << 40) - 4096;
+
 class ShadowMapping {
 public:
   static const uptr Mask = 0x00000fffffffffffu;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
index 5aaedde..f78f507 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
@@ -298,6 +298,7 @@
 char **GetArgv();
 void PrintCmdline();
 bool StackSizeIsUnlimited();
+uptr GetStackSizeLimitInBytes();
 void SetStackSizeLimitInBytes(uptr limit);
 bool AddressSpaceIsUnlimited();
 void SetAddressSpaceUnlimited();
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc
index 12c774d..f1e8b50 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc
@@ -98,6 +98,10 @@
   return (stack_size == RLIM_INFINITY);
 }
 
+uptr GetStackSizeLimitInBytes() {
+  return (uptr)getlim(RLIMIT_STACK);
+}
+
 void SetStackSizeLimitInBytes(uptr limit) {
   setlim(RLIMIT_STACK, (rlim_t)limit);
   CHECK(!StackSizeIsUnlimited());
diff --git a/compiler-rt/test/esan/TestCases/large-stack-linux.c b/compiler-rt/test/esan/TestCases/large-stack-linux.c
new file mode 100644
index 0000000..856da2a
--- /dev/null
+++ b/compiler-rt/test/esan/TestCases/large-stack-linux.c
@@ -0,0 +1,74 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts=verbosity=1 %run %t %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+// Sets the soft stack rlimit to StackLimit, then forks a child that execs
+// ToRun under that limit while the parent waits for the child to exit.
+static void testChildStackLimit(rlim_t StackLimit, char *ToRun) {
+  int Res;
+  struct rlimit Limit;
+  // Honor the caller's limit so each test case exercises its own value.
+  Limit.rlim_cur = StackLimit;
+  Limit.rlim_max = RLIM_INFINITY;
+  Res = setrlimit(RLIMIT_STACK, &Limit);
+  if (Res != 0) {
+    // Probably we were ourselves re-execed with a lowered limit we can no longer
+    // raise.  FileCheck has no branches, so emulate the regular test's output.
+    fprintf(stderr, "in esan::initializeLibrary\n");
+    fprintf(stderr, "==1234==The stack size limit is beyond the maximum supported.\n");
+    fprintf(stderr, "Re-execing with a stack size below 1TB.\n");
+    fprintf(stderr, "in esan::initializeLibrary\n");
+    fprintf(stderr, "done\n");
+    fprintf(stderr, "in esan::finalizeLibrary\n");
+    return;
+  }
+
+  pid_t Child = fork();
+  assert(Child >= 0);
+  if (Child > 0) {
+    pid_t WaitRes = waitpid(Child, NULL, 0);
+    assert(WaitRes == Child);
+  } else {
+    char *Args[2];
+    Args[0] = ToRun;
+    Args[1] = NULL;
+    Res = execv(ToRun, Args);
+    assert(0); // Should not be reached.
+  }
+}
+
+int main(int argc, char *argv[]) {
+  // The path to the program to exec must be passed in the first time.
+  if (argc == 2) {
+    fprintf(stderr, "Testing child with infinite stack\n");
+    testChildStackLimit(RLIM_INFINITY, argv[1]);
+    fprintf(stderr, "Testing child with 1TB stack\n");
+    testChildStackLimit(1ULL << 40, argv[1]);
+  }
+  fprintf(stderr, "done\n");
+  // CHECK:      in esan::initializeLibrary
+  // CHECK:      Testing child with infinite stack
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK-NEXT: =={{[0-9]+}}==The stack size limit is beyond the maximum supported.
+  // CHECK-NEXT: Re-execing with a stack size below 1TB.
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK:      done
+  // CHECK:      in esan::finalizeLibrary
+  // CHECK:      Testing child with 1TB stack
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK-NEXT: =={{[0-9]+}}==The stack size limit is beyond the maximum supported.
+  // CHECK-NEXT: Re-execing with a stack size below 1TB.
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK:      done
+  // CHECK-NEXT: in esan::finalizeLibrary
+  // CHECK:      done
+  // CHECK-NEXT: in esan::finalizeLibrary
+  return 0;
+}
diff --git a/compiler-rt/test/esan/TestCases/mmap-shadow-conflict.c b/compiler-rt/test/esan/TestCases/mmap-shadow-conflict.c
index cb45bd1..4b3c58b 100644
--- a/compiler-rt/test/esan/TestCases/mmap-shadow-conflict.c
+++ b/compiler-rt/test/esan/TestCases/mmap-shadow-conflict.c
@@ -16,7 +16,8 @@
                    MAP_ANON|MAP_PRIVATE, -1, 0);
   fprintf(stderr, "mapped %p\n", Map);
   // CHECK:      in esan::initializeLibrary
-  // CHECK-NEXT: Shadow scale=2 offset=0x440000000000
+  // (There can be a re-exec for stack limit here.)
+  // CHECK:      Shadow scale=2 offset=0x440000000000
   // CHECK-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
   // CHECK-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
   // CHECK-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
diff --git a/compiler-rt/test/esan/TestCases/verbose-simple.c b/compiler-rt/test/esan/TestCases/verbose-simple.c
index e793f08..c136dc4 100644
--- a/compiler-rt/test/esan/TestCases/verbose-simple.c
+++ b/compiler-rt/test/esan/TestCases/verbose-simple.c
@@ -3,7 +3,8 @@
 
 int main(int argc, char **argv) {
   // CHECK:      in esan::initializeLibrary
-  // CHECK-NEXT: Shadow scale=2 offset=0x440000000000
+  // (There can be a re-exec for stack limit here.)
+  // CHECK:      Shadow scale=2 offset=0x440000000000
   // CHECK-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
   // CHECK-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
   // CHECK-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
diff --git a/compiler-rt/test/esan/TestCases/workingset-memset.cpp b/compiler-rt/test/esan/TestCases/workingset-memset.cpp
index 0c5db69..a0c36e3 100644
--- a/compiler-rt/test/esan/TestCases/workingset-memset.cpp
+++ b/compiler-rt/test/esan/TestCases/workingset-memset.cpp
@@ -8,12 +8,13 @@
 #include <string.h>
 
 int main(int argc, char **argv) {
-  const int iters = 630;
-  const int size = 64*iters;
-  char *p = (char *)malloc(size);
+  const int size = 128*1024*1024;
+  char *p = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
+                         MAP_ANON | MAP_PRIVATE, -1, 0);
   // Test the slowpath at different cache line boundaries.
   for (int i = 0; i < 630; i++)
     memset((char *)p + 63*i, i, 63*i);
+  munmap(p, size);
   return 0;
   // FIXME: once the memory scan and size report is in place add it here.
   // CHECK: {{.*}}EfficiencySanitizer is not finished: nothing yet to report