bpo-35214: Initial clang MemorySanitizer support (GH-10479)

Adds configure flags for msan and ubsan builds to make it easier to enable.
These also encode the detail that address sanitizer and memory sanitizer
should disable pymalloc.

Define MEMORY_SANITIZER when appropriate at build time and adds workarounds
to existing code to mark things as initialized where the sanitizer is otherwise unable to
determine that.  This lets our build succeed under the memory sanitizer.  not all tests
pass without sanitizer failures yet but we're in pretty good shape after this.
diff --git a/Include/Python.h b/Include/Python.h
index cf87a5c..bece208 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -53,6 +53,15 @@
 #include "pyport.h"
 #include "pymacro.h"
 
+/* A convenient way for code to know if clang's memory sanitizer is enabled. */
+#if defined(__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    if !defined(MEMORY_SANITIZER)
+#      define MEMORY_SANITIZER
+#    endif
+#  endif
+#endif
+
 /* Debug-mode build with pymalloc implies PYMALLOC_DEBUG.
  *  PYMALLOC_DEBUG is in error if pymalloc is not in use.
  */
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-12-11-38-06.bpo-35214.PCHKbX.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-12-11-38-06.bpo-35214.PCHKbX.rst
new file mode 100644
index 0000000..c7842f3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-12-11-38-06.bpo-35214.PCHKbX.rst
@@ -0,0 +1,4 @@
+The interpreter and extension modules have had annotations added so that
+they work properly under clang's Memory Sanitizer.  A new configure flag
+--with-memory-sanitizer has been added to make test builds of this nature
+easier to perform.
diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c
index ba154fe..33b7055 100644
--- a/Modules/_ctypes/callproc.c
+++ b/Modules/_ctypes/callproc.c
@@ -75,6 +75,10 @@
 #include <alloca.h>
 #endif
 
+#ifdef MEMORY_SANITIZER
+#include <sanitizer/msan_interface.h>
+#endif
+
 #if defined(_DEBUG) || defined(__MINGW32__)
 /* Don't use structured exception handling on Windows if this is defined.
    MingW, AFAIK, doesn't support it.
@@ -1125,6 +1129,13 @@
     rtype = _ctypes_get_ffi_type(restype);
     resbuf = alloca(max(rtype->size, sizeof(ffi_arg)));
 
+#ifdef MEMORY_SANITIZER
+    /* ffi_call actually initializes resbuf, but from asm, which
+     * MemorySanitizer can't detect. Avoid false positives from MSan. */
+    if (resbuf != NULL) {
+        __msan_unpoison(resbuf, max(rtype->size, sizeof(ffi_arg)));
+    }
+#endif
     avalues = (void **)alloca(sizeof(void *) * argcount);
     atypes = (ffi_type **)alloca(sizeof(ffi_type *) * argcount);
     if (!resbuf || !avalues || !atypes) {
diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c
index 9661e38..7ee3f71 100644
--- a/Modules/_posixsubprocess.c
+++ b/Modules/_posixsubprocess.c
@@ -21,6 +21,10 @@
 #include <dirent.h>
 #endif
 
+#ifdef MEMORY_SANITIZER
+# include <sanitizer/msan_interface.h>
+#endif
+
 #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
 # include <sys/linux-syscalls.h>
 # define SYS_getdents64  __NR_getdents64
@@ -287,6 +291,9 @@
                                 sizeof(buffer))) > 0) {
             struct linux_dirent64 *entry;
             int offset;
+#ifdef MEMORY_SANITIZER
+            __msan_unpoison(buffer, bytes);
+#endif
             for (offset = 0; offset < bytes; offset += entry->d_reclen) {
                 int fd;
                 entry = (struct linux_dirent64 *)(buffer + offset);
diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c
index 2f9c2f6..17bf3fa 100644
--- a/Modules/faulthandler.c
+++ b/Modules/faulthandler.c
@@ -1370,7 +1370,7 @@
 #ifdef HAVE_SIGALTSTACK
     if (stack.ss_sp != NULL) {
         /* Fetch the current alt stack */
-        stack_t current_stack;
+        stack_t current_stack = {};
         if (sigaltstack(NULL, &current_stack) == 0) {
             if (current_stack.ss_sp == stack.ss_sp) {
                 /* The current alt stack is the one that we installed.
diff --git a/Python/bootstrap_hash.c b/Python/bootstrap_hash.c
index 7b187f1..1b8f9d9 100644
--- a/Python/bootstrap_hash.c
+++ b/Python/bootstrap_hash.c
@@ -20,6 +20,10 @@
 #  endif
 #endif
 
+#ifdef MEMORY_SANITIZER
+#  include <sanitizer/msan_interface.h>
+#endif
+
 #ifdef Py_DEBUG
 int _Py_HashSecret_Initialized = 0;
 #else
@@ -143,6 +147,11 @@
         else {
             n = syscall(SYS_getrandom, dest, n, flags);
         }
+#  ifdef MEMORY_SANITIZER
+        if (n > 0) {
+             __msan_unpoison(dest, n);
+        }
+#  endif
 #endif
 
         if (n < 0) {
diff --git a/Python/pymath.c b/Python/pymath.c
index 6799d20..c1606bd 100644
--- a/Python/pymath.c
+++ b/Python/pymath.c
@@ -17,7 +17,9 @@
 
 /* inline assembly for getting and setting the 387 FPU control word on
    gcc/x86 */
-
+#ifdef MEMORY_SANITIZER
+__attribute__((no_sanitize_memory))
+#endif
 unsigned short _Py_get_387controlword(void) {
     unsigned short cw;
     __asm__ __volatile__ ("fnstcw %0" : "=m" (cw));
diff --git a/configure b/configure
index 3d66c1c..e9e33b3 100755
--- a/configure
+++ b/configure
@@ -819,6 +819,8 @@
 with_lto
 with_hash_algorithm
 with_address_sanitizer
+with_memory_sanitizer
+with_undefined_behavior_sanitizer
 with_libs
 with_system_expat
 with_system_ffi
@@ -1515,7 +1517,10 @@
   --with-hash-algorithm=[fnv|siphash24]
                           select hash algorithm
   --with-address-sanitizer
-                          enable AddressSanitizer
+                          enable AddressSanitizer (asan)
+  --with-memory-sanitizer enable MemorySanitizer (msan)
+  --with-undefined-behavior-sanitizer
+                          enable UndefinedBehaviorSanitizer (ubsan)
   --with-libs='lib1 ...'  link against additional libs
   --with-system-expat     build pyexpat module using an installed expat
                           library
@@ -10038,6 +10043,44 @@
 $as_echo "$withval" >&6; }
 BASECFLAGS="-fsanitize=address -fno-omit-frame-pointer $BASECFLAGS"
 LDFLAGS="-fsanitize=address $LDFLAGS"
+# ASan works by controlling memory allocation, our own malloc interferes.
+with_pymalloc="no"
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-memory-sanitizer" >&5
+$as_echo_n "checking for --with-memory-sanitizer... " >&6; }
+
+# Check whether --with-memory_sanitizer was given.
+if test "${with_memory_sanitizer+set}" = set; then :
+  withval=$with_memory_sanitizer;
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
+$as_echo "$withval" >&6; }
+BASECFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer $BASECFLAGS"
+LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 $LDFLAGS"
+# MSan works by controlling memory allocation, our own malloc interferes.
+with_pymalloc="no"
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-undefined-behavior-sanitizer" >&5
+$as_echo_n "checking for --with-undefined-behavior-sanitizer... " >&6; }
+
+# Check whether --with-undefined_behavior_sanitizer was given.
+if test "${with_undefined_behavior_sanitizer+set}" = set; then :
+  withval=$with_undefined_behavior_sanitizer;
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
+$as_echo "$withval" >&6; }
+BASECFLAGS="-fsanitize=undefined $BASECFLAGS"
+LDFLAGS="-fsanitize=undefined $LDFLAGS"
 
 else
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
diff --git a/configure.ac b/configure.ac
index d1502df..15d03ba 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2854,11 +2854,37 @@
 AC_MSG_CHECKING(for --with-address-sanitizer)
 AC_ARG_WITH(address_sanitizer,
             AS_HELP_STRING([--with-address-sanitizer],
-                           [enable AddressSanitizer]),
+                           [enable AddressSanitizer (asan)]),
 [
 AC_MSG_RESULT($withval)
 BASECFLAGS="-fsanitize=address -fno-omit-frame-pointer $BASECFLAGS"
 LDFLAGS="-fsanitize=address $LDFLAGS"
+# ASan works by controlling memory allocation, our own malloc interferes.
+with_pymalloc="no"
+],
+[AC_MSG_RESULT(no)])
+
+AC_MSG_CHECKING(for --with-memory-sanitizer)
+AC_ARG_WITH(memory_sanitizer,
+            AS_HELP_STRING([--with-memory-sanitizer],
+                           [enable MemorySanitizer (msan)]),
+[
+AC_MSG_RESULT($withval)
+BASECFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer $BASECFLAGS"
+LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 $LDFLAGS"
+# MSan works by controlling memory allocation, our own malloc interferes.
+with_pymalloc="no"
+],
+[AC_MSG_RESULT(no)])
+
+AC_MSG_CHECKING(for --with-undefined-behavior-sanitizer)
+AC_ARG_WITH(undefined_behavior_sanitizer,
+            AS_HELP_STRING([--with-undefined-behavior-sanitizer],
+                           [enable UndefinedBehaviorSanitizer (ubsan)]),
+[
+AC_MSG_RESULT($withval)
+BASECFLAGS="-fsanitize=undefined $BASECFLAGS"
+LDFLAGS="-fsanitize=undefined $LDFLAGS"
 ],
 [AC_MSG_RESULT(no)])