Disable floating point code/linking when possible.

Unless heap profiling is enabled, disable floating point code and don't
link with libm.  This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on
x64 systems, makes it possible to completely disable floating point
register use.  Some versions of glibc neglect to save/restore
caller-saved floating point registers during dynamic lazy symbol
loading, and the symbol loading code uses whatever malloc the
application happens to have linked/loaded with, the result being
potential floating point register corruption.
diff --git a/ChangeLog b/ChangeLog
index 90ab107..34d017e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,13 @@
 * 3.5.0 (XXX)
 
   Bug fixes:
+  - Unless heap profiling is enabled, disable floating point code and don't link
+    with libm.  This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64
+    systems, makes it possible to completely disable floating point register
+    use.  Some versions of glibc neglect to save/restore caller-saved floating
+    point registers during dynamic lazy symbol loading, and the symbol loading
+    code uses whatever malloc the application happens to have linked/loaded
+    with, the result being potential floating point register corruption.
   - Change the default private namespace prefix from empty to je_, and change
     --with-private-namespace-prefix so that it prepends an additional prefix
     rather than replacing je_.  This reduces the likelihood of applications
diff --git a/configure.ac b/configure.ac
index 7f0fecc..1103cc7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -356,11 +356,6 @@
 AC_SUBST([AROUT])
 AC_SUBST([CC_MM])
 
-if test "x$abi" != "xpecoff"; then
-  dnl Heap profiling uses the log(3) function.
-  LIBS="$LIBS -lm"
-fi
-
 JE_COMPILABLE([__attribute__ syntax],
               [static __attribute__((unused)) void foo(void){}],
               [],
@@ -774,6 +769,12 @@
     AC_MSG_ERROR([Heap profiling requires TLS]);
   fi
   force_tls="1"
+
+  if test "x$abi" != "xpecoff"; then
+    dnl Heap profiling uses the log(3) function.
+    LIBS="$LIBS -lm"
+  fi
+
   AC_DEFINE([JEMALLOC_PROF], [ ])
 fi
 AC_SUBST([enable_prof])
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 119a5b1..38a761b 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -320,6 +320,20 @@
 JEMALLOC_INLINE void
 prof_sample_threshold_update(prof_tdata_t *prof_tdata)
 {
+	/*
+	 * The body of this function is compiled out unless heap profiling is
+	 * enabled, so that it is possible to compile jemalloc with floating
+	 * point support completely disabled.  Avoiding floating point code is
+	 * important on memory-constrained systems, but it also enables a
+	 * workaround for versions of glibc that don't properly save/restore
+	 * floating point registers during dynamic lazy symbol loading (which
+	 * internally calls into whatever malloc implementation happens to be
+	 * integrated into the application).  Note that some compilers (e.g.
+	 * gcc 4.8) may use floating point registers for fast memory moves, so
+	 * jemalloc must be compiled with such optimizations disabled (e.g.
+	 * -mno-sse) in order for the workaround to be complete.
+	 */
+#ifdef JEMALLOC_PROF
 	uint64_t r;
 	double u;
 
@@ -349,6 +363,7 @@
 	prof_tdata->threshold = (uint64_t)(log(u) /
 	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
 	    + (uint64_t)1U;
+#endif
 }
 
 JEMALLOC_INLINE prof_ctx_t *