Update aosp/master compiler-rt for rebase to r256229

http://b/26987366

Change-Id: I0ca3d7d3f1b7926fcffcb5b467e79958de576437
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index d79feae..5ffad1d 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -2,6 +2,9 @@
 # generic implementations of the core runtime library along with optimized
 # architecture-specific code in various subdirectories.
 
+# TODO: Need to add a mechanism for logging errors when builtin source files are
+# added to a sub-directory and not this CMakeLists file.
+
 set(GENERIC_SOURCES
   absvdi2.c
   absvsi2.c
@@ -38,6 +41,7 @@
   divsc3.c
   divsf3.c
   divsi3.c
+  divtc3.c
   divti3.c
   divtf3.c
   divxc3.c
@@ -139,47 +143,102 @@
   umodsi3.c
   umodti3.c)
 
+if(APPLE)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    atomic_flag_clear.c
+    atomic_flag_clear_explicit.c
+    atomic_flag_test_and_set.c
+    atomic_flag_test_and_set_explicit.c
+    atomic_signal_fence.c
+    atomic_thread_fence.c)
+endif()
+
+if(NOT WIN32 OR MINGW)
+  set(GENERIC_SOURCES
+      ${GENERIC_SOURCES}
+      emutls.c)
+endif()
+
 if (HAVE_UNWIND_H)
   set(GENERIC_SOURCES
       ${GENERIC_SOURCES}
       gcc_personality_v0.c)
 endif ()
 
-set(x86_64_SOURCES
-  x86_64/floatdidf.c
-  x86_64/floatdisf.c
-  x86_64/floatdixf.c
-  x86_64/floatundidf.S
-  x86_64/floatundisf.S
-  x86_64/floatundixf.S
-  ${GENERIC_SOURCES})
+if (NOT MSVC)
+  set(x86_64_SOURCES
+      x86_64/chkstk.S
+      x86_64/chkstk2.S
+      x86_64/floatdidf.c
+      x86_64/floatdisf.c
+      x86_64/floatdixf.c
+      x86_64/floatundidf.S
+      x86_64/floatundisf.S
+      x86_64/floatundixf.S
+      ${GENERIC_SOURCES})
+  set(x86_64h_SOURCES ${x86_64_SOURCES})
 
-set(i386_SOURCES
-  i386/ashldi3.S
-  i386/ashrdi3.S
-  i386/divdi3.S
-  i386/floatdidf.S
-  i386/floatdisf.S
-  i386/floatdixf.S
-  i386/floatundidf.S
-  i386/floatundisf.S
-  i386/floatundixf.S
-  i386/lshrdi3.S
-  i386/moddi3.S
-  i386/muldi3.S
-  i386/udivdi3.S
-  i386/umoddi3.S
-  ${GENERIC_SOURCES})
+  if (WIN32)
+    set(x86_64_SOURCES
+        ${x86_64_SOURCES}
+        x86_64/chkstk.S
+        x86_64/chkstk2.S)
+  endif()
 
-set(i686_SOURCES
-  ${i386_SOURCES})
+  set(i386_SOURCES
+      i386/ashldi3.S
+      i386/ashrdi3.S
+      i386/chkstk.S
+      i386/chkstk2.S
+      i386/divdi3.S
+      i386/floatdidf.S
+      i386/floatdisf.S
+      i386/floatdixf.S
+      i386/floatundidf.S
+      i386/floatundisf.S
+      i386/floatundixf.S
+      i386/lshrdi3.S
+      i386/moddi3.S
+      i386/muldi3.S
+      i386/udivdi3.S
+      i386/umoddi3.S
+      ${GENERIC_SOURCES})
+
+  if (WIN32)
+    set(i386_SOURCES
+        ${i386_SOURCES}
+        i386/chkstk.S
+        i386/chkstk2.S)
+  endif()
+
+  set(i686_SOURCES
+      ${i386_SOURCES})
+else () # MSVC
+  # Use C versions of functions when building on MSVC
+  # MSVC's assembler takes Intel syntax, not AT&T syntax
+  set(x86_64_SOURCES
+      x86_64/floatdidf.c
+      x86_64/floatdisf.c
+      x86_64/floatdixf.c
+      ${GENERIC_SOURCES})
+  set(x86_64h_SOURCES ${x86_64_SOURCES})
+  set(i386_SOURCES ${GENERIC_SOURCES})
+  set(i686_SOURCES ${i386_SOURCES})
+endif () # if (NOT MSVC)
 
 set(arm_SOURCES
   arm/adddf3vfp.S
   arm/addsf3vfp.S
+  arm/aeabi_cdcmp.S
+  arm/aeabi_cdcmpeq_check_nan.c
+  arm/aeabi_cfcmp.S
+  arm/aeabi_cfcmpeq_check_nan.c
   arm/aeabi_dcmp.S
   arm/aeabi_div0.c
+  arm/aeabi_drsub.c
   arm/aeabi_fcmp.S
+  arm/aeabi_frsub.c
   arm/aeabi_idivmod.S
   arm/aeabi_ldivmod.S
   arm/aeabi_memcmp.S
@@ -190,6 +249,8 @@
   arm/aeabi_uldivmod.S
   arm/bswapdi2.S
   arm/bswapsi2.S
+  arm/clzdi2.S
+  arm/clzsi2.S
   arm/comparesf2.S
   arm/divdf3vfp.S
   arm/divmodsi4.S
@@ -258,10 +319,50 @@
   arm/unordsf2vfp.S
   ${GENERIC_SOURCES})
 
+set(aarch64_SOURCES
+  comparetf2.c
+  extenddftf2.c
+  extendsftf2.c
+  fixtfdi.c
+  fixtfsi.c
+  fixtfti.c
+  fixunstfdi.c
+  fixunstfsi.c
+  fixunstfti.c
+  floatditf.c
+  floatsitf.c
+  floatunditf.c
+  floatunsitf.c
+  multc3.c
+  trunctfdf2.c
+  trunctfsf2.c
+  ${GENERIC_SOURCES})
+
+set(armhf_SOURCES ${arm_SOURCES})
+set(armv7_SOURCES ${arm_SOURCES})
+set(armv7s_SOURCES ${arm_SOURCES})
+set(arm64_SOURCES ${aarch64_SOURCES})
+
+# macho_embedded archs
+set(armv6m_SOURCES ${GENERIC_SOURCES})
+set(armv7m_SOURCES ${arm_SOURCES})
+set(armv7em_SOURCES ${arm_SOURCES})
+
+set(mips_SOURCES ${GENERIC_SOURCES})
+set(mipsel_SOURCES ${mips_SOURCES})
+set(mips64_SOURCES ${mips_SOURCES})
+set(mips64el_SOURCES ${mips_SOURCES})
+
 add_custom_target(builtins)
 
-if (NOT WIN32)
-  foreach (arch x86_64 i386 i686 arm)
+if (APPLE)
+  add_subdirectory(Darwin-excludes)
+  add_subdirectory(macho_embedded)
+  darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS})
+elseif (NOT WIN32 OR MINGW)
+  append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=c99 maybe_stdc99)
+
+  foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})
       # Filter out generic versions of routines that are re-implemented in
       # architecture specific manner.  This prevents multiple definitions of the
@@ -274,11 +375,12 @@
         endif ()
       endforeach ()
 
-      set_source_files_properties(${${arch}_SOURCES} PROPERTIES LANGUAGE C)
-      add_compiler_rt_runtime(clang_rt.builtins-${arch} ${arch} STATIC
+      add_compiler_rt_runtime(clang_rt.builtins
+                              STATIC
+                              ARCHS ${arch}
                               SOURCES ${${arch}_SOURCES}
-                              CFLAGS "-std=c99")
-      add_dependencies(builtins clang_rt.builtins-${arch})
+                              CFLAGS ${maybe_stdc99}
+                              PARENT_TARGET builtins)
     endif ()
   endforeach ()
 endif ()
diff --git a/lib/builtins/Darwin-excludes/10.4-x86_64.txt b/lib/builtins/Darwin-excludes/10.4-x86_64.txt
new file mode 100644
index 0000000..f2ee7fe
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/10.4-x86_64.txt
@@ -0,0 +1,35 @@
+absvti2
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+subvti3
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/lib/builtins/Darwin-excludes/10.4.txt b/lib/builtins/Darwin-excludes/10.4.txt
new file mode 100644
index 0000000..70d3644
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/10.4.txt
@@ -0,0 +1,96 @@
+apple_versioning
+absvdi2
+absvsi2
+adddf3
+addsf3
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+clear_cache
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdf3
+divdi3
+divmoddi4
+divmodsi4
+divsc3
+divsf3
+divsi3
+divxc3
+enable_execute_stack
+comparedf2
+comparesf2
+extendhfsf2
+extendsfdf2
+ffsdi2
+fixdfdi
+fixdfsi
+fixsfdi
+fixsfsi
+fixunsdfdi
+fixunsdfsi
+fixunssfdi
+fixunssfsi
+fixunsxfdi
+fixunsxfsi
+fixxfdi
+floatdidf
+floatdisf
+floatdixf
+floatsidf
+floatsisf
+floatunsidf
+floatunsisf
+gcc_personality_v0
+gnu_f2h_ieee
+gnu_h2f_ieee
+lshrdi3
+moddi3
+modsi3
+muldc3
+muldf3
+muldi3
+mulodi4
+mulosi4
+mulsc3
+mulsf3
+mulvdi3
+mulvsi3
+mulxc3
+negdf2
+negdi2
+negsf2
+negvdi2
+negvsi2
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+powixf2
+subdf3
+subsf3
+subvdi3
+subvsi3
+truncdfhf2
+truncdfsf2
+truncsfhf2
+ucmpdi2
+udivdi3
+udivmoddi4
+udivmodsi4
+udivsi3
+umoddi3
+umodsi3
+atomic_flag_clear
+atomic_flag_clear_explicit
+atomic_flag_test_and_set
+atomic_flag_test_and_set_explicit
+atomic_signal_fence
+atomic_thread_fence
\ No newline at end of file
diff --git a/lib/builtins/Darwin-excludes/CMakeLists.txt b/lib/builtins/Darwin-excludes/CMakeLists.txt
new file mode 100644
index 0000000..266e422
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/CMakeLists.txt
@@ -0,0 +1,4 @@
+file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt)
+foreach(filter_file ${filter_files})
+  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file})
+endforeach()
diff --git a/lib/builtins/Darwin-excludes/README.TXT b/lib/builtins/Darwin-excludes/README.TXT
new file mode 100644
index 0000000..173eccc
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/README.TXT
@@ -0,0 +1,11 @@
+This folder contains list of symbols that should be excluded from the builtin
+libraries for Darwin. There are two reasons symbols are excluded:
+
+(1) They aren't supported on Darwin
+(2) They are contained within the OS on the minimum supported target
+
+The builtin libraries must contain all symbols not provided by the lowest
+supported target OS. Meaning if minimum deployment target is iOS 6, all builtins
+not included in the ios6-<arch>.txt files need to be included. The one catch is
+that this is per-architecture. Since iOS 6 doesn't support arm64, when supporting
+iOS 6, the minimum deployment target for arm64 binaries is iOS 7.
diff --git a/lib/builtins/Darwin-excludes/ios-armv7.txt b/lib/builtins/Darwin-excludes/ios-armv7.txt
new file mode 100644
index 0000000..6aa542f
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/ios-armv7.txt
@@ -0,0 +1,57 @@
+absvti2
+addtf3
+addvti3
+aeabi_cdcmp
+aeabi_cdcmpeq_check_nan
+aeabi_cfcmp
+aeabi_cfcmpeq_check_nan
+aeabi_dcmp
+aeabi_div0
+aeabi_drsub
+aeabi_fcmp
+aeabi_frsub
+aeabi_idivmod
+aeabi_ldivmod
+aeabi_memcmp
+aeabi_memcpy
+aeabi_memmove
+aeabi_memset
+aeabi_uidivmod
+aeabi_uldivmod
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divtf3
+divti3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+multf3
+multi3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subtf3
+subvti3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/lib/builtins/Darwin-excludes/ios-armv7s.txt b/lib/builtins/Darwin-excludes/ios-armv7s.txt
new file mode 100644
index 0000000..28167aa
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/ios-armv7s.txt
@@ -0,0 +1,57 @@
+absvti2
+addtf3
+addvti3
+aeabi_cdcmp
+aeabi_cdcmpeq_check_nan
+aeabi_cfcmp
+aeabi_cfcmpeq_check_nan
+aeabi_dcmp
+aeabi_div0
+aeabi_drsub
+aeabi_fcmp
+aeabi_frsub
+aeabi_idivmod
+aeabi_ldivmod
+aeabi_memcmp
+aeabi_memcpy
+aeabi_memmove
+aeabi_memset
+aeabi_uidivmod
+aeabi_uldivmod
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divtf3
+divti3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+multf
+multi3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subtf3
+subvti3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/lib/builtins/Darwin-excludes/ios.txt b/lib/builtins/Darwin-excludes/ios.txt
new file mode 100644
index 0000000..5db2400
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/ios.txt
@@ -0,0 +1 @@
+apple_versioning
diff --git a/lib/builtins/Darwin-excludes/ios6-armv7.txt b/lib/builtins/Darwin-excludes/ios6-armv7.txt
new file mode 100644
index 0000000..b01fa71
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/ios6-armv7.txt
@@ -0,0 +1,120 @@
+absvdi2
+absvsi2
+adddf3
+adddf3vfp
+addsf3
+addsf3vfp
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+bswapdi2
+bswapsi2
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdf3
+divdf3vfp
+divdi3
+divmodsi4
+divsc3
+divsf3
+divsf3vfp
+divsi3
+eqdf2
+eqdf2vfp
+eqsf2
+eqsf2vfp
+extendsfdf2
+extendsfdf2vfp
+ffsdi2
+fixdfdi
+fixdfsi
+fixdfsivfp
+fixsfdi
+fixsfsi
+fixsfsivfp
+fixunsdfdi
+fixunsdfsi
+fixunsdfsivfp
+fixunssfdi
+fixunssfsi
+fixunssfsivfp
+floatdidf
+floatdisf
+floatsidf
+floatsidfvfp
+floatsisf
+floatsisfvfp
+floatundidf
+floatundisf
+floatunsidf
+floatunsisf
+floatunssidfvfp
+floatunssisfvfp
+gcc_personality_sj0
+gedf2
+gedf2vfp
+gesf2
+gesf2vfp
+gtdf2
+gtdf2vfp
+gtsf2
+gtsf2vfp
+ledf2
+ledf2vfp
+lesf2
+lesf2vfp
+lshrdi3
+ltdf2
+ltdf2vfp
+ltsf2
+ltsf2vfp
+moddi3
+modsi3
+muldc3
+muldf3
+muldf3vfp
+muldi3
+mulodi4
+mulosi4
+mulsc3
+mulsf3
+mulsf3vfp
+mulvdi3
+mulvsi3
+nedf2
+nedf2vfp
+negdi2
+negvdi2
+negvsi2
+nesf2
+nesf2vfp
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+subdf3
+subdf3vfp
+subsf3
+subsf3vfp
+subvdi3
+subvsi3
+truncdfsf2
+truncdfsf2vfp
+ucmpdi2
+udivdi3
+udivmoddi4
+udivmodsi4
+udivsi3
+umoddi3
+umodsi3
+unorddf2
+unorddf2vfp
+unordsf2
+unordsf2vfp
diff --git a/lib/builtins/Darwin-excludes/ios6-armv7s.txt b/lib/builtins/Darwin-excludes/ios6-armv7s.txt
new file mode 100644
index 0000000..b01fa71
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/ios6-armv7s.txt
@@ -0,0 +1,120 @@
+absvdi2
+absvsi2
+adddf3
+adddf3vfp
+addsf3
+addsf3vfp
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+bswapdi2
+bswapsi2
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdf3
+divdf3vfp
+divdi3
+divmodsi4
+divsc3
+divsf3
+divsf3vfp
+divsi3
+eqdf2
+eqdf2vfp
+eqsf2
+eqsf2vfp
+extendsfdf2
+extendsfdf2vfp
+ffsdi2
+fixdfdi
+fixdfsi
+fixdfsivfp
+fixsfdi
+fixsfsi
+fixsfsivfp
+fixunsdfdi
+fixunsdfsi
+fixunsdfsivfp
+fixunssfdi
+fixunssfsi
+fixunssfsivfp
+floatdidf
+floatdisf
+floatsidf
+floatsidfvfp
+floatsisf
+floatsisfvfp
+floatundidf
+floatundisf
+floatunsidf
+floatunsisf
+floatunssidfvfp
+floatunssisfvfp
+gcc_personality_sj0
+gedf2
+gedf2vfp
+gesf2
+gesf2vfp
+gtdf2
+gtdf2vfp
+gtsf2
+gtsf2vfp
+ledf2
+ledf2vfp
+lesf2
+lesf2vfp
+lshrdi3
+ltdf2
+ltdf2vfp
+ltsf2
+ltsf2vfp
+moddi3
+modsi3
+muldc3
+muldf3
+muldf3vfp
+muldi3
+mulodi4
+mulosi4
+mulsc3
+mulsf3
+mulsf3vfp
+mulvdi3
+mulvsi3
+nedf2
+nedf2vfp
+negdi2
+negvdi2
+negvsi2
+nesf2
+nesf2vfp
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+subdf3
+subdf3vfp
+subsf3
+subsf3vfp
+subvdi3
+subvsi3
+truncdfsf2
+truncdfsf2vfp
+ucmpdi2
+udivdi3
+udivmoddi4
+udivmodsi4
+udivsi3
+umoddi3
+umodsi3
+unorddf2
+unorddf2vfp
+unordsf2
+unordsf2vfp
diff --git a/lib/builtins/Darwin-excludes/ios7-arm64.txt b/lib/builtins/Darwin-excludes/ios7-arm64.txt
new file mode 100644
index 0000000..5e4caf9
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/ios7-arm64.txt
@@ -0,0 +1,16 @@
+clzti2
+divti3
+fixdfti
+fixsfti
+fixunsdfti
+floattidf
+floattisf
+floatuntidf
+floatuntisf
+gcc_personality_v0
+modti3
+powidf2
+powisf2
+udivmodti4
+udivti3
+umodti3
diff --git a/lib/builtins/Darwin-excludes/iossim-i386.txt b/lib/builtins/Darwin-excludes/iossim-i386.txt
new file mode 100644
index 0000000..60c0e2d
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/iossim-i386.txt
@@ -0,0 +1,82 @@
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/lib/builtins/Darwin-excludes/iossim-x86_64.txt b/lib/builtins/Darwin-excludes/iossim-x86_64.txt
new file mode 100644
index 0000000..de1574e
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/iossim-x86_64.txt
@@ -0,0 +1,12 @@
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
diff --git a/lib/builtins/Darwin-excludes/iossim.txt b/lib/builtins/Darwin-excludes/iossim.txt
new file mode 100644
index 0000000..5db2400
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/iossim.txt
@@ -0,0 +1 @@
+apple_versioning
diff --git a/lib/builtins/Darwin-excludes/osx-i386.txt b/lib/builtins/Darwin-excludes/osx-i386.txt
new file mode 100644
index 0000000..60c0e2d
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/osx-i386.txt
@@ -0,0 +1,82 @@
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/lib/builtins/Darwin-excludes/osx-x86_64.txt b/lib/builtins/Darwin-excludes/osx-x86_64.txt
new file mode 100644
index 0000000..de1574e
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/osx-x86_64.txt
@@ -0,0 +1,12 @@
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
diff --git a/lib/builtins/Darwin-excludes/osx.txt b/lib/builtins/Darwin-excludes/osx.txt
new file mode 100644
index 0000000..5db2400
--- /dev/null
+++ b/lib/builtins/Darwin-excludes/osx.txt
@@ -0,0 +1 @@
+apple_versioning
diff --git a/lib/builtins/README.txt b/lib/builtins/README.txt
index 1c08e74..ad36e4e 100644
--- a/lib/builtins/README.txt
+++ b/lib/builtins/README.txt
@@ -220,7 +220,9 @@
 // for use with some implementations of assert() in <assert.h>
 void __eprintf(const char* format, const char* assertion_expression,
 				const char* line, const char* file);
-				
+
+// for systems with emulated thread local storage
+void* __emutls_get_address(struct __emutls_control*);
 
 
 //   Power PC specific functions
diff --git a/lib/builtins/arm/aeabi_cdcmp.S b/lib/builtins/arm/aeabi_cdcmp.S
new file mode 100644
index 0000000..036a6f5
--- /dev/null
+++ b/lib/builtins/arm/aeabi_cdcmp.S
@@ -0,0 +1,96 @@
+//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cdcmpeq(double a, double b) {
+//   if (isnan(a) || isnan(b)) {
+//     Z = 0; C = 1;
+//   } else {
+//     __aeabi_cdcmple(a, b);
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+        push {r0-r3, lr}
+        bl __aeabi_cdcmpeq_check_nan
+        cmp r0, #1
+        pop {r0-r3, lr}
+
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        bne __aeabi_cdcmple
+
+        msr CPSR_f, #APSR_C
+        JMP(lr)
+END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+
+
+// void __aeabi_cdcmple(double a, double b) {
+//   if (__aeabi_dcmplt(a, b)) {
+//     Z = 0; C = 0;
+//   } else if (__aeabi_dcmpeq(a, b)) {
+//     Z = 1; C = 1;
+//   } else {
+//     Z = 0; C = 1;
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+        // Per the RTABI, this function must preserve r0-r11.
+        // Save lr in the same instruction for compactness
+        push {r0-r3, lr}
+
+        bl __aeabi_dcmplt
+        cmp r0, #1
+        moveq ip, #0
+        beq 1f
+
+        ldm sp, {r0-r3}
+        bl __aeabi_dcmpeq
+        cmp r0, #1
+        moveq ip, #(APSR_C | APSR_Z)
+        movne ip, #(APSR_C)
+
+1:
+        msr CPSR_f, ip
+        pop {r0-r3}
+        POP_PC()
+END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+
+// int __aeabi_cdrcmple(double a, double b) {
+//   return __aeabi_cdcmple(b, a);
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+        // Swap r0 and r2
+        mov ip, r0
+        mov r0, r2
+        mov r2, ip
+
+        // Swap r1 and r3
+        mov ip, r1
+        mov r1, r3
+        mov r3, ip
+
+        b __aeabi_cdcmple
+END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+
diff --git a/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
new file mode 100644
index 0000000..577f6b2
--- /dev/null
+++ b/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+__attribute__((pcs("aapcs")))
+__attribute__((visibility("hidden")))
+int __aeabi_cdcmpeq_check_nan(double a, double b) {
+    return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/lib/builtins/arm/aeabi_cfcmp.S b/lib/builtins/arm/aeabi_cfcmp.S
new file mode 100644
index 0000000..43594e5
--- /dev/null
+++ b/lib/builtins/arm/aeabi_cfcmp.S
@@ -0,0 +1,91 @@
+//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cfcmpeq(float a, float b) {
+//   if (isnan(a) || isnan(b)) {
+//     Z = 0; C = 1;
+//   } else {
+//     __aeabi_cfcmple(a, b);
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+        push {r0-r3, lr}
+        bl __aeabi_cfcmpeq_check_nan
+        cmp r0, #1
+        pop {r0-r3, lr}
+
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        bne __aeabi_cfcmple
+
+        msr CPSR_f, #APSR_C
+        JMP(lr)
+END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+
+
+// void __aeabi_cfcmple(float a, float b) {
+//   if (__aeabi_fcmplt(a, b)) {
+//     Z = 0; C = 0;
+//   } else if (__aeabi_fcmpeq(a, b)) {
+//     Z = 1; C = 1;
+//   } else {
+//     Z = 0; C = 1;
+//   }
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+        // Per the RTABI, this function must preserve r0-r11.
+        // Save lr in the same instruction for compactness
+        push {r0-r3, lr}
+
+        bl __aeabi_fcmplt
+        cmp r0, #1
+        moveq ip, #0
+        beq 1f
+
+        ldm sp, {r0-r3}
+        bl __aeabi_fcmpeq
+        cmp r0, #1
+        moveq ip, #(APSR_C | APSR_Z)
+        movne ip, #(APSR_C)
+
+1:
+        msr CPSR_f, ip
+        pop {r0-r3}
+        POP_PC()
+END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+
+// int __aeabi_cfrcmple(float a, float b) {
+//   return __aeabi_cfcmple(b, a);
+// }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+        // Swap r0 and r1
+        mov ip, r0
+        mov r0, r1
+        mov r1, ip
+
+        b __aeabi_cfcmple
+END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+
diff --git a/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
new file mode 100644
index 0000000..992e31f
--- /dev/null
+++ b/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+__attribute__((pcs("aapcs")))
+__attribute__((visibility("hidden")))
+int __aeabi_cfcmpeq_check_nan(float a, float b) {
+    return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/lib/builtins/arm/aeabi_drsub.c b/lib/builtins/arm/aeabi_drsub.c
new file mode 100644
index 0000000..fc17d5a
--- /dev/null
+++ b/lib/builtins/arm/aeabi_drsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "../fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__aeabi_dsub(fp_t, fp_t);
+
+COMPILER_RT_ABI fp_t
+__aeabi_drsub(fp_t a, fp_t b) {
+    return __aeabi_dsub(b, a);
+}
diff --git a/lib/builtins/arm/aeabi_frsub.c b/lib/builtins/arm/aeabi_frsub.c
new file mode 100644
index 0000000..64258dc
--- /dev/null
+++ b/lib/builtins/arm/aeabi_frsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "../fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__aeabi_fsub(fp_t, fp_t);
+
+COMPILER_RT_ABI fp_t
+__aeabi_frsub(fp_t a, fp_t b) {
+    return __aeabi_fsub(b, a);
+}
diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h
index 8bb0ddc..c289705 100644
--- a/lib/builtins/assembly.h
+++ b/lib/builtins/assembly.h
@@ -73,6 +73,15 @@
 #define JMPc(r, c) mov##c pc, r
 #endif
 
+// pop {pc} can't switch Thumb mode on ARMv4T
+#if __ARM_ARCH >= 5
+#define POP_PC() pop {pc}
+#else
+#define POP_PC()                                                               \
+  pop {ip};                                                                    \
+  JMP(ip)
+#endif
+
 #if __ARM_ARCH_ISA_THUMB == 2
 #define IT(cond)  it cond
 #define ITT(cond) itt cond
diff --git a/lib/builtins/atomic.c b/lib/builtins/atomic.c
index 35c8837..f1ddc3e 100644
--- a/lib/builtins/atomic.c
+++ b/lib/builtins/atomic.c
@@ -56,13 +56,13 @@
 #include <machine/atomic.h>
 #include <sys/umtx.h>
 typedef struct _usem Lock;
-inline static void unlock(Lock *l) {
+__inline static void unlock(Lock *l) {
   __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE);
   __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
   if (l->_has_waiters)
       _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
 }
-inline static void lock(Lock *l) {
+__inline static void lock(Lock *l) {
   uint32_t old = 1;
   while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old,
         0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
@@ -76,12 +76,12 @@
 #elif defined(__APPLE__)
 #include <libkern/OSAtomic.h>
 typedef OSSpinLock Lock;
-inline static void unlock(Lock *l) {
+__inline static void unlock(Lock *l) {
   OSSpinLockUnlock(l);
 }
 /// Locks a lock.  In the current implementation, this is potentially
 /// unbounded in the contended case.
-inline static void lock(Lock *l) {  
+__inline static void lock(Lock *l) {
   OSSpinLockLock(l);
 }
 static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
@@ -89,12 +89,12 @@
 #else
 typedef _Atomic(uintptr_t) Lock;
 /// Unlock a lock.  This is a release operation.
-inline static void unlock(Lock *l) {
+__inline static void unlock(Lock *l) {
   __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
 }
 /// Locks a lock.  In the current implementation, this is potentially
 /// unbounded in the contended case.
-inline static void lock(Lock *l) {
+__inline static void lock(Lock *l) {
   uintptr_t old = 0;
   while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
         __ATOMIC_RELAXED))
@@ -106,7 +106,7 @@
 
 
 /// Returns a lock to use for a given pointer.  
-static inline Lock *lock_for_pointer(void *ptr) {
+static __inline Lock *lock_for_pointer(void *ptr) {
   intptr_t hash = (intptr_t)ptr;
   // Disregard the lowest 4 bits.  We want all values that may be part of the
   // same memory operation to hash to the same value and therefore use the same
diff --git a/lib/builtins/atomic_flag_clear.c b/lib/builtins/atomic_flag_clear.c
index 15984cd..da912af 100644
--- a/lib/builtins/atomic_flag_clear.c
+++ b/lib/builtins/atomic_flag_clear.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_clear
 void atomic_flag_clear(volatile atomic_flag *object) {
-  return __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
+  __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
 }
+
+#endif
diff --git a/lib/builtins/atomic_flag_clear_explicit.c b/lib/builtins/atomic_flag_clear_explicit.c
index 0f7859c..1059b78 100644
--- a/lib/builtins/atomic_flag_clear_explicit.c
+++ b/lib/builtins/atomic_flag_clear_explicit.c
@@ -12,9 +12,17 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_clear_explicit
 void atomic_flag_clear_explicit(volatile atomic_flag *object,
                                 memory_order order) {
-  return __c11_atomic_store(&(object)->_Value, 0, order);
+  __c11_atomic_store(&(object)->_Value, 0, order);
 }
+
+#endif
diff --git a/lib/builtins/atomic_flag_test_and_set.c b/lib/builtins/atomic_flag_test_and_set.c
index 07209fc..e8811d3 100644
--- a/lib/builtins/atomic_flag_test_and_set.c
+++ b/lib/builtins/atomic_flag_test_and_set.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_test_and_set
 _Bool atomic_flag_test_and_set(volatile atomic_flag *object) {
   return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST);
 }
+
+#endif
diff --git a/lib/builtins/atomic_flag_test_and_set_explicit.c b/lib/builtins/atomic_flag_test_and_set_explicit.c
index eaa5be0..5c8c2df 100644
--- a/lib/builtins/atomic_flag_test_and_set_explicit.c
+++ b/lib/builtins/atomic_flag_test_and_set_explicit.c
@@ -12,9 +12,17 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_flag_test_and_set_explicit
 _Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
                                         memory_order order) {
   return __c11_atomic_exchange(&(object)->_Value, 1, order);
 }
+
+#endif
diff --git a/lib/builtins/atomic_signal_fence.c b/lib/builtins/atomic_signal_fence.c
index ad292d2..9ccc2ae 100644
--- a/lib/builtins/atomic_signal_fence.c
+++ b/lib/builtins/atomic_signal_fence.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_signal_fence
 void atomic_signal_fence(memory_order order) {
   __c11_atomic_signal_fence(order);
 }
+
+#endif
diff --git a/lib/builtins/atomic_thread_fence.c b/lib/builtins/atomic_thread_fence.c
index 71f698c..d225601 100644
--- a/lib/builtins/atomic_thread_fence.c
+++ b/lib/builtins/atomic_thread_fence.c
@@ -12,8 +12,16 @@
  *===------------------------------------------------------------------------===
  */
 
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
 #include <stdatomic.h>
 #undef atomic_thread_fence
 void atomic_thread_fence(memory_order order) {
   __c11_atomic_thread_fence(order);
 }
+
+#endif
diff --git a/lib/builtins/comparedf2.c b/lib/builtins/comparedf2.c
index 64eea12..9e29752 100644
--- a/lib/builtins/comparedf2.c
+++ b/lib/builtins/comparedf2.c
@@ -80,6 +80,11 @@
     }
 }
 
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpdf2, __ledf2);
+#endif
+
 enum GE_RESULT {
     GE_LESS      = -1,
     GE_EQUAL     =  0,
diff --git a/lib/builtins/comparesf2.c b/lib/builtins/comparesf2.c
index 442289c..1fd5063 100644
--- a/lib/builtins/comparesf2.c
+++ b/lib/builtins/comparesf2.c
@@ -80,6 +80,11 @@
     }
 }
 
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpsf2, __lesf2);
+#endif
+
 enum GE_RESULT {
     GE_LESS      = -1,
     GE_EQUAL     =  0,
diff --git a/lib/builtins/comparetf2.c b/lib/builtins/comparetf2.c
index a6436de..c0ad8ed 100644
--- a/lib/builtins/comparetf2.c
+++ b/lib/builtins/comparetf2.c
@@ -79,6 +79,11 @@
     }
 }
 
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmptf2, __letf2);
+#endif
+
 enum GE_RESULT {
     GE_LESS      = -1,
     GE_EQUAL     =  0,
diff --git a/lib/builtins/divdc3.c b/lib/builtins/divdc3.c
index 7de78c8..3c88390 100644
--- a/lib/builtins/divdc3.c
+++ b/lib/builtins/divdc3.c
@@ -17,7 +17,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI double _Complex
+COMPILER_RT_ABI Dcomplex
 __divdc3(double __a, double __b, double __c, double __d)
 {
     int __ilogbw = 0;
@@ -29,31 +29,31 @@
         __d = crt_scalbn(__d, -__ilogbw);
     }
     double __denom = __c * __c + __d * __d;
-    double _Complex z;
-    __real__ z = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Dcomplex z;
+    COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysign(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysign(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
             __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0.0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
             __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
-            __real__ z = 0.0 * (__a * __c + __b * __d);
-            __imag__ z = 0.0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/lib/builtins/divsc3.c b/lib/builtins/divsc3.c
index 710d532..42a4831 100644
--- a/lib/builtins/divsc3.c
+++ b/lib/builtins/divsc3.c
@@ -17,7 +17,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI float _Complex
+COMPILER_RT_ABI Fcomplex
 __divsc3(float __a, float __b, float __c, float __d)
 {
     int __ilogbw = 0;
@@ -29,31 +29,31 @@
         __d = crt_scalbnf(__d, -__ilogbw);
     }
     float __denom = __c * __c + __d * __d;
-    float _Complex z;
-    __real__ z = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Fcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysignf(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignf(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
             __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
             __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
-            __real__ z = 0 * (__a * __c + __b * __d);
-            __imag__ z = 0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/lib/builtins/divtc3.c b/lib/builtins/divtc3.c
new file mode 100644
index 0000000..04693df
--- /dev/null
+++ b/lib/builtins/divtc3.c
@@ -0,0 +1,60 @@
+/*===-- divtc3.c - Implement __divtc3 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divtc3 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI long double _Complex
+__divtc3(long double __a, long double __b, long double __c, long double __d)
+{
+    int __ilogbw = 0;
+    long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+    if (crt_isfinite(__logbw))
+    {
+        __ilogbw = (int)__logbw;
+        __c = crt_scalbnl(__c, -__ilogbw);
+        __d = crt_scalbnl(__d, -__ilogbw);
+    }
+    long double __denom = __c * __c + __d * __d;
+    long double _Complex z;
+    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    {
+        if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+        {
+            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
+            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+        }
+        else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+                 crt_isfinite(__c) && crt_isfinite(__d))
+        {
+            __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
+            __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
+            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
+            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+        }
+        else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+                 crt_isfinite(__a) && crt_isfinite(__b))
+        {
+            __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
+            __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
+            __real__ z = 0.0 * (__a * __c + __b * __d);
+            __imag__ z = 0.0 * (__b * __c - __a * __d);
+        }
+    }
+    return z;
+}
diff --git a/lib/builtins/divxc3.c b/lib/builtins/divxc3.c
index 175ae3c..6f49280 100644
--- a/lib/builtins/divxc3.c
+++ b/lib/builtins/divxc3.c
@@ -18,7 +18,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
 __divxc3(long double __a, long double __b, long double __c, long double __d)
 {
     int __ilogbw = 0;
@@ -30,31 +30,31 @@
         __d = crt_scalbnl(__d, -__ilogbw);
     }
     long double __denom = __c * __c + __d * __d;
-    long double _Complex z;
-    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
             __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
             __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
-            __real__ z = 0 * (__a * __c + __b * __d);
-            __imag__ z = 0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/lib/builtins/emutls.c b/lib/builtins/emutls.c
new file mode 100644
index 0000000..09e7956
--- /dev/null
+++ b/lib/builtins/emutls.c
@@ -0,0 +1,183 @@
+/* ===---------- emutls.c - Implements __emutls_get_address ---------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "int_lib.h"
+#include "int_util.h"
+
+/* Default is not to use posix_memalign, so systems like Android
+ * can use thread local data without heavier POSIX memory allocators.
+ */
+#ifndef EMUTLS_USE_POSIX_MEMALIGN
+#define EMUTLS_USE_POSIX_MEMALIGN 0
+#endif
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+    size_t size;  /* size of the object in bytes */
+    size_t align;  /* alignment of the object in bytes */
+    union {
+        uintptr_t index;  /* data[index-1] is the object address */
+        void* address;  /* object address, when in single thread env */
+    } object;
+    void* value;  /* null or non-zero initial value for the object */
+} __emutls_control;
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+    void *base;
+#if EMUTLS_USE_POSIX_MEMALIGN
+    if (posix_memalign(&base, align, size) != 0)
+        abort();
+#else
+    #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
+    char* object;
+    if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+        abort();
+    base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
+                    & ~(uintptr_t)(align - 1));
+
+    ((void**)base)[-1] = object;
+#endif
+    return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+#if EMUTLS_USE_POSIX_MEMALIGN
+    free(base);
+#else
+    /* The mallocated address is in ((void**)base)[-1] */
+    free(((void**)base)[-1]);
+#endif
+}
+
+/* Emulated TLS objects are always allocated at run-time. */
+static __inline void *emutls_allocate_object(__emutls_control *control) {
+    /* Use standard C types, check with gcc's emutls.o. */
+    typedef unsigned int gcc_word __attribute__((mode(word)));
+    typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+    COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word));
+    COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
+    COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
+
+    size_t size = control->size;
+    size_t align = control->align;
+    if (align < sizeof(void*))
+        align = sizeof(void*);
+    /* Make sure that align is power of 2. */
+    if ((align & (align - 1)) != 0)
+        abort();
+
+    void* base = emutls_memalign_alloc(align, size);
+    if (control->value)
+        memcpy(base, control->value, size);
+    else
+        memset(base, 0, size);
+    return base;
+}
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
+
+typedef struct emutls_address_array {
+    uintptr_t size;  /* number of elements in the 'data' array */
+    void* data[];
+} emutls_address_array;
+
+static pthread_key_t emutls_pthread_key;
+
+static void emutls_key_destructor(void* ptr) {
+    emutls_address_array* array = (emutls_address_array*)ptr;
+    uintptr_t i;
+    for (i = 0; i < array->size; ++i) {
+        if (array->data[i])
+            emutls_memalign_free(array->data[i]);
+    }
+    free(ptr);
+}
+
+static void emutls_init(void) {
+    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+        abort();
+}
+
+/* Returns control->object.index; set index if not allocated yet. */
+static __inline uintptr_t emutls_get_index(__emutls_control *control) {
+    uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
+    if (!index) {
+        static pthread_once_t once = PTHREAD_ONCE_INIT;
+        pthread_once(&once, emutls_init);
+        pthread_mutex_lock(&emutls_mutex);
+        index = control->object.index;
+        if (!index) {
+            index = ++emutls_num_object;
+            __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
+        }
+        pthread_mutex_unlock(&emutls_mutex);
+    }
+    return index;
+}
+
+/* Updates newly allocated thread local emutls_address_array. */
+static __inline void emutls_check_array_set_size(emutls_address_array *array,
+                                                 uintptr_t size) {
+    if (array == NULL)
+        abort();
+    array->size = size;
+    pthread_setspecific(emutls_pthread_key, (void*)array);
+}
+
+/* Returns the new 'data' array size, number of elements,
+ * which must be no smaller than the given index.
+ */
+static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
+   /* Need to allocate emutls_address_array with one extra slot
+    * to store the data array size.
+    * Round up the emutls_address_array size to multiple of 16.
+    */
+    return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
+}
+
+/* Returns the thread local emutls_address_array.
+ * Extends its size if necessary to hold address at index.
+ */
+static __inline emutls_address_array *
+emutls_get_address_array(uintptr_t index) {
+    emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+    if (array == NULL) {
+        uintptr_t new_size = emutls_new_data_array_size(index);
+        array = calloc(new_size + 1, sizeof(void*));
+        emutls_check_array_set_size(array, new_size);
+    } else if (index > array->size) {
+        uintptr_t orig_size = array->size;
+        uintptr_t new_size = emutls_new_data_array_size(index);
+        array = realloc(array, (new_size + 1) * sizeof(void*));
+        if (array)
+            memset(array->data + orig_size, 0,
+                   (new_size - orig_size) * sizeof(void*));
+        emutls_check_array_set_size(array, new_size);
+    }
+    return array;
+}
+
+void* __emutls_get_address(__emutls_control* control) {
+    uintptr_t index = emutls_get_index(control);
+    emutls_address_array* array = emutls_get_address_array(index);
+    if (array->data[index - 1] == NULL)
+        array->data[index - 1] = emutls_allocate_object(control);
+    return array->data[index - 1];
+}
diff --git a/lib/builtins/enable_execute_stack.c b/lib/builtins/enable_execute_stack.c
index 63d836b..0dc3482 100644
--- a/lib/builtins/enable_execute_stack.c
+++ b/lib/builtins/enable_execute_stack.c
@@ -10,7 +10,9 @@
 
 #include "int_lib.h"
 
+#ifndef _WIN32
 #include <sys/mman.h>
+#endif
 
 /* #include "config.h"
  * FIXME: CMake - include when cmake system is ready.
@@ -18,9 +20,14 @@
  */
 #define HAVE_SYSCONF 1
 
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#else
 #ifndef __APPLE__
 #include <unistd.h>
 #endif /* __APPLE__ */
+#endif /* _WIN32 */
 
 #if __LP64__
 	#define TRAMPOLINE_SIZE 48
@@ -40,6 +47,12 @@
 __enable_execute_stack(void* addr)
 {
 
+#if _WIN32
+	MEMORY_BASIC_INFORMATION mbi;
+	if (!VirtualQuery (addr, &mbi, sizeof(mbi)))
+		return; /* We should probably assert here because there is no return value */
+	VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect);
+#else
 #if __APPLE__
 	/* On Darwin, pagesize is always 4096 bytes */
 	const uintptr_t pageSize = 4096;
@@ -55,4 +68,5 @@
 	unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask);
 	size_t length = endPage - startPage;
 	(void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
 }
diff --git a/lib/builtins/extendhfsf2.c b/lib/builtins/extendhfsf2.c
index 7524e2e..27115a4 100644
--- a/lib/builtins/extendhfsf2.c
+++ b/lib/builtins/extendhfsf2.c
@@ -12,9 +12,11 @@
 #define DST_SINGLE
 #include "fp_extend_impl.inc"
 
+ARM_EABI_FNALIAS(h2f, extendhfsf2)
+
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI __attribute__((noinline)) float __extendhfsf2(uint16_t a) {
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
     return __extendXfYf2__(a);
 }
 
diff --git a/lib/builtins/fixunsdfdi.c b/lib/builtins/fixunsdfdi.c
index 2e0d87e..4b0bc9e 100644
--- a/lib/builtins/fixunsdfdi.c
+++ b/lib/builtins/fixunsdfdi.c
@@ -22,8 +22,8 @@
 __fixunsdfdi(double a)
 {
     if (a <= 0.0) return 0;
-    su_int high = a/0x1p32f;
-    su_int low = a - (double)high*0x1p32f;
+    su_int high = a / 4294967296.f;               /* a / 0x1p32f; */
+    su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */
     return ((du_int)high << 32) | low;
 }
 
diff --git a/lib/builtins/fixunssfdi.c b/lib/builtins/fixunssfdi.c
index 5a154e8..f8ebab8 100644
--- a/lib/builtins/fixunssfdi.c
+++ b/lib/builtins/fixunssfdi.c
@@ -23,8 +23,8 @@
 {
     if (a <= 0.0f) return 0;
     double da = a;
-    su_int high = da/0x1p32f;
-    su_int low = da - (double)high*0x1p32f;
+    su_int high = da / 4294967296.f;               /* da / 0x1p32f; */
+    su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */
     return ((du_int)high << 32) | low;
 }
 
diff --git a/lib/builtins/floatdidf.c b/lib/builtins/floatdidf.c
index e53fa25..a300c9f 100644
--- a/lib/builtins/floatdidf.c
+++ b/lib/builtins/floatdidf.c
@@ -32,8 +32,8 @@
 COMPILER_RT_ABI double
 __floatdidf(di_int a)
 {
-	static const double twop52 = 0x1.0p52;
-	static const double twop32 = 0x1.0p32;
+	static const double twop52 = 4503599627370496.0; // 0x1.0p52
+	static const double twop32 = 4294967296.0; // 0x1.0p32
 	
 	union { int64_t x; double d; } low = { .d = twop52 };
 	
diff --git a/lib/builtins/floatditf.c b/lib/builtins/floatditf.c
new file mode 100644
index 0000000..cd51dd8
--- /dev/null
+++ b/lib/builtins/floatditf.c
@@ -0,0 +1,50 @@
+//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements di_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatditf(di_int a) {
+
+    const int aWidth = sizeof a * CHAR_BIT;
+
+    // Handle zero as a special case to protect clz
+    if (a == 0)
+        return fromRep(0);
+
+    // All other cases begin by extracting the sign and absolute value of a
+    rep_t sign = 0;
+    du_int aAbs = (du_int)a;
+    if (a < 0) {
+        sign = signBit;
+        aAbs = ~(du_int)a + 1U;
+    }
+
+    // Exponent of (fp_t)a is the width of abs(a).
+    const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
+    rep_t result;
+
+    // Shift a into the significand field, rounding if it is a right-shift
+    const int shift = significandBits - exponent;
+    result = (rep_t)aAbs << shift ^ implicitBit;
+
+    // Insert the exponent
+    result += (rep_t)(exponent + exponentBias) << significandBits;
+    // Insert the sign bit and return
+    return fromRep(result | sign);
+}
+
+#endif
diff --git a/lib/builtins/floatsitf.c b/lib/builtins/floatsitf.c
index 8534693..f0abca3 100644
--- a/lib/builtins/floatsitf.c
+++ b/lib/builtins/floatsitf.c
@@ -30,16 +30,14 @@
     unsigned aAbs = (unsigned)a;
     if (a < 0) {
         sign = signBit;
-        aAbs += 0x80000000;
+        aAbs = ~(unsigned)a + 1U;
     }
 
     // Exponent of (fp_t)a is the width of abs(a).
-    const int exponent = (aWidth - 1) - __builtin_clz(a);
+    const int exponent = (aWidth - 1) - __builtin_clz(aAbs);
     rep_t result;
 
-    // Shift a into the significand field and clear the implicit bit.  Extra
-    // cast to unsigned int is necessary to get the correct behavior for
-    // the input INT_MIN.
+    // Shift a into the significand field and clear the implicit bit.
     const int shift = significandBits - exponent;
     result = (rep_t)aAbs << shift ^ implicitBit;
 
diff --git a/lib/builtins/floatundidf.c b/lib/builtins/floatundidf.c
index 73b8bac..67aa86e 100644
--- a/lib/builtins/floatundidf.c
+++ b/lib/builtins/floatundidf.c
@@ -32,9 +32,9 @@
 COMPILER_RT_ABI double
 __floatundidf(du_int a)
 {
-	static const double twop52 = 0x1.0p52;
-	static const double twop84 = 0x1.0p84;
-	static const double twop84_plus_twop52 = 0x1.00000001p84;
+	static const double twop52 = 4503599627370496.0; // 0x1.0p52
+	static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
+	static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
 	
 	union { uint64_t x; double d; } high = { .d = twop84 };
 	union { uint64_t x; double d; } low = { .d = twop52 };
diff --git a/lib/builtins/floatunditf.c b/lib/builtins/floatunditf.c
new file mode 100644
index 0000000..8098e95
--- /dev/null
+++ b/lib/builtins/floatunditf.c
@@ -0,0 +1,40 @@
+//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements du_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
+
+    const int aWidth = sizeof a * CHAR_BIT;
+
+    // Handle zero as a special case to protect clz
+    if (a == 0) return fromRep(0);
+
+    // Exponent of (fp_t)a is the width of abs(a).
+    const int exponent = (aWidth - 1) - __builtin_clzll(a);
+    rep_t result;
+
+    // Shift a into the significand field and clear the implicit bit.
+    const int shift = significandBits - exponent;
+    result = (rep_t)a << shift ^ implicitBit;
+
+    // Insert the exponent
+    result += (rep_t)(exponent + exponentBias) << significandBits;
+    return fromRep(result);
+}
+
+#endif
diff --git a/lib/builtins/fp_add_impl.inc b/lib/builtins/fp_add_impl.inc
index 5741889..b47be1b 100644
--- a/lib/builtins/fp_add_impl.inc
+++ b/lib/builtins/fp_add_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fp_t __addXf3__(fp_t a, fp_t b) {
+static __inline fp_t __addXf3__(fp_t a, fp_t b) {
     rep_t aRep = toRep(a);
     rep_t bRep = toRep(b);
     const rep_t aAbs = aRep & absMask;
diff --git a/lib/builtins/fp_extend.h b/lib/builtins/fp_extend.h
index 5c2b923..6d95a06 100644
--- a/lib/builtins/fp_extend.h
+++ b/lib/builtins/fp_extend.h
@@ -28,7 +28,7 @@
 typedef uint64_t src_rep_t;
 #define SRC_REP_C UINT64_C
 static const int srcSigBits = 52;
-static inline int src_rep_t_clz(src_rep_t a) {
+static __inline int src_rep_t_clz(src_rep_t a) {
 #if defined __LP64__
     return __builtin_clzl(a);
 #else
@@ -75,12 +75,12 @@
 // End of specialization parameters.  Two helper routines for conversion to and
 // from the representation of floating-point data as integer values follow.
 
-static inline src_rep_t srcToRep(src_t x) {
+static __inline src_rep_t srcToRep(src_t x) {
     const union { src_t f; src_rep_t i; } rep = {.f = x};
     return rep.i;
 }
 
-static inline dst_t dstFromRep(dst_rep_t x) {
+static __inline dst_t dstFromRep(dst_rep_t x) {
     const union { dst_t f; dst_rep_t i; } rep = {.i = x};
     return rep.f;
 }
diff --git a/lib/builtins/fp_extend_impl.inc b/lib/builtins/fp_extend_impl.inc
index edcfa8d..b785cc7 100644
--- a/lib/builtins/fp_extend_impl.inc
+++ b/lib/builtins/fp_extend_impl.inc
@@ -38,7 +38,7 @@
 
 #include "fp_extend.h"
 
-static inline dst_t __extendXfYf2__(src_t a) {
+static __inline dst_t __extendXfYf2__(src_t a) {
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
     const int srcBits = sizeof(src_t)*CHAR_BIT;
diff --git a/lib/builtins/fp_fixint_impl.inc b/lib/builtins/fp_fixint_impl.inc
index 035e87c..da70d4d 100644
--- a/lib/builtins/fp_fixint_impl.inc
+++ b/lib/builtins/fp_fixint_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fixint_t __fixint(fp_t a) {
+static __inline fixint_t __fixint(fp_t a) {
     const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
     const fixint_t fixint_min = -fixint_max - 1;
     // Break a into sign, exponent, significand
diff --git a/lib/builtins/fp_fixuint_impl.inc b/lib/builtins/fp_fixuint_impl.inc
index 5fefab0..d68ccf2 100644
--- a/lib/builtins/fp_fixuint_impl.inc
+++ b/lib/builtins/fp_fixuint_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fixuint_t __fixuint(fp_t a) {
+static __inline fixuint_t __fixuint(fp_t a) {
     // Break a into sign, exponent, significand
     const rep_t aRep = toRep(a);
     const rep_t aAbs = aRep & absMask;
@@ -27,7 +27,7 @@
         return 0;
 
     // If the value is too large for the integer type, saturate.
-    if ((unsigned)exponent > sizeof(fixuint_t) * CHAR_BIT)
+    if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT)
         return ~(fixuint_t)0;
 
     // If 0 <= exponent < significandBits, right shift to get the result.
diff --git a/lib/builtins/fp_lib.h b/lib/builtins/fp_lib.h
index faebb99..223fb98 100644
--- a/lib/builtins/fp_lib.h
+++ b/lib/builtins/fp_lib.h
@@ -46,12 +46,12 @@
 #define REP_C UINT32_C
 #define significandBits 23
 
-static inline int rep_clz(rep_t a) {
+static __inline int rep_clz(rep_t a) {
     return __builtin_clz(a);
 }
 
 // 32x32 --> 64 bit multiply
-static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
     const uint64_t product = (uint64_t)a*b;
     *hi = product >> 32;
     *lo = product;
@@ -66,7 +66,7 @@
 #define REP_C UINT64_C
 #define significandBits 52
 
-static inline int rep_clz(rep_t a) {
+static __inline int rep_clz(rep_t a) {
 #if defined __LP64__
     return __builtin_clzl(a);
 #else
@@ -83,7 +83,7 @@
 // 64x64 -> 128 wide multiply for platforms that don't have such an operation;
 // many 64-bit platforms have this operation, but they tend to have hardware
 // floating-point, so we don't bother with a special case for them here.
-static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
     // Each of the component 32x32 -> 64 products
     const uint64_t plolo = loWord(a) * loWord(b);
     const uint64_t plohi = loWord(a) * hiWord(b);
@@ -112,7 +112,7 @@
 // 128-bit integer, we let the constant be casted to 128-bit integer
 #define significandBits 112
 
-static inline int rep_clz(rep_t a) {
+static __inline int rep_clz(rep_t a) {
     const union
         {
              __uint128_t ll;
@@ -148,7 +148,7 @@
 // 128x128 -> 256 wide multiply for platforms that don't have such an operation;
 // many 64-bit platforms have this operation, but they tend to have hardware
 // floating-point, so we don't bother with a special case for them here.
-static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 
     const uint64_t product11 = Word_1(a) * Word_1(b);
     const uint64_t product12 = Word_1(a) * Word_2(b);
@@ -228,28 +228,28 @@
 #define quietBit        (implicitBit >> 1)
 #define qnanRep         (exponentMask | quietBit)
 
-static inline rep_t toRep(fp_t x) {
+static __inline rep_t toRep(fp_t x) {
     const union { fp_t f; rep_t i; } rep = {.f = x};
     return rep.i;
 }
 
-static inline fp_t fromRep(rep_t x) {
+static __inline fp_t fromRep(rep_t x) {
     const union { fp_t f; rep_t i; } rep = {.i = x};
     return rep.f;
 }
 
-static inline int normalize(rep_t *significand) {
+static __inline int normalize(rep_t *significand) {
     const int shift = rep_clz(*significand) - rep_clz(implicitBit);
     *significand <<= shift;
     return 1 - shift;
 }
 
-static inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
     *hi = *hi << count | *lo >> (typeWidth - count);
     *lo = *lo << count;
 }
 
-static inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
+static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
     if (count < typeWidth) {
         const bool sticky = *lo << (typeWidth - count);
         *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
diff --git a/lib/builtins/fp_mul_impl.inc b/lib/builtins/fp_mul_impl.inc
index ca8a0bb..b34aa1b 100644
--- a/lib/builtins/fp_mul_impl.inc
+++ b/lib/builtins/fp_mul_impl.inc
@@ -14,7 +14,7 @@
 
 #include "fp_lib.h"
 
-static inline fp_t __mulXf3__(fp_t a, fp_t b) {
+static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
     const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
     const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
     const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
diff --git a/lib/builtins/fp_trunc.h b/lib/builtins/fp_trunc.h
index 373ba1b..d5e79bb 100644
--- a/lib/builtins/fp_trunc.h
+++ b/lib/builtins/fp_trunc.h
@@ -63,12 +63,12 @@
 // End of specialization parameters.  Two helper routines for conversion to and
 // from the representation of floating-point data as integer values follow.
 
-static inline src_rep_t srcToRep(src_t x) {
+static __inline src_rep_t srcToRep(src_t x) {
     const union { src_t f; src_rep_t i; } rep = {.f = x};
     return rep.i;
 }
 
-static inline dst_t dstFromRep(dst_rep_t x) {
+static __inline dst_t dstFromRep(dst_rep_t x) {
     const union { dst_t f; dst_rep_t i; } rep = {.i = x};
     return rep.f;
 }
diff --git a/lib/builtins/fp_trunc_impl.inc b/lib/builtins/fp_trunc_impl.inc
index 372e8d6..d88ae06 100644
--- a/lib/builtins/fp_trunc_impl.inc
+++ b/lib/builtins/fp_trunc_impl.inc
@@ -39,7 +39,7 @@
 
 #include "fp_trunc.h"
 
-static inline dst_t __truncXfYf2__(src_t a) {
+static __inline dst_t __truncXfYf2__(src_t a) {
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
     const int srcBits = sizeof(src_t)*CHAR_BIT;
diff --git a/lib/builtins/gcc_personality_v0.c b/lib/builtins/gcc_personality_v0.c
index 4b95cfd..331dc2b 100644
--- a/lib/builtins/gcc_personality_v0.c
+++ b/lib/builtins/gcc_personality_v0.c
@@ -194,15 +194,15 @@
              * Set Instruction Pointer to so we re-enter function 
              * at landing pad. The landing pad is created by the compiler
              * to take two parameters in registers.
-	     */
-            _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), 
-                                                (uintptr_t)exceptionObject);
+             */
+            _Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
+                          (uintptr_t)exceptionObject);
             _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
-            _Unwind_SetIP(context, funcStart+landingPad);
+            _Unwind_SetIP(context, (funcStart + landingPad));
             return _URC_INSTALL_CONTEXT;
         }
     }
-    
+
     /* No landing pad found, continue unwinding. */
     return _URC_CONTINUE_UNWIND;
 }
diff --git a/lib/builtins/i386/chkstk.S b/lib/builtins/i386/chkstk.S
new file mode 100644
index 0000000..b599748
--- /dev/null
+++ b/lib/builtins/i386/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// _chkstk routine
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__chkstk_ms)
+        push   %ecx
+        push   %eax
+        cmp    $0x1000,%eax
+        lea    12(%esp),%ecx
+        jb     1f
+2:
+        sub    $0x1000,%ecx
+        test   %ecx,(%ecx)
+        sub    $0x1000,%eax
+        cmp    $0x1000,%eax
+        ja     2b
+1:
+        sub    %eax,%ecx
+        test   %ecx,(%ecx)
+        pop    %eax
+        pop    %ecx
+        ret
+END_COMPILERRT_FUNCTION(__chkstk_ms)
+
+#endif // __i386__
diff --git a/lib/builtins/i386/chkstk2.S b/lib/builtins/i386/chkstk2.S
new file mode 100644
index 0000000..7d65bb0
--- /dev/null
+++ b/lib/builtins/i386/chkstk2.S
@@ -0,0 +1,40 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __i386__
+
+// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
+// then decrement %esp by %eax.  Preserves all registers except %esp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+        push   %ecx
+        cmp    $0x1000,%eax
+        lea    8(%esp),%ecx     // esp before calling this routine -> ecx
+        jb     1f
+2:
+        sub    $0x1000,%ecx
+        test   %ecx,(%ecx)
+        sub    $0x1000,%eax
+        cmp    $0x1000,%eax
+        ja     2b
+1:
+        sub    %eax,%ecx
+        test   %ecx,(%ecx)
+
+        lea    4(%esp),%eax     // load pointer to the return address into eax
+        mov    %ecx,%esp        // install the new top of stack pointer into esp
+        mov    -4(%eax),%ecx    // restore ecx
+        push   (%eax)           // push return address onto the stack
+        sub    %esp,%eax        // restore the original value in eax
+        ret
+END_COMPILERRT_FUNCTION(__chkstk)
+END_COMPILERRT_FUNCTION(_alloca)
+
+#endif // __i386__
diff --git a/lib/builtins/int_lib.h b/lib/builtins/int_lib.h
index bca5d81..272f9d9 100644
--- a/lib/builtins/int_lib.h
+++ b/lib/builtins/int_lib.h
@@ -20,6 +20,13 @@
 /* Assumption: Right shift of signed negative is arithmetic shift. */
 /* Assumption: Endianness is little or big (not mixed). */
 
+#if defined(__ELF__)
+#define FNALIAS(alias_name, original_name) \
+  void alias_name() __attribute__((alias(#original_name)))
+#else
+#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
+#endif
+
 /* ABI macro definitions */
 
 #if __ARM_EABI__
@@ -28,13 +35,25 @@
 # define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
 #else
 # define ARM_EABI_FNALIAS(aeabi_name, name)
-# if defined(__arm__) && defined(_WIN32)
+# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__))
 #   define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
 # else
 #   define COMPILER_RT_ABI
 # endif
 #endif
 
+#ifdef _MSC_VER
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#define NORETURN __declspec(noreturn)
+#define UNUSED
+#else
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#endif
+
 #if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE))
 /*
  * Kernel and boot environment can't use normal headers,
@@ -71,4 +90,44 @@
 COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
 #endif
 
+/* Definitions for builtins unavailable on MSVC */
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+
+uint32_t __inline __builtin_ctz(uint32_t value) {
+  uint32_t trailing_zero = 0;
+  if (_BitScanForward(&trailing_zero, value))
+    return trailing_zero;
+  return 32;
+}
+
+uint32_t __inline __builtin_clz(uint32_t value) {
+  uint32_t leading_zero = 0;
+  if (_BitScanReverse(&leading_zero, value))
+    return 31 - leading_zero;
+  return 32;
+}
+
+#if defined(_M_ARM) || defined(_M_X64)
+uint32_t __inline __builtin_clzll(uint64_t value) {
+  uint32_t leading_zero = 0;
+  if (_BitScanReverse64(&leading_zero, value))
+    return 63 - leading_zero;
+  return 64;
+}
+#else
+uint32_t __inline __builtin_clzll(uint64_t value) {
+  if (value == 0)
+    return 64;
+  uint32_t msh = (uint32_t)(value >> 32);
+  uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
+  if (msh != 0)
+    return __builtin_clz(msh);
+  return 32 + __builtin_clz(lsh);
+}
+#endif
+
+#define __builtin_clzl __builtin_clzll
+#endif // defined(_MSC_VER) && !defined(__clang__)
+
 #endif /* INT_LIB_H */
diff --git a/lib/builtins/int_math.h b/lib/builtins/int_math.h
index d6b4bda..fc81fb7 100644
--- a/lib/builtins/int_math.h
+++ b/lib/builtins/int_math.h
@@ -25,43 +25,90 @@
 #  define  __has_builtin(x) 0
 #endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <math.h>
+#include <stdlib.h>
+#include <ymath.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CRT_INFINITY INFINITY
+#else
 #define CRT_INFINITY __builtin_huge_valf()
+#endif
 
-#define crt_isinf(x) __builtin_isinf((x))
-#define crt_isnan(x) __builtin_isnan((x))
-
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_isfinite(x) _finite((x))
+#define crt_isinf(x) !_finite((x))
+#define crt_isnan(x) _isnan((x))
+#else
 /* Define crt_isfinite in terms of the builtin if available, otherwise provide
  * an alternate version in terms of our other functions. This supports some
  * versions of GCC which didn't have __builtin_isfinite.
  */
 #if __has_builtin(__builtin_isfinite)
 #  define crt_isfinite(x) __builtin_isfinite((x))
-#else
+#elif defined(__GNUC__)
 #  define crt_isfinite(x) \
   __extension__(({ \
       __typeof((x)) x_ = (x); \
       !crt_isinf(x_) && !crt_isnan(x_); \
     }))
-#endif
+#else
+#  error "Do not know how to check for infinity"
+#endif /* __has_builtin(__builtin_isfinite) */
+#define crt_isinf(x) __builtin_isinf((x))
+#define crt_isnan(x) __builtin_isnan((x))
+#endif /* _MSC_VER */
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_copysign(x, y) copysign((x), (y))
+#define crt_copysignf(x, y) copysignf((x), (y))
+#define crt_copysignl(x, y) copysignl((x), (y))
+#else
 #define crt_copysign(x, y) __builtin_copysign((x), (y))
 #define crt_copysignf(x, y) __builtin_copysignf((x), (y))
 #define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fabs(x) fabs((x))
+#define crt_fabsf(x) fabsf((x))
+#define crt_fabsl(x) fabs((x))
+#else
 #define crt_fabs(x) __builtin_fabs((x))
 #define crt_fabsf(x) __builtin_fabsf((x))
 #define crt_fabsl(x) __builtin_fabsl((x))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fmax(x, y) __max((x), (y))
+#define crt_fmaxf(x, y) __max((x), (y))
+#define crt_fmaxl(x, y) __max((x), (y))
+#else
 #define crt_fmax(x, y) __builtin_fmax((x), (y))
 #define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
 #define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_logb(x) logb((x))
+#define crt_logbf(x) logbf((x))
+#define crt_logbl(x) logbl((x))
+#else
 #define crt_logb(x) __builtin_logb((x))
 #define crt_logbf(x) __builtin_logbf((x))
 #define crt_logbl(x) __builtin_logbl((x))
+#endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_scalbn(x, y) scalbn((x), (y))
+#define crt_scalbnf(x, y) scalbnf((x), (y))
+#define crt_scalbnl(x, y) scalbnl((x), (y))
+#else
 #define crt_scalbn(x, y) __builtin_scalbn((x), (y))
 #define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
 #define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
+#endif
 
 #endif /* INT_MATH_H */
diff --git a/lib/builtins/int_types.h b/lib/builtins/int_types.h
index aedae14..2dad43b 100644
--- a/lib/builtins/int_types.h
+++ b/lib/builtins/int_types.h
@@ -20,6 +20,10 @@
 
 #include "int_endianness.h"
 
+/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */
+#ifdef si_int
+#undef si_int
+#endif
 typedef      int si_int;
 typedef unsigned su_int;
 
@@ -95,14 +99,14 @@
     }s;
 } utwords;
 
-static inline ti_int make_ti(di_int h, di_int l) {
+static __inline ti_int make_ti(di_int h, di_int l) {
     twords r;
     r.s.high = h;
     r.s.low = l;
     return r.all;
 }
 
-static inline tu_int make_tu(du_int h, du_int l) {
+static __inline tu_int make_tu(du_int h, du_int l) {
     utwords r;
     r.s.high = h;
     r.s.low = l;
@@ -140,5 +144,22 @@
     long double f;
 } long_double_bits;
 
+#if __STDC_VERSION__ >= 199901L
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+#else
+typedef struct { float real, imaginary; } Fcomplex;
+
+typedef struct { double real, imaginary; } Dcomplex;
+
+typedef struct { long double real, imaginary; } Lcomplex;
+
+#define COMPLEX_REAL(x) (x).real
+#define COMPLEX_IMAGINARY(x) (x).imaginary
+#endif
 #endif /* INT_TYPES_H */
 
diff --git a/lib/builtins/int_util.c b/lib/builtins/int_util.c
index 323e461..420d1e2 100644
--- a/lib/builtins/int_util.c
+++ b/lib/builtins/int_util.c
@@ -8,8 +8,8 @@
  * ===----------------------------------------------------------------------===
  */
 
-#include "int_util.h"
 #include "int_lib.h"
+#include "int_util.h"
 
 /* NOTE: The definitions in this file are declared weak because we clients to be
  * able to arbitrarily package individual functions into separate .a files. If
@@ -23,7 +23,7 @@
 
 #ifdef KERNEL_USE
 
-extern void panic(const char *, ...) __attribute__((noreturn));
+NORETURN extern void panic(const char *, ...);
 #ifndef _WIN32
 __attribute__((visibility("hidden")))
 #endif
@@ -34,8 +34,8 @@
 #elif __APPLE__
 
 /* from libSystem.dylib */
-extern void __assert_rtn(const char *func, const char *file, 
-                     int line, const char * message) __attribute__((noreturn));
+NORETURN extern void __assert_rtn(const char *func, const char *file, int line,
+                                  const char *message);
 
 #ifndef _WIN32
 __attribute__((weak))
diff --git a/lib/builtins/int_util.h b/lib/builtins/int_util.h
index a9b595d..a7b20ed 100644
--- a/lib/builtins/int_util.h
+++ b/lib/builtins/int_util.h
@@ -20,10 +20,14 @@
 #define INT_UTIL_H
 
 /** \brief Trigger a program abort (or panic for kernel code). */
-#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, \
-                                                 __func__)
+#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__)
 
-void compilerrt_abort_impl(const char *file, int line,
-                           const char *function) __attribute__((noreturn));
+NORETURN void compilerrt_abort_impl(const char *file, int line,
+                                    const char *function);
+
+#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
+#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
+#define COMPILE_TIME_ASSERT2(expr, cnt)                                        \
+  typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
 
 #endif /* INT_UTIL_H */
diff --git a/lib/builtins/macho_embedded/CMakeLists.txt b/lib/builtins/macho_embedded/CMakeLists.txt
new file mode 100644
index 0000000..266e422
--- /dev/null
+++ b/lib/builtins/macho_embedded/CMakeLists.txt
@@ -0,0 +1,4 @@
+file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt)
+foreach(filter_file ${filter_files})
+  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file})
+endforeach()
diff --git a/lib/builtins/macho_embedded/arm.txt b/lib/builtins/macho_embedded/arm.txt
new file mode 100644
index 0000000..4b1683a
--- /dev/null
+++ b/lib/builtins/macho_embedded/arm.txt
@@ -0,0 +1,16 @@
+aeabi_cdcmpeq
+aeabi_cdrcmple
+aeabi_cfcmpeq
+aeabi_cfrcmple
+aeabi_dcmpeq
+aeabi_dcmpge
+aeabi_dcmpgt
+aeabi_dcmple
+aeabi_dcmplt
+aeabi_drsub
+aeabi_fcmpeq
+aeabi_fcmpge
+aeabi_fcmpgt
+aeabi_fcmple
+aeabi_fcmplt
+aeabi_frsub
diff --git a/lib/builtins/macho_embedded/common.txt b/lib/builtins/macho_embedded/common.txt
new file mode 100644
index 0000000..6ac85a7
--- /dev/null
+++ b/lib/builtins/macho_embedded/common.txt
@@ -0,0 +1,92 @@
+absvdi2
+absvsi2
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdi3
+divsc3
+divmodsi4
+udivmodsi4
+do_global_dtors
+ffsdi2
+fixdfdi
+fixsfdi
+fixunsdfdi
+fixunsdfsi
+fixunssfdi
+fixunssfsi
+floatdidf
+floatdisf
+floatundidf
+floatundisf
+gcc_bcmp
+lshrdi3
+moddi3
+muldc3
+muldi3
+mulsc3
+mulvdi3
+mulvsi3
+negdi2
+negvdi2
+negvsi2
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+subvdi3
+subvsi3
+ucmpdi2
+udiv_w_sdiv
+udivdi3
+udivmoddi4
+umoddi3
+adddf3
+addsf3
+cmpdf2
+cmpsf2
+div0
+divdf3
+divsf3
+divsi3
+extendsfdf2
+extendhfsf2
+ffssi2
+fixdfsi
+fixsfsi
+floatsidf
+floatsisf
+floatunsidf
+floatunsisf
+comparedf2
+comparesf2
+modsi3
+muldf3
+mulsf3
+negdf2
+negsf2
+subdf3
+subsf3
+truncdfhf2
+truncdfsf2
+truncsfhf2
+udivsi3
+umodsi3
+unorddf2
+unordsf2
+atomic_flag_clear
+atomic_flag_clear_explicit
+atomic_flag_test_and_set
+atomic_flag_test_and_set_explicit
+atomic_signal_fence
+atomic_thread_fence
+int_util
diff --git a/lib/builtins/macho_embedded/i386.txt b/lib/builtins/macho_embedded/i386.txt
new file mode 100644
index 0000000..b92e44b
--- /dev/null
+++ b/lib/builtins/macho_embedded/i386.txt
@@ -0,0 +1,7 @@
+i686.get_pc_thunk.eax
+i686.get_pc_thunk.ebp
+i686.get_pc_thunk.ebx
+i686.get_pc_thunk.ecx
+i686.get_pc_thunk.edi
+i686.get_pc_thunk.edx
+i686.get_pc_thunk.esi
diff --git a/lib/builtins/macho_embedded/thumb2-64.txt b/lib/builtins/macho_embedded/thumb2-64.txt
new file mode 100644
index 0000000..1c72fb1
--- /dev/null
+++ b/lib/builtins/macho_embedded/thumb2-64.txt
@@ -0,0 +1,10 @@
+sync_fetch_and_add_8
+sync_fetch_and_sub_8
+sync_fetch_and_and_8
+sync_fetch_and_or_8
+sync_fetch_and_xor_8
+sync_fetch_and_nand_8
+sync_fetch_and_max_8
+sync_fetch_and_umax_8
+sync_fetch_and_min_8
+sync_fetch_and_umin_8
diff --git a/lib/builtins/macho_embedded/thumb2.txt b/lib/builtins/macho_embedded/thumb2.txt
new file mode 100644
index 0000000..6add5ec
--- /dev/null
+++ b/lib/builtins/macho_embedded/thumb2.txt
@@ -0,0 +1,14 @@
+switch16
+switch32
+switch8
+switchu8
+sync_fetch_and_add_4
+sync_fetch_and_sub_4
+sync_fetch_and_and_4
+sync_fetch_and_or_4
+sync_fetch_and_xor_4
+sync_fetch_and_nand_4
+sync_fetch_and_max_4
+sync_fetch_and_umax_4
+sync_fetch_and_min_4
+sync_fetch_and_umin_4
diff --git a/lib/builtins/muldc3.c b/lib/builtins/muldc3.c
index 3bfae2c..16d8e98 100644
--- a/lib/builtins/muldc3.c
+++ b/lib/builtins/muldc3.c
@@ -17,17 +17,17 @@
 
 /* Returns: the product of a + ib and c + id */
 
-COMPILER_RT_ABI double _Complex
+COMPILER_RT_ABI Dcomplex
 __muldc3(double __a, double __b, double __c, double __d)
 {
     double __ac = __a * __c;
     double __bd = __b * __d;
     double __ad = __a * __d;
     double __bc = __b * __c;
-    double _Complex z;
-    __real__ z = __ac - __bd;
-    __imag__ z = __ad + __bc;
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Dcomplex z;
+    COMPLEX_REAL(z) = __ac - __bd;
+    COMPLEX_IMAGINARY(z) = __ad + __bc;
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         int __recalc = 0;
         if (crt_isinf(__a) || crt_isinf(__b))
@@ -65,8 +65,8 @@
         }
         if (__recalc)
         {
-            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
-            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/lib/builtins/mulsc3.c b/lib/builtins/mulsc3.c
index 29d46c6..c89cfd2 100644
--- a/lib/builtins/mulsc3.c
+++ b/lib/builtins/mulsc3.c
@@ -17,17 +17,17 @@
 
 /* Returns: the product of a + ib and c + id */
 
-COMPILER_RT_ABI float _Complex
+COMPILER_RT_ABI Fcomplex
 __mulsc3(float __a, float __b, float __c, float __d)
 {
     float __ac = __a * __c;
     float __bd = __b * __d;
     float __ad = __a * __d;
     float __bc = __b * __c;
-    float _Complex z;
-    __real__ z = __ac - __bd;
-    __imag__ z = __ad + __bc;
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Fcomplex z;
+    COMPLEX_REAL(z) = __ac - __bd;
+    COMPLEX_IMAGINARY(z) = __ad + __bc;
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         int __recalc = 0;
         if (crt_isinf(__a) || crt_isinf(__b))
@@ -65,8 +65,8 @@
         }
         if (__recalc)
         {
-            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
-            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/lib/builtins/multc3.c b/lib/builtins/multc3.c
new file mode 100644
index 0000000..0518bc2
--- /dev/null
+++ b/lib/builtins/multc3.c
@@ -0,0 +1,68 @@
+/* ===-- multc3.c - Implement __multc3 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __multc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI long double _Complex
+__multc3(long double a, long double b, long double c, long double d)
+{
+    long double ac = a * c;
+    long double bd = b * d;
+    long double ad = a * d;
+    long double bc = b * c;
+    long double _Complex z;
+    __real__ z = ac - bd;
+    __imag__ z = ad + bc;
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) {
+        int recalc = 0;
+        if (crt_isinf(a) || crt_isinf(b)) {
+            a = crt_copysignl(crt_isinf(a) ? 1 : 0, a);
+            b = crt_copysignl(crt_isinf(b) ? 1 : 0, b);
+            if (crt_isnan(c))
+                c = crt_copysignl(0, c);
+            if (crt_isnan(d))
+                d = crt_copysignl(0, d);
+            recalc = 1;
+        }
+        if (crt_isinf(c) || crt_isinf(d)) {
+            c = crt_copysignl(crt_isinf(c) ? 1 : 0, c);
+            d = crt_copysignl(crt_isinf(d) ? 1 : 0, d);
+            if (crt_isnan(a))
+                a = crt_copysignl(0, a);
+            if (crt_isnan(b))
+                b = crt_copysignl(0, b);
+            recalc = 1;
+        }
+        if (!recalc && (crt_isinf(ac) || crt_isinf(bd) ||
+                          crt_isinf(ad) || crt_isinf(bc))) {
+            if (crt_isnan(a))
+                a = crt_copysignl(0, a);
+            if (crt_isnan(b))
+                b = crt_copysignl(0, b);
+            if (crt_isnan(c))
+                c = crt_copysignl(0, c);
+            if (crt_isnan(d))
+                d = crt_copysignl(0, d);
+            recalc = 1;
+        }
+        if (recalc) {
+            __real__ z = CRT_INFINITY * (a * c - b * d);
+            __imag__ z = CRT_INFINITY * (a * d + b * c);
+        }
+    }
+    return z;
+}
diff --git a/lib/builtins/mulxc3.c b/lib/builtins/mulxc3.c
index 161fd0c..ba32216 100644
--- a/lib/builtins/mulxc3.c
+++ b/lib/builtins/mulxc3.c
@@ -19,17 +19,17 @@
 
 /* Returns: the product of a + ib and c + id */
 
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
 __mulxc3(long double __a, long double __b, long double __c, long double __d)
 {
     long double __ac = __a * __c;
     long double __bd = __b * __d;
     long double __ad = __a * __d;
     long double __bc = __b * __c;
-    long double _Complex z;
-    __real__ z = __ac - __bd;
-    __imag__ z = __ad + __bc;
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = __ac - __bd;
+    COMPLEX_IMAGINARY(z) = __ad + __bc;
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         int __recalc = 0;
         if (crt_isinf(__a) || crt_isinf(__b))
@@ -67,8 +67,8 @@
         }
         if (__recalc)
         {
-            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
-            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/lib/builtins/ppc/DD.h b/lib/builtins/ppc/DD.h
index fc3e41c..3e5f9e5 100644
--- a/lib/builtins/ppc/DD.h
+++ b/lib/builtins/ppc/DD.h
@@ -1,5 +1,5 @@
-#ifndef __DD_HEADER
-#define __DD_HEADER
+#ifndef COMPILERRT_DD_HEADER
+#define COMPILERRT_DD_HEADER
 
 #include "../int_lib.h"
 
@@ -9,7 +9,7 @@
 		double hi;
 		double lo;
 	}s;
-}DD;
+} DD;
 
 typedef union { 
 	double d;
@@ -19,28 +19,27 @@
 #define LOWORDER(xy,xHi,xLo,yHi,yLo) \
 	(((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo))
 
-static inline double __attribute__((always_inline))
-local_fabs(double x)
-{
-	doublebits result = { .d = x };
-	result.x &= UINT64_C(0x7fffffffffffffff);
-	return result.d;
+static __inline ALWAYS_INLINE double local_fabs(double x) {
+  doublebits result = {.d = x};
+  result.x &= UINT64_C(0x7fffffffffffffff);
+  return result.d;
 }
 
-static inline double __attribute__((always_inline))
-high26bits(double x)
-{
-	doublebits result = { .d = x };
-	result.x &= UINT64_C(0xfffffffff8000000);
-	return result.d;
+static __inline ALWAYS_INLINE double high26bits(double x) {
+  doublebits result = {.d = x};
+  result.x &= UINT64_C(0xfffffffff8000000);
+  return result.d;
 }
 
-static inline int __attribute__((always_inline))
-different_sign(double x, double y)
-{
-	doublebits xsignbit = { .d = x }, ysignbit = { .d = y };
-	int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
-	return result;
+static __inline ALWAYS_INLINE int different_sign(double x, double y) {
+  doublebits xsignbit = {.d = x}, ysignbit = {.d = y};
+  int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
+  return result;
 }
 
-#endif /* __DD_HEADER */
+long double __gcc_qadd(long double, long double);
+long double __gcc_qsub(long double, long double);
+long double __gcc_qmul(long double, long double);
+long double __gcc_qdiv(long double, long double);
+
+#endif /* COMPILERRT_DD_HEADER */
diff --git a/lib/builtins/ppc/divtc3.c b/lib/builtins/ppc/divtc3.c
index 2991281..8ec41c5 100644
--- a/lib/builtins/ppc/divtc3.c
+++ b/lib/builtins/ppc/divtc3.c
@@ -14,11 +14,6 @@
     (x).s.lo = 0.0;                                                     \
   }
 
-long double __gcc_qadd(long double, long double);
-long double __gcc_qsub(long double, long double);
-long double __gcc_qmul(long double, long double);
-long double __gcc_qdiv(long double, long double);
-
 long double _Complex
 __divtc3(long double a, long double b, long double c, long double d)
 {
diff --git a/lib/builtins/ppc/multc3.c b/lib/builtins/ppc/multc3.c
index 738b65a..9dd79c9 100644
--- a/lib/builtins/ppc/multc3.c
+++ b/lib/builtins/ppc/multc3.c
@@ -17,10 +17,6 @@
     }                                                                   \
   }
 
-long double __gcc_qadd(long double, long double);
-long double __gcc_qsub(long double, long double);
-long double __gcc_qmul(long double, long double);
-
 long double _Complex
 __multc3(long double a, long double b, long double c, long double d)
 {
diff --git a/lib/builtins/subdf3.c b/lib/builtins/subdf3.c
index 089e062..7a79e5e 100644
--- a/lib/builtins/subdf3.c
+++ b/lib/builtins/subdf3.c
@@ -23,4 +23,3 @@
     return __adddf3(a, fromRep(toRep(b) ^ signBit));
 }
 
-/* FIXME: rsub for ARM EABI */
diff --git a/lib/builtins/subsf3.c b/lib/builtins/subsf3.c
index 47f5e5e..c3b8514 100644
--- a/lib/builtins/subsf3.c
+++ b/lib/builtins/subsf3.c
@@ -23,4 +23,3 @@
     return __addsf3(a, fromRep(toRep(b) ^ signBit));
 }
 
-/* FIXME: rsub for ARM EABI */
diff --git a/lib/builtins/truncdfhf2.c b/lib/builtins/truncdfhf2.c
index 0852df3..17195cd 100644
--- a/lib/builtins/truncdfhf2.c
+++ b/lib/builtins/truncdfhf2.c
@@ -11,6 +11,8 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
+ARM_EABI_FNALIAS(d2h, truncdfhf2)
+
 COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
     return __truncXfYf2__(a);
 }
diff --git a/lib/builtins/truncsfhf2.c b/lib/builtins/truncsfhf2.c
index 381e590..9d61895 100644
--- a/lib/builtins/truncsfhf2.c
+++ b/lib/builtins/truncsfhf2.c
@@ -11,9 +11,11 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
+ARM_EABI_FNALIAS(f2h, truncsfhf2)
+
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI __attribute__((noinline)) uint16_t __truncsfhf2(float a) {
+COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
     return __truncXfYf2__(a);
 }
 
diff --git a/lib/builtins/x86_64/chkstk.S b/lib/builtins/x86_64/chkstk.S
new file mode 100644
index 0000000..4149ac6
--- /dev/null
+++ b/lib/builtins/x86_64/chkstk.S
@@ -0,0 +1,39 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// _chkstk routine
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// Notes from r227519
+// MSVC x64s __chkstk and cygmings ___chkstk_ms do not adjust %rsp
+// themselves. It also does not clobber %rax so we can reuse it when
+// adjusting %rsp.
+
+#ifdef __x86_64__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(___chkstk_ms)
+        push   %rcx
+        push   %rax
+        cmp    $0x1000,%rax
+        lea    24(%rsp),%rcx
+        jb     1f
+2:
+        sub    $0x1000,%rcx
+        test   %rcx,(%rcx)
+        sub    $0x1000,%rax
+        cmp    $0x1000,%rax
+        ja     2b
+1:
+        sub    %rax,%rcx
+        test   %rcx,(%rcx)
+        pop    %rax
+        pop    %rcx
+        ret
+END_COMPILERRT_FUNCTION(___chkstk_ms)
+
+#endif // __x86_64__
diff --git a/lib/builtins/x86_64/chkstk2.S b/lib/builtins/x86_64/chkstk2.S
new file mode 100644
index 0000000..ac1eb92
--- /dev/null
+++ b/lib/builtins/x86_64/chkstk2.S
@@ -0,0 +1,42 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __x86_64__
+
+// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments,
+// then decrement %rsp by %rax.  Preserves all registers except %rsp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__alloca)
+        mov    %rcx,%rax        // x64 _alloca is a normal function with parameter in rcx
+        // fallthrough
+DEFINE_COMPILERRT_FUNCTION(___chkstk)
+        push   %rcx
+        cmp    $0x1000,%rax
+        lea    16(%rsp),%rcx     // rsp before calling this routine -> rcx
+        jb     1f
+2:
+        sub    $0x1000,%rcx
+        test   %rcx,(%rcx)
+        sub    $0x1000,%rax
+        cmp    $0x1000,%rax
+        ja     2b
+1:
+        sub    %rax,%rcx
+        test   %rcx,(%rcx)
+
+        lea    8(%rsp),%rax     // load pointer to the return address into rax
+        mov    %rcx,%rsp        // install the new top of stack pointer into rsp
+        mov    -8(%rax),%rcx    // restore rcx
+        push   (%rax)           // push return address onto the stack
+        sub    %rsp,%rax        // restore the original value in rax
+        ret
+END_COMPILERRT_FUNCTION(___chkstk)
+END_COMPILERRT_FUNCTION(__alloca)
+
+#endif // __x86_64__