Merge "ART: x86_64 disassembler improvements"
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 83c536f..d65f11a 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -119,18 +119,29 @@
 endif
 
 # Clang build support.
-# Target builds use GCC by default.
-ART_TARGET_CLANG := false
+
+# Host.
 ART_HOST_CLANG := false
 ifneq ($(WITHOUT_HOST_CLANG),true)
   # By default, host builds use clang for better warnings.
   ART_HOST_CLANG := true
 endif
 
-# enable ART_TARGET_CLANG for ARM64
-ifneq (,$(filter $(TARGET_ARCH),arm64))
-ART_TARGET_CLANG := true
-endif
+# Clang on the target: only enabled for ARM64. Target builds use GCC by default.
+ART_TARGET_CLANG :=
+ART_TARGET_CLANG_arm :=
+ART_TARGET_CLANG_arm64 := true
+ART_TARGET_CLANG_mips :=
+ART_TARGET_CLANG_x86 :=
+ART_TARGET_CLANG_x86_64 :=
+# Set LOCAL_CLANG and, where ART_TARGET_CLANG_<arch> is non-empty, LOCAL_CLANG_<arch>.
+define set-target-local-clang-vars
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
+    $(foreach arch,$(ART_SUPPORTED_ARCH),
+      ifneq ($$(ART_TARGET_CLANG_$(arch)),)
+        LOCAL_CLANG_$(arch) := $$(ART_TARGET_CLANG_$(arch))
+      endif)
+endef
 
 # directory used for dalvik-cache on device
 ART_DALVIK_CACHE_DIR := /data/dalvik-cache
@@ -190,13 +201,18 @@
 	-Wstrict-aliasing \
 	-fstrict-aliasing
 
+ART_TARGET_CLANG_CFLAGS :=
+ART_TARGET_CLANG_CFLAGS_arm :=
+ART_TARGET_CLANG_CFLAGS_arm64 :=
+ART_TARGET_CLANG_CFLAGS_mips :=
+ART_TARGET_CLANG_CFLAGS_x86 :=
+ART_TARGET_CLANG_CFLAGS_x86_64 :=
+
 # these are necessary for Clang ARM64 ART builds
-ifeq ($(ART_TARGET_CLANG), true)
-art_cflags += \
+ART_TARGET_CLANG_CFLAGS_arm64  += \
 	-Wno-implicit-exception-spec-mismatch \
 	-DNVALGRIND \
 	-Wno-unused-value
-endif
 
 ifeq ($(ART_SMALL_MODE),true)
   art_cflags += -DART_SMALL_MODE=1
@@ -215,10 +231,8 @@
 	-O3
 
 # FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
-ifeq ($(ART_TARGET_CLANG),true)
-art_non_debug_cflags += \
-        -fno-vectorize
-endif
+ART_TARGET_CLANG_CFLAGS_arm64 += \
+	-fno-vectorize
 
 art_debug_cflags := \
 	-O1 \
@@ -296,6 +310,24 @@
 
 ART_TARGET_DEBUG_CFLAGS := $(art_debug_cflags)
 
+# Set target LOCAL_CFLAGS and per-arch LOCAL_CFLAGS_<arch>. $(1): ndebug or debug
+define set-target-local-cflags-vars
+    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+    LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86)
+    art_target_cflags_ndebug_or_debug := $(1)
+    ifeq ($$(art_target_cflags_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    endif
+    # Clang-only flags are added per arch, and only where that arch's clang switch is exactly true.
+    # TODO: Also set when ART_TARGET_CLANG_$(arch)!=false and ART_TARGET_CLANG==true
+    $(foreach arch,$(ART_SUPPORTED_ARCH),
+      ifeq ($$(ART_TARGET_CLANG_$(arch)),true)
+        LOCAL_CFLAGS_$(arch) += $$(ART_TARGET_CLANG_CFLAGS_$(arch))
+      endif)
+endef
+
 ART_BUILD_TARGET := false
 ART_BUILD_HOST := false
 ART_BUILD_NDEBUG := false
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index a186e85..49e7384 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -66,14 +66,8 @@
 
   LOCAL_CFLAGS := $(ART_EXECUTABLES_CFLAGS)
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
-    LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86)
-    ifeq ($$(art_ndebug_or_debug),debug)
-      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
-    else
-      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
-    endif
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,$(6))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 9f1d0f1..314d672 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -187,16 +187,15 @@
   LOCAL_CFLAGS := $(ART_TEST_CFLAGS)
   include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
-    LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
     LOCAL_STATIC_LIBRARIES += libgtest_libc++
     LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
     include $(BUILD_EXECUTABLE)
-    
+
     ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE)
     art_gtest_target := test-art-$$(art_target_or_host)-gtest-$$(art_gtest_name)
 
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index c080928..b4c99b5 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -49,9 +49,8 @@
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/build/Android.libarttest.mk
   include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
-    LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl libcutils
     LOCAL_STATIC_LIBRARIES := libgtest
     LOCAL_MULTILIB := both
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 3bed01d..4d42215 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -177,8 +177,10 @@
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
+    LOCAL_SHARED_LIBRARIES += libart
   else # debug
     LOCAL_MODULE := libartd-compiler
+    LOCAL_SHARED_LIBRARIES += libartd
   endif
 
   LOCAL_MODULE_TAGS := optional
@@ -200,32 +202,21 @@
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
   include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+    ifeq ($$(art_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
   endif
 
   # TODO: clean up the compilers and remove this.
   LOCAL_CFLAGS += -Wno-unused-parameter
 
-  LOCAL_SHARED_LIBRARIES += liblog
-  ifeq ($$(art_ndebug_or_debug),debug)
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
-    else # host
-      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
-    endif
-    LOCAL_SHARED_LIBRARIES += libartd
-  else
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
-    else # host
-      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
-    endif
-    LOCAL_SHARED_LIBRARIES += libart
-  endif
   ifeq ($(ART_USE_PORTABLE_COMPILER),true)
     LOCAL_SHARED_LIBRARIES += libLLVM
     LOCAL_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 33084df..1284a97 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -95,14 +95,26 @@
     // All predecessors have already been visited because we are visiting in reverse post order.
     // We merge the values of all locals, creating phis if those values differ.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
+      bool one_predecessor_has_no_value = false;
       bool is_different = false;
       HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local);
-      for (size_t i = 1; i < block->GetPredecessors().Size(); i++) {
-        if (ValueOfLocal(block->GetPredecessors().Get(i), local) != value) {
+
+      for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+        HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local);
+        if (current == nullptr) {
+//          one_predecessor_has_no_value = true;
+//          break;
+        } else if (current != value) {
           is_different = true;
-          break;
         }
       }
+
+      if (one_predecessor_has_no_value) {
+        // If one predecessor has no value for this local, we trust the verifier has
+        // successfully checked that there is a store dominating any read after this block.
+        continue;
+      }
+
       if (is_different) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid);
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index d104619..485ea27 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -459,4 +459,34 @@
   TestCode(data, expected);
 }
 
+TEST(SsaTest, LocalInIf) {
+  // Test that no phi is created in the join block (block 3) when a local is
+  // assigned on only one of the incoming paths, i.e. one predecessor has no value.
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  0: IntConstant 0 [3, 3]\n"
+    "  1: IntConstant 4\n"
+    "  2: Goto\n"
+    "BasicBlock 1, pred: 0, succ: 2, 5\n"
+    "  3: Equal(0, 0) [4]\n"
+    "  4: If(3)\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  5: Goto\n"
+    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "  6: ReturnVoid\n"
+    "BasicBlock 4, pred: 3\n"
+    "  7: Exit\n"
+    // Block 5 is synthesized to split the critical edge from block 1 to block 3.
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  8: Goto\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
 }  // namespace art
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index 814323c..b4b194d 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -59,27 +59,22 @@
   LOCAL_SRC_FILES := $$(LIBART_DISASSEMBLER_SRC_FILES)
 
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+    ifeq ($$(art_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
   endif
 
   LOCAL_SHARED_LIBRARIES += liblog
   ifeq ($$(art_ndebug_or_debug),debug)
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
-    else # host
-      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
-    endif
     LOCAL_SHARED_LIBRARIES += libartd
   else
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
-    else # host
-      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
-    endif
     LOCAL_SHARED_LIBRARIES += libart
   endif
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 17f0493..7a832c1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -314,7 +314,6 @@
 
 # $(1): target or host
 # $(2): ndebug or debug
-# $(3): true or false for LOCAL_CLANG
 define build-libart
   ifneq ($(1),target)
     ifneq ($(1),host)
@@ -326,15 +325,9 @@
       $$(error expected ndebug or debug for argument 2, received $(2))
     endif
   endif
-  ifneq ($(3),true)
-    ifneq ($(3),false)
-      $$(error expected true or false for argument 3, received $(3))
-    endif
-  endif
 
   art_target_or_host := $(1)
   art_ndebug_or_debug := $(2)
-  art_clang := $(3)
 
   include $(CLEAR_VARS)
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
@@ -374,31 +367,32 @@
   $(foreach arch,$(ART_SUPPORTED_ARCH),
     LOCAL_LDFLAGS_$(arch) := $$(LIBART_TARGET_LDFLAGS_$(arch)))
 
-  ifeq ($$(art_clang),false)
-    LOCAL_SRC_FILES += $(LIBART_GCC_ONLY_SRC_FILES)
-  else
-    LOCAL_CLANG := true
-  endif
+  # Clang usage
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,$(2))
+    # TODO: Loop with ifeq, ART_TARGET_CLANG
+    ifneq ($$(ART_TARGET_CLANG_$(TARGET_ARCH)),true)
+      LOCAL_SRC_FILES_$(TARGET_ARCH) += $(LIBART_GCC_ONLY_SRC_FILES)
+    endif
+    ifneq ($$(ART_TARGET_CLANG_$(TARGET_2ND_ARCH)),true)
+      LOCAL_SRC_FILES_$(TARGET_2ND_ARCH) += $(LIBART_GCC_ONLY_SRC_FILES)
+    endif
   else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    ifeq ($(ART_HOST_CLANG),false)
+      LOCAL_SRC_FILES += $(LIBART_GCC_ONLY_SRC_FILES)
+    endif
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
-  endif
-  ifeq ($$(art_ndebug_or_debug),debug)
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
-    else # host
+    ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
       LOCAL_LDLIBS += $(ART_HOST_DEBUG_LDLIBS)
       LOCAL_STATIC_LIBRARIES := libgtest_host
-    endif
-  else
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
-    else # host
+    else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
     endif
   endif
+
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES)
   LOCAL_C_INCLUDES += art/sigchainlib
 
@@ -446,17 +440,17 @@
 # they are used to cross compile for the target.
 ifeq ($(WITH_HOST_DALVIK),true)
   ifeq ($(ART_BUILD_NDEBUG),true)
-    $(eval $(call build-libart,host,ndebug,$(ART_HOST_CLANG)))
+    $(eval $(call build-libart,host,ndebug))
   endif
   ifeq ($(ART_BUILD_DEBUG),true)
-    $(eval $(call build-libart,host,debug,$(ART_HOST_CLANG)))
+    $(eval $(call build-libart,host,debug))
   endif
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-libart,target,ndebug,$(ART_TARGET_CLANG)))
+  $(eval $(call build-libart,target,ndebug))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-libart,target,debug,$(ART_TARGET_CLANG)))
+  $(eval $(call build-libart,target,debug))
 endif
 
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 44edd4b..0b7f268c 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -125,27 +125,24 @@
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
-        // Spill space for d8 - d15
+        // Spill x0-x7 which we say we don't clobber. May contain args.
         "sub sp, sp, #64\n\t"
         ".cfi_adjust_cfa_offset 64\n\t"
-        "stp d8, d9,   [sp]\n\t"
-        "stp d10, d11, [sp, #16]\n\t"
-        "stp d12, d13, [sp, #32]\n\t"
-        "stp d14, d15, [sp, #48]\n\t"
+        "stp x0, x1, [sp]\n\t"
+        "stp x2, x3, [sp, #16]\n\t"
+        "stp x4, x5, [sp, #32]\n\t"
+        "stp x6, x7, [sp, #48]\n\t"
 
-        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
-        ".cfi_adjust_cfa_offset 48\n\t"
-        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
-        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
-        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+        "sub sp, sp, #16\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "str %[referrer], [sp]\n\t"    // referrer
 
         // Push everything on the stack, so we don't rely on the order. What a mess. :-(
         "sub sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset 48\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #8]\n\t"
-        "str %[arg2], [sp, #16]\n\t"
-        "str %[code], [sp, #24]\n\t"
+        // All things are "r" constraints, so direct str/stp should work.
+        "stp %[arg0], %[arg1], [sp]\n\t"
+        "stp %[arg2], %[code], [sp, #16]\n\t"
         "str %[self], [sp, #32]\n\t"
 
         // Now we definitely have x0-x3 free, use it to garble d8 - d15
@@ -169,17 +166,18 @@
         "add x0, x0, 1\n\t"
         "fmov d15, x0\n\t"
 
-        // Load call params
-        "ldr x0, [sp]\n\t"
-        "ldr x1, [sp, #8]\n\t"
-        "ldr x2, [sp, #16]\n\t"
-        "ldr x3, [sp, #24]\n\t"
+        // Load call params into the right registers.
+        "ldp x0, x1, [sp]\n\t"
+        "ldp x2, x3, [sp, #16]\n\t"
         "ldr x18, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset -48\n\t"
 
 
         "blr x3\n\t"              // Call the stub
+        "mov x8, x0\n\t"          // Store result
+        "add sp, sp, #16\n\t"     // Drop the quick "frame"
+        ".cfi_adjust_cfa_offset -16\n\t"
 
         // Test d8 - d15. We can use x1 and x2.
         "movk x1, #0xfad0\n\t"
@@ -225,31 +223,25 @@
         "cmp x1, x2\n\t"
         "b.ne 1f\n\t"
 
-        "mov x2, #0\n\t"
-        "str x2, %[fpr_result]\n\t"
+        "mov x9, #0\n\t"              // Use x9 as flag, in clobber list
 
         // Finish up.
         "2:\n\t"
-        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
-        "ldr x30, [sp, #40]\n\t"       // Restore xLR
-        "add sp, sp, #48\n\t"          // Free stack space
-        ".cfi_adjust_cfa_offset -48\n\t"
-        "mov %[result], x0\n\t"        // Save the result
-
-        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
-        "ldp d10, d11, [sp, #16]\n\t"
-        "ldp d12, d13, [sp, #32]\n\t"
-        "ldp d14, d15, [sp, #48]\n\t"
-        "add sp, sp, #64\n\t"
+        "ldp x0, x1, [sp]\n\t"        // Restore stuff not named clobbered, may contain fpr_result
+        "ldp x2, x3, [sp, #16]\n\t"
+        "ldp x4, x5, [sp, #32]\n\t"
+        "ldp x6, x7, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"         // Free stack space, now sp as on entry
         ".cfi_adjust_cfa_offset -64\n\t"
 
+        "str x9, %[fpr_result]\n\t"   // Store the FPR comparison result
+        "mov %[result], x8\n\t"              // Store the call result
+
         "b 3f\n\t"                     // Goto end
 
         // Failed fpr verification.
         "1:\n\t"
-        "mov x2, #1\n\t"
-        "str x2, %[fpr_result]\n\t"
+        "mov x9, #1\n\t"
         "b 2b\n\t"                     // Goto finish-up
 
         // End
@@ -258,7 +250,12 @@
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [fpr_result] "m" (fpr_result)
-        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20",
+          "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30",
+          "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+          "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -351,29 +348,25 @@
             : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
-        // Spill space for d8 - d15
+        // Spill x0-x7 which we say we don't clobber. May contain args.
         "sub sp, sp, #64\n\t"
         ".cfi_adjust_cfa_offset 64\n\t"
-        "stp d8, d9,   [sp]\n\t"
-        "stp d10, d11, [sp, #16]\n\t"
-        "stp d12, d13, [sp, #32]\n\t"
-        "stp d14, d15, [sp, #48]\n\t"
+        "stp x0, x1, [sp]\n\t"
+        "stp x2, x3, [sp, #16]\n\t"
+        "stp x4, x5, [sp, #32]\n\t"
+        "stp x6, x7, [sp, #48]\n\t"
 
-        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
-        ".cfi_adjust_cfa_offset 48\n\t"
-        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
-        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
-        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+        "sub sp, sp, #16\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "str %[referrer], [sp]\n\t"    // referrer
 
         // Push everything on the stack, so we don't rely on the order. What a mess. :-(
         "sub sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset 48\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #8]\n\t"
-        "str %[arg2], [sp, #16]\n\t"
-        "str %[code], [sp, #24]\n\t"
-        "str %[self], [sp, #32]\n\t"
-        "str %[hidden], [sp, #40]\n\t"
+        // All things are "r" constraints, so direct str/stp should work.
+        "stp %[arg0], %[arg1], [sp]\n\t"
+        "stp %[arg2], %[code], [sp, #16]\n\t"
+        "stp %[self], %[hidden], [sp, #32]\n\t"
 
         // Now we definitely have x0-x3 free, use it to garble d8 - d15
         "movk x0, #0xfad0\n\t"
@@ -396,18 +389,17 @@
         "add x0, x0, 1\n\t"
         "fmov d15, x0\n\t"
 
-        // Load call params
-        "ldr x0, [sp]\n\t"
-        "ldr x1, [sp, #8]\n\t"
-        "ldr x2, [sp, #16]\n\t"
-        "ldr x3, [sp, #24]\n\t"
-        "ldr x18, [sp, #32]\n\t"
-        "ldr x12, [sp, #40]\n\t"
+        // Load call params into the right registers.
+        "ldp x0, x1, [sp]\n\t"
+        "ldp x2, x3, [sp, #16]\n\t"
+        "ldp x18, x12, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset -48\n\t"
 
-
         "blr x3\n\t"              // Call the stub
+        "mov x8, x0\n\t"          // Store result
+        "add sp, sp, #16\n\t"     // Drop the quick "frame"
+        ".cfi_adjust_cfa_offset -16\n\t"
 
         // Test d8 - d15. We can use x1 and x2.
         "movk x1, #0xfad0\n\t"
@@ -453,38 +445,39 @@
         "cmp x1, x2\n\t"
         "b.ne 1f\n\t"
 
-        "mov %[fpr_result], #0\n\t"
+        "mov x9, #0\n\t"              // Use x9 as flag, in clobber list
 
         // Finish up.
         "2:\n\t"
-        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
-        "ldr x30, [sp, #40]\n\t"       // Restore xLR
-        "add sp, sp, #48\n\t"          // Free stack space
-        ".cfi_adjust_cfa_offset -48\n\t"
-        "mov %[result], x0\n\t"        // Save the result
-
-        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
-        "ldp d10, d11, [sp, #16]\n\t"
-        "ldp d12, d13, [sp, #32]\n\t"
-        "ldp d14, d15, [sp, #48]\n\t"
-        "add sp, sp, #64\n\t"
+        "ldp x0, x1, [sp]\n\t"        // Restore stuff not named clobbered, may contain fpr_result
+        "ldp x2, x3, [sp, #16]\n\t"
+        "ldp x4, x5, [sp, #32]\n\t"
+        "ldp x6, x7, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"         // Free stack space, now sp as on entry
         ".cfi_adjust_cfa_offset -64\n\t"
 
+        "str x9, %[fpr_result]\n\t"   // Store the FPR comparison result
+        "mov %[result], x8\n\t"              // Store the call result
+
         "b 3f\n\t"                     // Goto end
 
         // Failed fpr verification.
         "1:\n\t"
-        "mov %[fpr_result], #1\n\t"
+        "mov x9, #1\n\t"
         "b 2b\n\t"                     // Goto finish-up
 
         // End
         "3:\n\t"
-        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
-        // Use the result from r0
+        : [result] "=r" (result)
+          // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-          [referrer] "r"(referrer), [hidden] "r"(hidden)
-        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+          [referrer] "r"(referrer), [hidden] "r"(hidden), [fpr_result] "m" (fpr_result)
+        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20",
+          "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30",
+          "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+          "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index a0a294a..8588c3a 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -35,6 +35,7 @@
 #include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "ScopedFd.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "well_known_classes.h"
@@ -220,8 +221,8 @@
 
 // Copy a profile file
 static void CopyProfileFile(const char* oldfile, const char* newfile) {
-  int fd = open(oldfile, O_RDONLY);
-  if (fd < 0) {
+  ScopedFd fd(open(oldfile, O_RDONLY));
+  if (fd.get() == -1) {
     // If we can't open the file show the uid:gid of the this process to allow
     // diagnosis of the problem.
     LOG(ERROR) << "Failed to open profile file " << oldfile<< ".  My uid:gid is "
@@ -230,8 +231,8 @@
   }
 
   // Create the copy with rw------- (only accessible by system)
-  int fd2 = open(newfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
-  if (fd2 < 0) {
+  ScopedFd fd2(open(newfile, O_WRONLY|O_CREAT|O_TRUNC, 0600));
+  if (fd2.get()  == -1) {
     // If we can't open the file show the uid:gid of the this process to allow
     // diagnosis of the problem.
     LOG(ERROR) << "Failed to create/write prev profile file " << newfile << ".  My uid:gid is "
@@ -240,14 +241,12 @@
   }
   char buf[4096];
   while (true) {
-    int n = read(fd, buf, sizeof(buf));
+    int n = read(fd.get(), buf, sizeof(buf));
     if (n <= 0) {
       break;
     }
-    write(fd2, buf, n);
+    write(fd2.get(), buf, n);
   }
-  close(fd);
-  close(fd2);
 }
 
 static double GetDoubleProperty(const char* property, double minValue, double maxValue, double defaultValue) {
diff --git a/test/402-optimizing-control-flow/src/Main.java b/test/402-optimizing-control-flow/src/Main.java
index 3339ef4..c9c24dd 100644
--- a/test/402-optimizing-control-flow/src/Main.java
+++ b/test/402-optimizing-control-flow/src/Main.java
@@ -40,6 +40,9 @@
 
     result = $opt$testForLoop(42);
     expectEquals(44, result);
+
+    result = $opt$testIfWithLocal(5);
+    expectEquals(7, result);
   }
 
   static int $opt$testIfEq1(int a) {
@@ -73,4 +76,14 @@
     for (; a != 44; a++) {}
     return a;
   }
+
+  static int $opt$testIfWithLocal(int a) {
+    if (a == 5) {
+      int f = 2;
+      a += f;
+    }
+    // f only exists inside the if, so it has a value on just one path into this point;
+    // the SSA builder should not create a phi for f.
+    return a;
+  }
 }