Merge "ARM: Specify if some branches go to far targets"
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 6d2b243..117684a 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -54,11 +54,11 @@
 
 inline bool IsAdrpPatch(const LinkerPatch& patch) {
   switch (patch.GetType()) {
-    case LinkerPatch::Type::kMethodRelative:
     case LinkerPatch::Type::kCall:
     case LinkerPatch::Type::kCallRelative:
     case LinkerPatch::Type::kBakerReadBarrierBranch:
       return false;
+    case LinkerPatch::Type::kMethodRelative:
     case LinkerPatch::Type::kTypeRelative:
     case LinkerPatch::Type::kTypeBssEntry:
     case LinkerPatch::Type::kStringRelative:
@@ -567,10 +567,10 @@
       return false;
     }
 
-    // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP
-    // for an ADD immediate, check for that as well. We generalize a bit to include
-    // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores
-    // the result to a different register.
+    // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result
+    // of the ADRP for an ADD immediate, check for that as well. We generalize a bit
+    // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination
+    // or stores the result to a different register.
     if ((next_insn & 0x1f000000) == 0x11000000 &&
         ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
       return false;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 963df5a..94787c9 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -173,6 +173,39 @@
   return false;
 }
 
+// Detects whether a and b are both sign-extended or both zero-extended narrower operands.
+// On success returns true and stores which extension matched in *is_unsigned.
+static bool IsNarrowerOperands(HInstruction* a,
+                               HInstruction* b,
+                               Primitive::Type type,
+                               /*out*/ HInstruction** r,
+                               /*out*/ HInstruction** s,
+                               /*out*/ bool* is_unsigned) {
+  const bool is_signed_pair =
+      IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s);
+  if (!is_signed_pair &&
+      !(IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s))) {
+    return false;  // neither extension kind applies to both operands
+  }
+  *is_unsigned = !is_signed_pair;
+  return true;
+}
+
+// Single-operand variant: true if a is a sign- or zero-extended narrower operand.
+static bool IsNarrowerOperand(HInstruction* a,
+                              Primitive::Type type,
+                              /*out*/ HInstruction** r,
+                              /*out*/ bool* is_unsigned) {
+  // Sign extension takes precedence; zero extension is only tried as a fallback.
+  const bool is_sign_extended = IsSignExtensionAndGet(a, type, r);
+  if (!is_sign_extended && !IsZeroExtensionAndGet(a, type, r)) {
+    return false;  // *is_unsigned is left untouched on failure
+  }
+  // Exactly one of the two matchers succeeded; report which one.
+  *is_unsigned = !is_sign_extended;
+  return true;
+}
+
 // Detect up to two instructions a and b, and an acccumulated constant c.
 static bool IsAddConstHelper(HInstruction* instruction,
                              /*out*/ HInstruction** a,
@@ -756,7 +789,7 @@
   return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
 }
 
-// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+// TODO: saturation arithmetic.
 bool HLoopOptimization::VectorizeUse(LoopNode* node,
                                      HInstruction* instruction,
                                      bool generate_code,
@@ -867,25 +900,38 @@
       return true;
     }
     // Deal with vector restrictions.
+    HInstruction* opa = instruction->InputAt(0);
+    HInstruction* opb = instruction->InputAt(1);
+    HInstruction* r = opa;
+    bool is_unsigned = false;
     if ((HasVectorRestrictions(restrictions, kNoShift)) ||
         (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
       return false;  // unsupported instruction
-    } else if ((instruction->IsShr() || instruction->IsUShr()) &&
-               HasVectorRestrictions(restrictions, kNoHiBits)) {
-      return false;  // hibits may impact lobits; TODO: we can do better!
+    } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
+      // Shifts right need extra care to account for higher order bits.
+      // TODO: less likely shr/unsigned and ushr/signed can be handled by flipping signedness.
+      if (instruction->IsShr() &&
+          (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+        return false;  // reject, unless all operands are sign-extension narrower
+      } else if (instruction->IsUShr() &&
+                 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
+        return false;  // reject, unless all operands are zero-extension narrower
+      }
     }
     // Accept shift operator for vectorizable/invariant operands.
     // TODO: accept symbolic, albeit loop invariant shift factors.
-    HInstruction* opa = instruction->InputAt(0);
-    HInstruction* opb = instruction->InputAt(1);
+    DCHECK(r != nullptr);
+    if (generate_code && vector_mode_ != kVector) {  // de-idiom
+      r = opa;
+    }
     int64_t distance = 0;
-    if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+    if (VectorizeUse(node, r, generate_code, type, restrictions) &&
         IsInt64AndGet(opb, /*out*/ &distance)) {
       // Restrict shift distance to packed data type width.
       int64_t max_distance = Primitive::ComponentSize(type) * 8;
       if (0 <= distance && distance < max_distance) {
         if (generate_code) {
-          GenerateVecOp(instruction, vector_map_->Get(opa), opb, type);
+          GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
         }
         return true;
       }
@@ -899,16 +945,23 @@
       case Intrinsics::kMathAbsFloat:
       case Intrinsics::kMathAbsDouble: {
         // Deal with vector restrictions.
-        if (HasVectorRestrictions(restrictions, kNoAbs) ||
-            HasVectorRestrictions(restrictions, kNoHiBits)) {
-          // TODO: we can do better for some hibits cases.
+        HInstruction* opa = instruction->InputAt(0);
+        HInstruction* r = opa;
+        bool is_unsigned = false;
+        if (HasVectorRestrictions(restrictions, kNoAbs)) {
           return false;
+        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+                   (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+          return false;  // reject, unless operand is sign-extension narrower
         }
         // Accept ABS(x) for vectorizable operand.
-        HInstruction* opa = instruction->InputAt(0);
-        if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+        DCHECK(r != nullptr);
+        if (generate_code && vector_mode_ != kVector) {  // de-idiom
+          r = opa;
+        }
+        if (VectorizeUse(node, r, generate_code, type, restrictions)) {
           if (generate_code) {
-            GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+            GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
           }
           return true;
         }
@@ -923,18 +976,28 @@
       case Intrinsics::kMathMaxFloatFloat:
       case Intrinsics::kMathMaxDoubleDouble: {
         // Deal with vector restrictions.
-        if (HasVectorRestrictions(restrictions, kNoMinMax) ||
-            HasVectorRestrictions(restrictions, kNoHiBits)) {
-          // TODO: we can do better for some hibits cases.
-          return false;
-        }
-        // Accept MIN/MAX(x, y) for vectorizable operands.
         HInstruction* opa = instruction->InputAt(0);
         HInstruction* opb = instruction->InputAt(1);
-        if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
-            VectorizeUse(node, opb, generate_code, type, restrictions)) {
+        HInstruction* r = opa;
+        HInstruction* s = opb;
+        bool is_unsigned = false;
+        if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+          return false;
+        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+                   !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+          return false;  // reject, unless all operands are same-extension narrower
+        }
+        // Accept MIN/MAX(x, y) for vectorizable operands.
+        DCHECK(r != nullptr && s != nullptr);
+        if (generate_code && vector_mode_ != kVector) {  // de-idiom
+          r = opa;
+          s = opb;
+        }
+        if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+            VectorizeUse(node, s, generate_code, type, restrictions)) {
           if (generate_code) {
-            GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+            GenerateVecOp(
+                instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
           }
           return true;
         }
@@ -959,11 +1022,11 @@
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
-          *restrictions |= kNoDiv | kNoAbs;
+          *restrictions |= kNoDiv;
           return TrySetVectorLength(16);
         case Primitive::kPrimChar:
         case Primitive::kPrimShort:
-          *restrictions |= kNoDiv | kNoAbs;
+          *restrictions |= kNoDiv;
           return TrySetVectorLength(8);
         case Primitive::kPrimInt:
           *restrictions |= kNoDiv;
@@ -1098,13 +1161,14 @@
 void HLoopOptimization::GenerateVecOp(HInstruction* org,
                                       HInstruction* opa,
                                       HInstruction* opb,
-                                      Primitive::Type type) {
+                                      Primitive::Type type,
+                                      bool is_unsigned) {
   if (vector_mode_ == kSequential) {
-    // Scalar code follows implicit integral promotion.
-    if (type == Primitive::kPrimBoolean ||
-        type == Primitive::kPrimByte ||
-        type == Primitive::kPrimChar ||
-        type == Primitive::kPrimShort) {
+    // Non-converting scalar code follows implicit integral promotion.
+    if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
+                                     type == Primitive::kPrimByte ||
+                                     type == Primitive::kPrimChar ||
+                                     type == Primitive::kPrimShort)) {
       type = Primitive::kPrimInt;
     }
   }
@@ -1185,7 +1249,6 @@
           case Intrinsics::kMathMinLongLong:
           case Intrinsics::kMathMinFloatFloat:
           case Intrinsics::kMathMinDoubleDouble: {
-            bool is_unsigned = false;  // TODO: detect unsigned versions
             vector = new (global_allocator_)
                 HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1194,7 +1257,6 @@
           case Intrinsics::kMathMaxLongLong:
           case Intrinsics::kMathMaxFloatFloat:
           case Intrinsics::kMathMaxDoubleDouble: {
-            bool is_unsigned = false;  // TODO: detect unsigned versions
             vector = new (global_allocator_)
                 HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1258,7 +1320,7 @@
                                                  Primitive::Type type,
                                                  uint64_t restrictions) {
   // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
-  // (note whether the sign bit in higher precision is shifted in has no effect
+  // (note whether the sign bit in wider precision is shifted in has no effect
   // on the narrow precision computed by the idiom).
   int64_t distance = 0;
   if ((instruction->IsShr() ||
@@ -1269,6 +1331,7 @@
     HInstruction* b = nullptr;
     int64_t       c = 0;
     if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+      DCHECK(a != nullptr && b != nullptr);
       // Accept c == 1 (rounded) or c == 0 (not rounded).
       bool is_rounded = false;
       if (c == 1) {
@@ -1280,11 +1343,7 @@
       HInstruction* r = nullptr;
       HInstruction* s = nullptr;
       bool is_unsigned = false;
-      if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
-        is_unsigned = true;
-      } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
-        is_unsigned = false;
-      } else {
+      if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) {
         return false;
       }
       // Deal with vector restrictions.
@@ -1295,6 +1354,10 @@
       // Accept recognized halving add for vectorizable operands. Vectorized code uses the
       // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
       DCHECK(r != nullptr && s != nullptr);
+      if (generate_code && vector_mode_ != kVector) {  // de-idiom
+        r = instruction->InputAt(0);
+        s = instruction->InputAt(1);
+      }
       if (VectorizeUse(node, r, generate_code, type, restrictions) &&
           VectorizeUse(node, s, generate_code, type, restrictions)) {
         if (generate_code) {
@@ -1308,12 +1371,7 @@
                 is_unsigned,
                 is_rounded));
           } else {
-            VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
-            VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
-            GenerateVecOp(instruction,
-                          vector_map_->Get(instruction->InputAt(0)),
-                          vector_map_->Get(instruction->InputAt(1)),
-                          type);
+            GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
           }
         }
         return true;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6d5978d..35298d4 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -137,7 +137,11 @@
                       HInstruction* opa,
                       HInstruction* opb,
                       Primitive::Type type);
-  void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+  void GenerateVecOp(HInstruction* org,
+                     HInstruction* opa,
+                     HInstruction* opb,
+                     Primitive::Type type,
+                     bool is_unsigned = false);
 
   // Vectorization idioms.
   bool VectorizeHalvingAddIdiom(LoopNode* node,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index bde7f2c..6899910 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2642,7 +2642,7 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
       return os << "BootImageLinkTimePcRelative";
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
-      return os << "Direct";
+      return os << "DirectAddress";
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
       return os << "DexCachePcRelative";
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 8ee5498..aa7dc65 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -352,6 +352,7 @@
                 "libdl",
                 // For android::FileMap used by libziparchive.
                 "libutils",
+                "libtombstoned_client"
             ],
             static_libs: [
                 // ZipArchive support, the order matters here to get all symbols.
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 5232252..5ce5447 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1269,6 +1269,7 @@
                                       std::vector<ProfileMethodInfo>& methods) {
   ScopedTrace trace(__FUNCTION__);
   MutexLock mu(Thread::Current(), lock_);
+  uint16_t jit_compile_threshold = Runtime::Current()->GetJITOptions()->GetCompileThreshold();
   for (const ProfilingInfo* info : profiling_infos_) {
     ArtMethod* method = info->GetMethod();
     const DexFile* dex_file = method->GetDexFile();
@@ -1277,6 +1278,16 @@
       continue;
     }
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
+
+    // If the method didn't reach the compilation threshold don't save the inline caches.
+    // They might be incomplete and cause unnecessary deoptimizations.
+    // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
+    if (method->GetCounter() < jit_compile_threshold) {
+      methods.emplace_back(/*ProfileMethodInfo*/
+          dex_file, method->GetDexMethodIndex(), inline_caches);
+      continue;
+    }
+
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
       std::vector<ProfileMethodInfo::ProfileClassReference> profile_classes;
       const InlineCache& cache = info->cache_[i];
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index fc91efa..ef4957c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -238,9 +238,9 @@
       .Define("-Xlockprofthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::LockProfThreshold)
-      .Define("-Xstacktracedir:_")
-          .WithType<std::string>()
-          .IntoKey(M::StackTraceDir)
+      .Define("-Xusetombstonedtraces")
+          .WithValue(true)
+          .IntoKey(M::UseTombstonedTraces)
       .Define("-Xstacktracefile:_")
           .WithType<std::string>()
           .IntoKey(M::StackTraceFile)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index c46bd8d..968f02a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -834,7 +834,7 @@
 
 void Runtime::StartSignalCatcher() {
   if (!is_zygote_) {
-    signal_catcher_ = new SignalCatcher(stack_trace_dir_, stack_trace_file_);
+    signal_catcher_ = new SignalCatcher(stack_trace_file_, use_tombstoned_traces_);
   }
 }
 
@@ -1069,7 +1069,11 @@
   abort_ = runtime_options.GetOrDefault(Opt::HookAbort);
 
   default_stack_size_ = runtime_options.GetOrDefault(Opt::StackSize);
-  stack_trace_dir_ = runtime_options.ReleaseOrDefault(Opt::StackTraceDir);
+  use_tombstoned_traces_ = runtime_options.GetOrDefault(Opt::UseTombstonedTraces);
+#if !defined(ART_TARGET_ANDROID)
+  CHECK(!use_tombstoned_traces_)
+      << "-Xusetombstonedtraces is only supported in an Android environment";
+#endif
   stack_trace_file_ = runtime_options.ReleaseOrDefault(Opt::StackTraceFile);
 
   compiler_executable_ = runtime_options.ReleaseOrDefault(Opt::Compiler);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 2e3b8d7..483d255 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -783,7 +783,13 @@
   ClassLinker* class_linker_;
 
   SignalCatcher* signal_catcher_;
-  std::string stack_trace_dir_;
+
+  // If true, the runtime will connect to tombstoned via a socket to
+  // request an open file descriptor to write its traces to.
+  bool use_tombstoned_traces_;
+
+  // Location to which traces must be written on SIGQUIT. Only used if
+  // use_tombstoned_traces_ == false.
   std::string stack_trace_file_;
 
   std::unique_ptr<JavaVMExt> java_vm_;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 77132a8..cfc681f 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -100,7 +100,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
-RUNTIME_OPTIONS_KEY (std::string,         StackTraceDir)
+RUNTIME_OPTIONS_KEY (bool,                UseTombstonedTraces, false)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
 RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/misc/trace/method-trace-file.bin")
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index faea7b3..e3dfc74 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -42,6 +42,10 @@
 #include "thread_list.h"
 #include "utils.h"
 
+#if defined(ART_TARGET_ANDROID)
+#include "tombstoned/tombstoned.h"
+#endif
+
 namespace art {
 
 static void DumpCmdLine(std::ostream& os) {
@@ -66,13 +70,19 @@
 #endif
 }
 
-SignalCatcher::SignalCatcher(const std::string& stack_trace_dir,
-                             const std::string& stack_trace_file)
-    : stack_trace_dir_(stack_trace_dir),
-      stack_trace_file_(stack_trace_file),
+SignalCatcher::SignalCatcher(const std::string& stack_trace_file,
+                             bool use_tombstoned_stack_trace_fd)
+    : stack_trace_file_(stack_trace_file),
+      use_tombstoned_stack_trace_fd_(use_tombstoned_stack_trace_fd),
       lock_("SignalCatcher lock"),
       cond_("SignalCatcher::cond_", lock_),
       thread_(nullptr) {
+#if !defined(ART_TARGET_ANDROID)
+  // We're not running on Android, so we can't communicate with tombstoned
+  // to ask for an open file.
+  CHECK(!use_tombstoned_stack_trace_fd_);
+#endif
+
   SetHaltFlag(false);
 
   // Create a raw pthread; its start routine will attach to the runtime.
@@ -103,62 +113,65 @@
   return halt_;
 }
 
-std::string SignalCatcher::GetStackTraceFileName() {
-  if (!stack_trace_dir_.empty()) {
-    // We'll try a maximum of ten times (arbitrarily selected) to create a file
-    // with a unique name, seeding the pseudo random generator each time.
-    //
-    // If this doesn't work, give up and log to stdout. Note that we could try
-    // indefinitely, but that would make problems in this code harder to detect
-    // since we'd be spinning in the signal catcher thread.
-    static constexpr uint32_t kMaxRetries = 10;
-
-    for (uint32_t i = 0; i < kMaxRetries; ++i) {
-        std::srand(NanoTime());
-        // Sample output for PID 1234 : /data/anr/anr-pid1234-cafeffee.txt
-        const std::string file_name = android::base::StringPrintf(
-            "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt",
-            stack_trace_dir_.c_str(),
-            static_cast<int32_t>(getpid()),
-            static_cast<uint32_t>(std::rand()));
-
-        if (!OS::FileExists(file_name.c_str())) {
-          return file_name;
-        }
-    }
-
-    LOG(ERROR) << "Unable to obtain stack trace filename at path : " << stack_trace_dir_;
-    return "";
+bool SignalCatcher::OpenStackTraceFile(android::base::unique_fd* tombstone_fd,
+                                       android::base::unique_fd* output_fd) {
+  if (use_tombstoned_stack_trace_fd_) {
+#if defined(ART_TARGET_ANDROID)
+    return tombstoned_connect(getpid(), tombstone_fd, output_fd, false /* is_native_crash */);
+#else
+    UNUSED(tombstone_fd);
+    UNUSED(output_fd);
+#endif
   }
 
-  return stack_trace_file_;
+  // The runtime is not configured to dump traces to a file, will LOG(INFO)
+  // instead.
+  if (stack_trace_file_.empty()) {
+    return false;
+  }
+  // Open in append mode (creating the file if needed) so earlier dumps are not overwritten.
+  int fd = open(stack_trace_file_.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
+  if (fd == -1) {
+    PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
+    return false;
+  }
+
+  output_fd->reset(fd);
+  return true;
 }
 
 void SignalCatcher::Output(const std::string& s) {
-  const std::string output_file = GetStackTraceFileName();
-  if (output_file.empty()) {
+  android::base::unique_fd tombstone_fd;
+  android::base::unique_fd output_fd;
+  if (!OpenStackTraceFile(&tombstone_fd, &output_fd)) {
     LOG(INFO) << s;
     return;
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
-  int fd = open(output_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
-  if (fd == -1) {
-    PLOG(ERROR) << "Unable to open stack trace file '" << output_file << "'";
-    return;
-  }
-  std::unique_ptr<File> file(new File(fd, output_file, true));
+
+  std::unique_ptr<File> file(new File(output_fd.release(), true /* check_usage */));
   bool success = file->WriteFully(s.data(), s.size());
   if (success) {
     success = file->FlushCloseOrErase() == 0;
   } else {
     file->Erase();
   }
+
+  const std::string output_path_msg = (use_tombstoned_stack_trace_fd_) ?
+      "[tombstoned]" : stack_trace_file_;
+
   if (success) {
-    LOG(INFO) << "Wrote stack traces to '" << output_file << "'";
+    LOG(INFO) << "Wrote stack traces to '" << output_path_msg << "'";
   } else {
-    PLOG(ERROR) << "Failed to write stack traces to '" << output_file << "'";
+    PLOG(ERROR) << "Failed to write stack traces to '" << output_path_msg << "'";
   }
+  // Only notify tombstoned when it supplied the fd; otherwise tombstone_fd was never set.
+#if defined(ART_TARGET_ANDROID)
+  if (use_tombstoned_stack_trace_fd_ && !tombstoned_notify_completion(tombstone_fd)) {
+    LOG(WARNING) << "Unable to notify tombstoned of dump completion.";
+  }
+#endif
 }
 
 void SignalCatcher::HandleSigQuit() {
diff --git a/runtime/signal_catcher.h b/runtime/signal_catcher.h
index 4cd7a98..8a2a728 100644
--- a/runtime/signal_catcher.h
+++ b/runtime/signal_catcher.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_SIGNAL_CATCHER_H_
 #define ART_RUNTIME_SIGNAL_CATCHER_H_
 
+#include "android-base/unique_fd.h"
 #include "base/mutex.h"
 
 namespace art {
@@ -32,15 +33,17 @@
  */
 class SignalCatcher {
  public:
-  // If |stack_trace_dir| is non empty, traces will be written to a
-  // unique file under that directory.
+  // If |use_tombstoned_stack_trace_fd| is |true|, traces will be
+  // written to a file descriptor provided by tombstoned. The process
+  // will communicate with tombstoned via a unix domain socket. This
+  // mode of stack trace dumping is only supported in an Android
+  // environment.
   //
-  // If |stack_trace_dir| is empty, and |stack_frace_file| is non-empty,
-  // traces will be appended to |stack_trace_file|.
-  //
-  // If both are empty, all traces will be written to the log buffer.
-  explicit SignalCatcher(const std::string& stack_trace_dir,
-                         const std::string& stack_trace_file);
+  // If false, all traces will be dumped to |stack_trace_file| if it's
+  // non-empty. If |stack_trace_file| is empty, all traces will be written
+  // to the log buffer.
+  SignalCatcher(const std::string& stack_trace_file,
+                const bool use_tombstoned_stack_trace_fd);
   ~SignalCatcher();
 
   void HandleSigQuit() REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
@@ -51,15 +54,18 @@
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* Run(void* arg) NO_THREAD_SAFETY_ANALYSIS;
 
-  std::string GetStackTraceFileName();
+  // NOTE: We're using android::base::unique_fd here for easier
+  // interoperability with tombstoned client APIs.
+  bool OpenStackTraceFile(android::base::unique_fd* tombstone_fd,
+                          android::base::unique_fd* output_fd);
   void HandleSigUsr1();
   void Output(const std::string& s);
   void SetHaltFlag(bool new_value) REQUIRES(!lock_);
   bool ShouldHalt() REQUIRES(!lock_);
   int WaitForSignal(Thread* self, SignalSet& signals) REQUIRES(!lock_);
 
-  std::string stack_trace_dir_;
   std::string stack_trace_file_;
+  const bool use_tombstoned_stack_trace_fd_;
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable cond_ GUARDED_BY(lock_);
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 520e7c3..3a2145bf 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -351,6 +351,35 @@
     }
   }
 
+  /// CHECK-START: void Main.typeConv(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.typeConv(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]          loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi                                   loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>]           loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Vadd:d\d+>> VecAdd [<<Load>>,<<Repl>>]            loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi1>>,<<Vadd>>] loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi                                   loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi2>>]          loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]                 loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]              loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>]  loop:<<Loop2>>      outer_loop:none
+  //
+  // Regression: the scalar cleanup loop must keep byte type on array get and type conversion.
+  private static void typeConv(byte[] a, byte[] b) {
+    int len = Math.min(a.length, b.length);
+    for (int i = 0; i < len; i++) {
+      a[i] = (byte) (b[i] + 1);
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -453,6 +482,17 @@
       expectEquals(40, bt[i]);
     }
 
+    byte[] b1 = new byte[259];  // few extra iterations
+    byte[] b2 = new byte[259];
+    for (int i = 0; i < 259; i++) {
+      b1[i] = 0;
+      b2[i] = (byte) i;
+    }
+    typeConv(b1, b2);
+    for (int i = 0; i < 259; i++) {
+      expectEquals((byte)(i + 1), b1[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 10b20b8..21d71e8 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -135,8 +135,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,9 +149,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need signedness flip.
+  /// CHECK-START: void Main.shr2() loop_optimization (after)
+  /// CHECK-NOT: VecUShr
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java
index 0628b36..89d4b6b 100644
--- a/test/640-checker-char-simd/src/Main.java
+++ b/test/640-checker-char-simd/src/Main.java
@@ -134,9 +134,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need signedness flip.
+  /// CHECK-START: void Main.sar2() loop_optimization (after)
+  /// CHECK-NOT: VecShr
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -148,8 +148,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java
index 0d4f87a..5709b5d 100644
--- a/test/640-checker-double-simd/src/Main.java
+++ b/test/640-checker-double-simd/src/Main.java
@@ -122,8 +122,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecStore
   //
-  // TODO: fill in when supported
+  // TODO: fill in when long2double is supported
   static void conv(long[] b) {
     for (int i = 0; i < 128; i++)
       a[i] = b[i];
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
index 97048eb..9ee553c 100644
--- a/test/640-checker-int-simd/src/Main.java
+++ b/test/640-checker-int-simd/src/Main.java
@@ -136,8 +136,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -149,8 +151,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
index e42c716..8f6af9d 100644
--- a/test/640-checker-long-simd/src/Main.java
+++ b/test/640-checker-long-simd/src/Main.java
@@ -134,8 +134,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,8 +149,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java
index 241f8e6..f62c726 100644
--- a/test/640-checker-short-simd/src/Main.java
+++ b/test/640-checker-short-simd/src/Main.java
@@ -135,8 +135,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,9 +149,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need signedness flip.
+  /// CHECK-START: void Main.shr2() loop_optimization (after)
+  /// CHECK-NOT: VecUShr
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index 76850ab..5a63d9f 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -22,6 +22,67 @@
   private static final int SPQUIET = 1 << 22;
   private static final long DPQUIET = 1L << 51;
 
+  /// CHECK-START: void Main.doitByte(byte[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitByte(byte[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  private static void doitByte(byte[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (byte) Math.abs(x[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitChar(char[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.doitChar(char[]) loop_optimization (after)
+  /// CHECK-NOT: VecAbs
+  private static void doitChar(char[] x) {
+    // Basically a nop due to zero extension.
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (char) Math.abs(x[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitShort(short[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitShort(short[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  private static void doitShort(short[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (short) Math.abs(x[i]);
+    }
+  }
+
   /// CHECK-START: void Main.doitInt(int[]) loop_optimization (before)
   /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
@@ -52,8 +113,16 @@
   /// CHECK-DAG: ArraySet                                   loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitLong(long[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                        loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                        loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                   loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
   //
-  // TODO: Not supported yet.
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitLong(long[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -90,8 +159,16 @@
   /// CHECK-DAG: ArraySet                                     loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitDouble(double[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                          loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                          loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                     loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
   //
-  // TODO: Not supported yet.
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitDouble(double[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -99,6 +176,31 @@
   }
 
   public static void main(String[] args) {
+    // Bytes, chars, shorts.
+    byte[] xb = new byte[256];
+    for (int i = 0; i < 256; i++) {
+      xb[i] = (byte) i;
+    }
+    doitByte(xb);
+    for (int i = 0; i < 256; i++) {
+      expectEquals32((byte) Math.abs((byte) i), xb[i]);
+    }
+    char[] xc = new char[1024 * 64];
+    for (int i = 0; i < 1024 * 64; i++) {
+      xc[i] = (char) i;
+    }
+    doitChar(xc);
+    for (int i = 0; i < 1024 *64; i++) {
+      expectEquals32((char) Math.abs((char) i), xc[i]);
+    }
+    short[] xs = new short[1024 * 64];
+    for (int i = 0; i < 1024 * 64; i++) {
+      xs[i] = (short) i;
+    }
+    doitShort(xs);
+    for (int i = 0; i < 1024 * 64; i++) {
+      expectEquals32((short) Math.abs((short) i), xs[i]);
+    }
     // Set up minint32, maxint32 and some others.
     int[] xi = new int[8];
     xi[0] = 0x80000000;
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index 8211ace..fe45807 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -27,9 +27,12 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -37,6 +40,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMinUnsigned(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.min(y[i] & 0xff, z[i] & 0xff);
+    }
+  }
+
   /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
@@ -45,9 +72,12 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -55,6 +85,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMaxUnsigned(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
+    }
+  }
+
   public static void main(String[] args) {
     // Initialize cross-values for all possible values.
     int total = 256 * 256;
@@ -77,11 +131,21 @@
       byte expected = (byte) Math.min(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMinUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.min(y[i] & 0xff, z[i] & 0xff);
+      expectEquals(expected, x[i]);
+    }
     doitMax(x, y, z);
     for (int i = 0; i < total; i++) {
       byte expected = (byte) Math.max(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMaxUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
+      expectEquals(expected, x[i]);
+    }
 
     System.out.println("passed");
   }
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 5ce7b94..e2998da 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -27,9 +27,12 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -45,9 +48,12 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
index e1711ae..cf04f85 100644
--- a/test/651-checker-double-simd-minmax/src/Main.java
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -48,7 +48,7 @@
   /// CHECK-DAG: <<Max:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO-x86: 0.0 vs -0.0?
+  // TODO x86: 0.0 vs -0.0?
   //
   /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
index 4e05a9d..6cee7b5 100644
--- a/test/651-checker-int-simd-minmax/src/Main.java
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -30,7 +30,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(int[] x, int[] y, int[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -50,7 +50,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(int[] x, int[] y, int[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index f34f526..09485a2 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -27,9 +27,12 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -37,6 +40,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMinUnsigned(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff);
+    }
+  }
+
   /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
@@ -45,9 +72,12 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -55,6 +85,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMaxUnsigned(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
+    }
+  }
+
   public static void main(String[] args) {
     short[] interesting = {
       (short) 0x0000, (short) 0x0001, (short) 0x007f,
@@ -91,11 +145,21 @@
       short expected = (short) Math.min(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMinUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff);
+      expectEquals(expected, x[i]);
+    }
     doitMax(x, y, z);
     for (int i = 0; i < total; i++) {
       short expected = (short) Math.max(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMaxUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
+      expectEquals(expected, x[i]);
+    }
 
     System.out.println("passed");
   }
diff --git a/test/987-stack-trace-dumping/expected.txt b/test/987-stack-trace-dumping/expected.txt
deleted file mode 100644
index e69de29..0000000
--- a/test/987-stack-trace-dumping/expected.txt
+++ /dev/null
diff --git a/test/987-stack-trace-dumping/info.txt b/test/987-stack-trace-dumping/info.txt
deleted file mode 100644
index e69de29..0000000
--- a/test/987-stack-trace-dumping/info.txt
+++ /dev/null
diff --git a/test/987-stack-trace-dumping/run b/test/987-stack-trace-dumping/run
deleted file mode 100755
index dee3e8b..0000000
--- a/test/987-stack-trace-dumping/run
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2017 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ask for stack traces to be dumped to a file rather than to stdout.
-./default-run "$@" --set-stack-trace-dump-dir
diff --git a/test/987-stack-trace-dumping/src/Main.java b/test/987-stack-trace-dumping/src/Main.java
deleted file mode 100644
index d1e8a1b..0000000
--- a/test/987-stack-trace-dumping/src/Main.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-
-public class Main {
-    public static void main(String[] args) throws Exception {
-        if (args.length != 3) {
-            throw new AssertionError("Unexpected number of args: " + args.length);
-        }
-
-        if (!"--stack-trace-dir".equals(args[1])) {
-            throw new AssertionError("Unexpected argument in position 1: " + args[1]);
-        }
-
-        // Send ourselves signal 3, which forces stack traces to be written to disk.
-        android.system.Os.kill(android.system.Os.getpid(), 3);
-
-        File[] files = null;
-        final String stackTraceDir = args[2];
-        for (int i = 0; i < 5; ++i) {
-            // Give the signal handler some time to run and dump traces - up to a maximum
-            // of 5 seconds. This is a kludge, but it's hard to do this without using things
-            // like inotify / WatchService and the like.
-            Thread.sleep(1000);
-
-            files = (new File(stackTraceDir)).listFiles();
-            if (files != null && files.length == 1) {
-                break;
-            }
-        }
-
-
-        if (files == null) {
-            throw new AssertionError("Gave up waiting for traces: " + java.util.Arrays.toString(files));
-        }
-
-        final String fileName = files[0].getName();
-        if (!fileName.startsWith("anr-pid")) {
-            throw new AssertionError("Unexpected prefix: " + fileName);
-        }
-
-        if (!fileName.contains(String.valueOf(android.system.Os.getpid()))) {
-            throw new AssertionError("File name does not contain process PID: " + fileName);
-        }
-    }
-}
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 4b44df7..96c2967 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -651,8 +651,7 @@
             "969-iface-super",
             "981-dedup-original-dex",
             "984-obsolete-invoke",
-            "985-re-obsolete",
-            "987-stack-trace-dumping"
+            "985-re-obsolete"
         ],
         "description": "The tests above fail with --build-with-javac-dx.",
         "env_vars": {"ANDROID_COMPILE_WITH_JACK": "false"},