interpreter: Add experimental lambda opcodes for invoke/create-lambda

These opcodes are not yet fully specified, and *will* change before they become shippable.
Do not write production code against experimental opcodes.

--

Implement partial interpreter support for new dex instructions invoke/create-lambda, and a
new opcode format 25x.

* Does not verify, in fact verification will soft fail when we see those opcodes.
* Compilers will punt to interpreter since they don't support new opcodes.
* As there is no way to capture/liberate variables yet, the "closure" is just
  an ArtMethod for the time being.

All new opcodes are disabled by default, use runtime option -Xexperimental-lambdas to enable them.

For example:
  dalvikvm ... -Xexperimental-lambdas ...
  dex2oat --runtime-arg -Xexperimental-lambdas ...

Change-Id: I6c996ca32a9b54ec45ec21d7a959b84dfb8a24eb
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 6192be7..98fd327 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -101,6 +101,19 @@
     return ::testing::AssertionFailure() << "key was not in the map";
   }
 
+  template <typename TMap, typename TKey, typename T>
+  ::testing::AssertionResult IsExpectedDefaultKeyValue(const T& expected,
+                                                       const TMap& map,
+                                                       const TKey& key) {
+    const T& actual = map.GetOrDefault(key);
+    if (!UsuallyEquals(expected, actual)) {
+      return ::testing::AssertionFailure()
+          << "expected " << detail::ToStringAny(expected) << " but got "
+          << detail::ToStringAny(actual);
+     }
+    return ::testing::AssertionSuccess();
+  }
+
 class CmdlineParserTest : public ::testing::Test {
  public:
   CmdlineParserTest() = default;
@@ -145,13 +158,23 @@
 
 #define EXPECT_KEY_EXISTS(map, key) EXPECT_TRUE((map).Exists(key))
 #define EXPECT_KEY_VALUE(map, key, expected) EXPECT_TRUE(IsExpectedKeyValue(expected, map, key))
+#define EXPECT_DEFAULT_KEY_VALUE(map, key, expected) EXPECT_TRUE(IsExpectedDefaultKeyValue(expected, map, key))
 
-#define EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv)               \
+#define _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv)              \
   do {                                                        \
     EXPECT_TRUE(IsResultSuccessful(parser_->Parse(argv)));    \
     EXPECT_EQ(0u, parser_->GetArgumentsMap().Size());         \
+
+#define EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv)               \
+  _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv);                   \
   } while (false)
 
+#define EXPECT_SINGLE_PARSE_DEFAULT_VALUE(expected, argv, key)\
+  _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv);                   \
+    RuntimeArgumentMap args = parser_->ReleaseArgumentsMap(); \
+    EXPECT_DEFAULT_KEY_VALUE(args, key, expected);            \
+  } while (false)                                             // NOLINT [readability/namespace] [5]
+
 #define _EXPECT_SINGLE_PARSE_EXISTS(argv, key)                \
   do {                                                        \
     EXPECT_TRUE(IsResultSuccessful(parser_->Parse(argv)));    \
@@ -509,6 +532,24 @@
   }
 }  // TEST_F
 
+/* -X[no]experimental-lambdas */
+TEST_F(CmdlineParserTest, TestExperimentalLambdas) {
+  // Off by default
+  EXPECT_SINGLE_PARSE_DEFAULT_VALUE(false,
+                                    "",
+                                    M::ExperimentalLambdas);
+
+  // Disabled explicitly
+  EXPECT_SINGLE_PARSE_VALUE(false,
+                            "-Xnoexperimental-lambdas",
+                            M::ExperimentalLambdas);
+
+  // Enabled explicitly
+  EXPECT_SINGLE_PARSE_VALUE(true,
+                            "-Xexperimental-lambdas",
+                            M::ExperimentalLambdas);
+}
+
 TEST_F(CmdlineParserTest, TestIgnoreUnrecognized) {
   RuntimeParser::Builder parserBuilder;
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 9fa5148..cc1ba35 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -173,7 +173,7 @@
   decoded_instruction->vB = inst->HasVRegB() ? inst->VRegB() : 0;
   decoded_instruction->vB_wide = inst->HasWideVRegB() ? inst->WideVRegB() : 0;
   decoded_instruction->vC = inst->HasVRegC() ?  inst->VRegC() : 0;
-  if (inst->HasVarArgs()) {
+  if (inst->HasVarArgs35c()) {
     inst->GetVarArgs(decoded_instruction->arg);
   }
   return inst->SizeInCodeUnits();
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 58236e2..97703a5 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -377,10 +377,10 @@
     Instruction::IGET_BYTE_QUICK,
     Instruction::IGET_CHAR_QUICK,
     Instruction::IGET_SHORT_QUICK,
-    Instruction::UNUSED_F3,
+    Instruction::INVOKE_LAMBDA,
     Instruction::UNUSED_F4,
     Instruction::UNUSED_F5,
-    Instruction::UNUSED_F6,
+    Instruction::CREATE_LAMBDA,
     Instruction::UNUSED_F7,
     Instruction::UNUSED_F8,
     Instruction::UNUSED_F9,
@@ -421,7 +421,13 @@
     Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
 };
 
-// Unsupported opcodes. null can be used when everything is supported. Size of the lists is
+// TODO: Add support for lambda opcodes to the quick compiler.
+static const int kUnsupportedLambdaOpcodes[] = {
+    Instruction::INVOKE_LAMBDA,
+    Instruction::CREATE_LAMBDA,
+};
+
+// Unsupported opcodes. Null can be used when everything is supported. Size of the lists is
 // recorded below.
 static const int* kUnsupportedOpcodes[] = {
     // 0 = kNone.
@@ -429,17 +435,17 @@
     // 1 = kArm, unused (will use kThumb2).
     kAllOpcodes,
     // 2 = kArm64.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 3 = kThumb2.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 4 = kX86.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 5 = kX86_64.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 6 = kMips.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 7 = kMips64.
-    nullptr
+    kUnsupportedLambdaOpcodes,
 };
 static_assert(sizeof(kUnsupportedOpcodes) == 8 * sizeof(int*), "kUnsupportedOpcodes unexpected");
 
@@ -450,21 +456,26 @@
     // 1 = kArm, unused (will use kThumb2).
     arraysize(kAllOpcodes),
     // 2 = kArm64.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 3 = kThumb2.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 4 = kX86.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 5 = kX86_64.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 6 = kMips.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 7 = kMips64.
-    0
+    arraysize(kUnsupportedLambdaOpcodes),
 };
 static_assert(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t),
               "kUnsupportedOpcodesSize unexpected");
 
+static bool IsUnsupportedExperimentalLambdasOnly(size_t i) {
+  DCHECK_LE(i, arraysize(kUnsupportedOpcodes));
+  return kUnsupportedOpcodes[i] == kUnsupportedLambdaOpcodes;
+}
+
 // The maximum amount of Dalvik register in a method for which we will start compiling. Tries to
 // avoid an abort when we need to manage more SSA registers than we can.
 static constexpr size_t kMaxAllowedDalvikRegisters = INT16_MAX / 2;
@@ -487,6 +498,30 @@
   return true;
 }
 
+// If the ISA has unsupported opcodes, should we skip scanning over them?
+//
+// Most of the time we're compiling non-experimental files, so scanning just slows
+// performance down by as much as 6% with 4 threads.
+// In the rare cases we compile experimental opcodes, the runtime has an option to enable it,
+// which will force scanning for any unsupported opcodes.
+static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
+  if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
+    // All opcodes are supported no matter what. Usually not the case
+    // since experimental opcodes are not implemented in the quick compiler.
+    return true;
+  } else if (LIKELY(!Runtime::Current()->AreExperimentalLambdasEnabled())) {
+    // Experimental opcodes are disabled.
+    //
+    // If all unsupported opcodes are experimental we don't need to do scanning.
+    return IsUnsupportedExperimentalLambdasOnly(instruction_set);
+  } else {
+    // Experimental opcodes are enabled.
+    //
+    // Do the opcode scanning if the ISA has any unsupported opcodes.
+    return false;
+  }
+}
+
 // Skip the method that we do not support currently.
 bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                                      CompilationUnit* cu) const {
@@ -498,7 +533,7 @@
 
   // Check whether we do have limitations at all.
   if (kSupportedTypes[cu->instruction_set] == nullptr &&
-      kUnsupportedOpcodesSize[cu->instruction_set] == 0U) {
+      SkipScanningUnsupportedOpcodes(cu->instruction_set)) {
     return true;
   }
 
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index dd65f2c..7344d13 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -223,6 +223,7 @@
     case k22t: return true;
     case k22x: return true;
     case k23x: return true;
+    case k25x: return true;
     case k31c: return true;
     case k31i: return true;
     case k31t: return true;
@@ -252,6 +253,7 @@
     case k22t: return VRegB_22t();
     case k22x: return VRegB_22x();
     case k23x: return VRegB_23x();
+    case k25x: return VRegB_25x();
     case k31c: return VRegB_31c();
     case k31i: return VRegB_31i();
     case k31t: return VRegB_31t();
@@ -329,6 +331,12 @@
   return static_cast<uint8_t>(Fetch16(1) & 0xff);
 }
 
+// Number of additional registers in this instruction. # of var arg registers = this value + 1.
+inline uint4_t Instruction::VRegB_25x() const {
+  DCHECK_EQ(FormatOf(Opcode()), k25x);
+  return InstB(Fetch16(0));
+}
+
 inline uint32_t Instruction::VRegB_31c() const {
   DCHECK_EQ(FormatOf(Opcode()), k31c);
   return Fetch32(1);
@@ -375,6 +383,7 @@
     case k22s: return true;
     case k22t: return true;
     case k23x: return true;
+    case k25x: return true;
     case k35c: return true;
     case k3rc: return true;
     default: return false;
@@ -388,6 +397,7 @@
     case k22s: return VRegC_22s();
     case k22t: return VRegC_22t();
     case k23x: return VRegC_23x();
+    case k25x: return VRegC_25x();
     case k35c: return VRegC_35c();
     case k3rc: return VRegC_3rc();
     default:
@@ -421,6 +431,11 @@
   return static_cast<uint8_t>(Fetch16(1) >> 8);
 }
 
+inline uint4_t Instruction::VRegC_25x() const {
+  DCHECK_EQ(FormatOf(Opcode()), k25x);
+  return static_cast<uint4_t>(Fetch16(1) & 0xf);
+}
+
 inline uint4_t Instruction::VRegC_35c() const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
   return static_cast<uint4_t>(Fetch16(2) & 0x0f);
@@ -431,11 +446,78 @@
   return Fetch16(2);
 }
 
-inline bool Instruction::HasVarArgs() const {
+inline bool Instruction::HasVarArgs35c() const {
   return FormatOf(Opcode()) == k35c;
 }
 
-inline void Instruction::GetVarArgs(uint32_t arg[5], uint16_t inst_data) const {
+inline bool Instruction::HasVarArgs25x() const {
+  return FormatOf(Opcode()) == k25x;
+}
+
+// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1.
+inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const {
+  DCHECK_EQ(FormatOf(Opcode()), k25x);
+
+  /*
+   * The opcode looks like this:
+   *   op vC, {vD, vE, vF, vG}
+   *
+   *  and vB is the (implicit) register count (0-4) which denotes how far from vD to vG to read.
+   *
+   *  vC is always present, so with "op vC, {}" the register count will be 0 even though vC
+   *  is valid.
+   *
+   *  The exact semantic meanings of vC:vG is up to the instruction using the format.
+   *
+   *  Encoding drawing as a bit stream:
+   *  (Note that each uint16 is little endian, and each register takes up 4 bits)
+   *
+   *       uint16  |||   uint16
+   *   7-0     15-8    7-0   15-8
+   *  |------|-----|||-----|-----|
+   *  |opcode|vB|vG|||vD|vC|vF|vE|
+   *  |------|-----|||-----|-----|
+   */
+  uint16_t reg_list = Fetch16(1);
+  uint4_t count = VRegB_25x();
+  DCHECK_LE(count, 4U) << "Invalid arg count in 25x (" << count << ")";
+
+  /*
+   * TODO(iam): Change instruction encoding to one of:
+   *
+   * - (X) vA = args count, vB = closure register, {vC..vG} = args (25x)
+   * - (Y) vA = args count, vB = method index, {vC..vG} = args (35x)
+   *
+   * (do this in conjunction with adding verifier support for invoke-lambda)
+   */
+
+  /*
+   * Copy the argument registers into the arg[] array, and
+   * also copy the first argument into vC. (The
+   * DecodedInstruction structure doesn't have separate
+   * fields for {vD, vE, vF, vG}, so there's no need to make
+   * copies of those.) Note that all cases fall-through.
+   */
+  switch (count) {
+    case 4:
+      arg[4] = (Fetch16(0) >> 8) & 0x0f;  // vG
+      FALLTHROUGH_INTENDED;
+    case 3:
+      arg[3] = (reg_list >> 12) & 0x0f;  // vF
+      FALLTHROUGH_INTENDED;
+    case 2:
+      arg[2] = (reg_list >> 8) & 0x0f;  // vE
+      FALLTHROUGH_INTENDED;
+    case 1:
+      arg[1] = (reg_list >> 4) & 0x0f;  // vD
+      FALLTHROUGH_INTENDED;
+    default:  // case 0
+      arg[0] = VRegC_25x();  // vC
+      break;
+  }
+}
+
+inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
 
   /*
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 69fe874..537fa15 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -63,7 +63,7 @@
 #define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
     ((opcode == NOP)                        ? -1 : \
      ((format >= k10x) && (format <= k10t)) ?  1 : \
-     ((format >= k20t) && (format <= k22c)) ?  2 : \
+     ((format >= k20t) && (format <= k25x)) ?  2 : \
      ((format >= k32x) && (format <= k3rc)) ?  3 : \
       (format == k51l)                      ?  5 : -1),
 #include "dex_instruction_list.h"
@@ -224,6 +224,14 @@
             break;
           }
           FALLTHROUGH_INTENDED;
+        case CREATE_LAMBDA:
+          if (file != nullptr) {
+            uint32_t method_idx = VRegB_21c();
+            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyMethod(method_idx, *file, true)
+               << " // method@" << method_idx;
+            break;
+          }
+          FALLTHROUGH_INTENDED;
         default:
           os << StringPrintf("%s v%d, thing@%d", opcode, VRegA_21c(), VRegB_21c());
           break;
@@ -304,6 +312,26 @@
       }
       break;
     }
+    case k25x: {
+      if (Opcode() == INVOKE_LAMBDA) {
+        uint32_t arg[kMaxVarArgRegs];
+        GetAllArgs25x(arg);
+        const size_t num_extra_var_args = VRegB_25x();
+        DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs);
+
+        // invoke-lambda vC, {vD, vE, vF, vG}
+        os << opcode << " v" << arg[0] << ", {";
+        for (size_t i = 0; i < num_extra_var_args; ++i) {
+          if (i != 0) {
+            os << ", ";
+          }
+          os << "v" << arg[i+1];
+        }
+        os << "}";
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    }
     case k32x:  os << StringPrintf("%s v%d, v%d", opcode, VRegA_32x(), VRegB_32x()); break;
     case k30t:  os << StringPrintf("%s %+d", opcode, VRegA_30t()); break;
     case k31t:  os << StringPrintf("%s v%d, %+d", opcode, VRegA_31t(), VRegB_31t()); break;
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index c64c21e..b043aba 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -105,6 +105,7 @@
     k22t,  // op vA, vB, +CCCC
     k22s,  // op vA, vB, #+CCCC
     k22c,  // op vA, vB, thing@CCCC
+    k25x,  // op vC, {vD, vE, vF, vG} (B: count)
     k32x,  // op vAAAA, vBBBB
     k30t,  // op +AAAAAAAA
     k31t,  // op vAA, +BBBBBBBB
@@ -116,30 +117,31 @@
   };
 
   enum Flags {
-    kBranch              = 0x000001,  // conditional or unconditional branch
-    kContinue            = 0x000002,  // flow can continue to next statement
-    kSwitch              = 0x000004,  // switch statement
-    kThrow               = 0x000008,  // could cause an exception to be thrown
-    kReturn              = 0x000010,  // returns, no additional statements
-    kInvoke              = 0x000020,  // a flavor of invoke
-    kUnconditional       = 0x000040,  // unconditional branch
-    kAdd                 = 0x000080,  // addition
-    kSubtract            = 0x000100,  // subtract
-    kMultiply            = 0x000200,  // multiply
-    kDivide              = 0x000400,  // division
-    kRemainder           = 0x000800,  // remainder
-    kAnd                 = 0x001000,  // and
-    kOr                  = 0x002000,  // or
-    kXor                 = 0x004000,  // xor
-    kShl                 = 0x008000,  // shl
-    kShr                 = 0x010000,  // shr
-    kUshr                = 0x020000,  // ushr
-    kCast                = 0x040000,  // cast
-    kStore               = 0x080000,  // store opcode
-    kLoad                = 0x100000,  // load opcode
-    kClobber             = 0x200000,  // clobbers memory in a big way (not just a write)
-    kRegCFieldOrConstant = 0x400000,  // is the third virtual register a field or literal constant (vC)
-    kRegBFieldOrConstant = 0x800000,  // is the second virtual register a field or literal constant (vB)
+    kBranch              = 0x0000001,  // conditional or unconditional branch
+    kContinue            = 0x0000002,  // flow can continue to next statement
+    kSwitch              = 0x0000004,  // switch statement
+    kThrow               = 0x0000008,  // could cause an exception to be thrown
+    kReturn              = 0x0000010,  // returns, no additional statements
+    kInvoke              = 0x0000020,  // a flavor of invoke
+    kUnconditional       = 0x0000040,  // unconditional branch
+    kAdd                 = 0x0000080,  // addition
+    kSubtract            = 0x0000100,  // subtract
+    kMultiply            = 0x0000200,  // multiply
+    kDivide              = 0x0000400,  // division
+    kRemainder           = 0x0000800,  // remainder
+    kAnd                 = 0x0001000,  // and
+    kOr                  = 0x0002000,  // or
+    kXor                 = 0x0004000,  // xor
+    kShl                 = 0x0008000,  // shl
+    kShr                 = 0x0010000,  // shr
+    kUshr                = 0x0020000,  // ushr
+    kCast                = 0x0040000,  // cast
+    kStore               = 0x0080000,  // store opcode
+    kLoad                = 0x0100000,  // load opcode
+    kClobber             = 0x0200000,  // clobbers memory in a big way (not just a write)
+    kRegCFieldOrConstant = 0x0400000,  // is the third virtual register a field or literal constant (vC)
+    kRegBFieldOrConstant = 0x0800000,  // is the second virtual register a field or literal constant (vB)
+    kExperimental        = 0x1000000,  // is an experimental opcode
   };
 
   enum VerifyFlag {
@@ -205,7 +207,7 @@
 
   // Returns a pointer to the instruction after this 2xx instruction in the stream.
   const Instruction* Next_2xx() const {
-    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k22c);
+    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k25x);
     return RelativeAt(2);
   }
 
@@ -355,6 +357,7 @@
   }
   uint16_t VRegB_22x() const;
   uint8_t VRegB_23x() const;
+  uint4_t VRegB_25x() const;
   uint32_t VRegB_31c() const;
   int32_t VRegB_31i() const;
   int32_t VRegB_31t() const;
@@ -381,15 +384,20 @@
   int16_t VRegC_22s() const;
   int16_t VRegC_22t() const;
   uint8_t VRegC_23x() const;
+  uint4_t VRegC_25x() const;
   uint4_t VRegC_35c() const;
   uint16_t VRegC_3rc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  bool HasVarArgs() const;
+  bool HasVarArgs35c() const;
+  bool HasVarArgs25x() const;
+
+  // TODO(iam): Make this name more consistent with GetAllArgs25x by including the opcode format.
   void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
+  void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -489,6 +497,11 @@
     return (kInstructionFlags[Opcode()] & kInvoke) != 0;
   }
 
+  // Determine if this instruction is experimental.
+  bool IsExperimental() const {
+    return (kInstructionFlags[Opcode()] & kExperimental) != 0;
+  }
+
   int GetVerifyTypeArgumentA() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegA | kVerifyRegAWide));
   }
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index f8f85f9..41c2417 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -261,10 +261,11 @@
   V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF3, UNUSED_F3, "unused-f3", k10x, false, kUnknown, 0, kVerifyError) \
+  V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
   V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kUnknown, 0, kVerifyError) \
-  V(0xF6, UNUSED_F6, "unused-f6", k10x, false, kUnknown, 0, kVerifyError) \
+  /* TODO(iam): get rid of the unused 'false' column */ \
+  V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
   V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xF8, UNUSED_F8, "unused-f8", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xF9, UNUSED_F9, "unused-f9", k10x, false, kUnknown, 0, kVerifyError) \
@@ -292,6 +293,7 @@
   V(k22t) \
   V(k22s) \
   V(k22c) \
+  V(k25x) \
   V(k32x) \
   V(k30t) \
   V(k31t) \
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 0f6f788..a351e15 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -480,12 +480,28 @@
   Runtime::Current()->AbortTransactionAndThrowAbortError(self, abort_msg);
 }
 
+// Separate declaration is required solely for the attributes.
+template<bool is_range, bool do_assignability_check> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+static inline bool DoCallCommon(ArtMethod* called_method,
+                                Thread* self,
+                                ShadowFrame& shadow_frame,
+                                JValue* result,
+                                uint16_t number_of_inputs,
+                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t vregC) ALWAYS_INLINE;
+
 template<bool is_range, bool do_assignability_check>
-bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-            const Instruction* inst, uint16_t inst_data, JValue* result) {
+static inline bool DoCallCommon(ArtMethod* called_method,
+                                Thread* self,
+                                ShadowFrame& shadow_frame,
+                                JValue* result,
+                                uint16_t number_of_inputs,
+                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t vregC) {
   bool string_init = false;
   // Replace calls to String.<init> with equivalent StringFactory call.
-  if (called_method->GetDeclaringClass()->IsStringClass() && called_method->IsConstructor()) {
+  if (UNLIKELY(called_method->GetDeclaringClass()->IsStringClass()
+               && called_method->IsConstructor())) {
     ScopedObjectAccessUnchecked soa(self);
     jmethodID mid = soa.EncodeMethod(called_method);
     called_method = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid));
@@ -494,28 +510,56 @@
 
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
-  const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
+
+  // Number of registers for the callee's call frame.
   uint16_t num_regs;
   if (LIKELY(code_item != nullptr)) {
     num_regs = code_item->registers_size_;
-    DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_);
+    DCHECK_EQ(string_init ? number_of_inputs - 1 : number_of_inputs, code_item->ins_size_);
   } else {
     DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
-    num_regs = num_ins;
-    if (string_init) {
-      // The new StringFactory call is static and has one fewer argument.
-      num_regs--;
-    }
+    num_regs = number_of_inputs;
   }
 
+  // Hack for String init:
+  //
+  // Rewrite invoke-x java.lang.String.<init>(this, a, b, c, ...) into:
+  //         invoke-x StringFactory(a, b, c, ...)
+  // by effectively dropping the first virtual register from the invoke.
+  //
+  // (at this point the ArtMethod has already been replaced,
+  // so we just need to fix-up the arguments)
+  uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
+  if (UNLIKELY(code_item == nullptr && string_init)) {
+    DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
+
+    DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
+    // The new StringFactory call is static and has one fewer argument.
+    num_regs--;
+    number_of_inputs--;
+
+    // Rewrite the var-args, dropping the 0th argument ("this")
+    for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) {
+      arg[i - 1] = arg[i];
+    }
+    arg[Instruction::kMaxVarArgRegs - 1] = 0;
+
+    // Rewrite the non-var-arg case
+    vregC++;  // Skips the 0th vreg in the range ("this").
+  }
+
+  // Parameter registers go at the end of the shadow frame.
+  DCHECK_GE(num_regs, number_of_inputs);
+  size_t first_dest_reg = num_regs - number_of_inputs;
+  DCHECK_NE(first_dest_reg, (size_t)-1);
+
   // Allocate shadow frame on the stack.
-  const char* old_cause = self->StartAssertNoThreadSuspension("DoCall");
+  const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
   void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
   ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0,
                                                     memory));
 
-  // Initialize new shadow frame.
-  size_t first_dest_reg = num_regs - num_ins;
+  // Initialize new shadow frame by copying the registers from the callee shadow frame.
   if (do_assignability_check) {
     // Slow path.
     // We might need to do class loading, which incurs a thread state change to kNative. So
@@ -530,33 +574,23 @@
     uint32_t shorty_len = 0;
     const char* shorty = new_shadow_frame->GetMethod()->GetShorty(&shorty_len);
 
-    // TODO: find a cleaner way to separate non-range and range information without duplicating
-    //       code.
-    uint32_t arg[5];  // only used in invoke-XXX.
-    uint32_t vregC;   // only used in invoke-XXX-range.
-    if (is_range) {
-      vregC = inst->VRegC_3rc();
-    } else {
-      inst->GetVarArgs(arg, inst_data);
-    }
-
     // Handle receiver apart since it's not part of the shorty.
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
+
     if (!new_shadow_frame->GetMethod()->IsStatic()) {
       size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
       ++arg_offset;
-    } else if (string_init) {
-      // Skip the referrer for the new static StringFactory call.
-      ++dest_reg;
-      ++arg_offset;
     }
+
+    // Copy the caller's invoke-* arguments into the callee's parameter registers.
     for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
       DCHECK_LT(shorty_pos + 1, shorty_len);
       const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset];
       switch (shorty[shorty_pos + 1]) {
+        // Handle Object references. 1 virtual register slot.
         case 'L': {
           Object* o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != nullptr) {
@@ -581,50 +615,40 @@
           new_shadow_frame->SetVRegReference(dest_reg, o);
           break;
         }
+        // Handle doubles and longs. 2 consecutive virtual register slots.
         case 'J': case 'D': {
-          uint64_t wide_value = (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << 32) |
-                                static_cast<uint32_t>(shadow_frame.GetVReg(src_reg));
+          uint64_t wide_value =
+              (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << BitSizeOf<uint32_t>()) |
+               static_cast<uint32_t>(shadow_frame.GetVReg(src_reg));
           new_shadow_frame->SetVRegLong(dest_reg, wide_value);
+          // Skip the next virtual register slot since we already used it.
           ++dest_reg;
           ++arg_offset;
           break;
         }
+        // Handle all other primitives that are always 1 virtual register slot.
         default:
           new_shadow_frame->SetVReg(dest_reg, shadow_frame.GetVReg(src_reg));
           break;
       }
     }
   } else {
+    size_t arg_index = 0;
+
     // Fast path: no extra checks.
     if (is_range) {
-      uint16_t first_src_reg = inst->VRegC_3rc();
-      if (string_init) {
-        // Skip the referrer for the new static StringFactory call.
-        ++first_src_reg;
-        ++first_dest_reg;
-      }
+      // TODO: Implement the range version of invoke-lambda
+      uint16_t first_src_reg = vregC;
+
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
           ++dest_reg, ++src_reg) {
         AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
       }
     } else {
-      DCHECK_LE(num_ins, 5U);
-      uint16_t regList = inst->Fetch16(2);
-      uint16_t count = num_ins;
-      size_t arg_index = 0;
-      if (count == 5) {
-        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U,
-                       (inst_data >> 8) & 0x0f);
-        --count;
-      }
-      if (string_init) {
-        // Skip the referrer for the new static StringFactory call.
-        regList >>= 4;
-        ++first_dest_reg;
-        --count;
-      }
-      for (; arg_index < count; ++arg_index, regList >>= 4) {
-        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
+      DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs);
+
+      for (; arg_index < number_of_inputs; ++arg_index) {
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]);
       }
     }
     self->EndAssertNoThreadSuspension(old_cause);
@@ -660,8 +684,7 @@
 
   if (string_init && !self->IsExceptionPending()) {
     // Set the new string result of the StringFactory.
-    uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-    shadow_frame.SetVRegReference(vregC, result->GetL());
+    shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
     // Overwrite all potential copies of the original result of the new-instance of string with the
     // new result of the StringFactory. Use the verifier to find this set of registers.
     ArtMethod* method = shadow_frame.GetMethod();
@@ -692,6 +715,56 @@
   return !self->IsExceptionPending();
 }
 
+template<bool is_range, bool do_assignability_check>
+bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
+                  const Instruction* inst, uint16_t inst_data, JValue* result) {
+  const uint4_t num_additional_registers = inst->VRegB_25x();
+  // Argument word count.
+  const uint16_t number_of_inputs = num_additional_registers + 1;
+  // The first input register is always present and is not encoded in the count.
+
+  // TODO: find a cleaner way to separate non-range and range information without duplicating
+  //       code.
+  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t vregC = 0;   // only used in invoke-XXX-range.
+  if (is_range) {
+    vregC = inst->VRegC_3rc();
+  } else {
+    // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path
+    UNUSED(inst_data);
+    inst->GetAllArgs25x(arg);
+  }
+
+  // TODO: if there's an assignability check, throw instead?
+  DCHECK(called_method->IsStatic());
+
+  return DoCallCommon<is_range, do_assignability_check>(
+      called_method, self, shadow_frame,
+      result, number_of_inputs, arg, vregC);
+}
+
+template<bool is_range, bool do_assignability_check>
+bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
+            const Instruction* inst, uint16_t inst_data, JValue* result) {
+  // Argument word count.
+  const uint16_t number_of_inputs = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
+
+  // TODO: find a cleaner way to separate non-range and range information without duplicating
+  //       code.
+  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t vregC = 0;
+  if (is_range) {
+    vregC = inst->VRegC_3rc();
+  } else {
+    vregC = inst->VRegC_35c();
+    inst->GetVarArgs(arg, inst_data);
+  }
+
+  return DoCallCommon<is_range, do_assignability_check>(
+      called_method, self, shadow_frame,
+      result, number_of_inputs, arg, vregC);
+}
+
 template <bool is_range, bool do_access_check, bool transaction_active>
 bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame,
                       Thread* self, JValue* result) {
@@ -733,8 +806,8 @@
     DCHECK(self->IsExceptionPending());
     return false;
   }
-  uint32_t arg[5];  // only used in filled-new-array.
-  uint32_t vregC;   // only used in filled-new-array-range.
+  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in filled-new-array.
+  uint32_t vregC = 0;   // only used in filled-new-array-range.
   if (is_range) {
     vregC = inst->VRegC_3rc();
   } else {
@@ -815,6 +888,20 @@
 EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
 #undef EXPLICIT_DO_CALL_TEMPLATE_DECL
 
+// Explicit DoLambdaCall template function declarations.
+#define EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check)               \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
+  bool DoLambdaCall<_is_range, _do_assignability_check>(ArtMethod* method, Thread* self,        \
+                                                        ShadowFrame& shadow_frame,              \
+                                                        const Instruction* inst,                \
+                                                        uint16_t inst_data,                     \
+                                                        JValue* result)
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, false);
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, true);
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, false);
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, true);
+#undef EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL
+
 // Explicit DoFilledNewArray template function declarations.
 #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active)       \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                            \
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 6fafcd1..2a15087 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -97,6 +97,127 @@
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
+// Invokes the given lambda closure. This is part of the invocation support and is used by
+// DoLambdaInvoke functions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<bool is_range, bool do_assignability_check>
+bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
+                  const Instruction* inst, uint16_t inst_data, JValue* result);
+
+// Validates that the art method corresponding to a lambda method target
+// is semantically valid:
+//
+// Must be ACC_STATIC and ACC_LAMBDA. Must be a concrete managed implementation
+// (i.e. not native, not proxy, not abstract, ...).
+//
+// If the validation fails, return false and raise an exception.
+static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool success = false;
+
+  if (UNLIKELY(called_method == nullptr)) {
+    // The shadow frame should already be pushed, so we don't need to update it.
+  } else if (UNLIKELY(called_method->IsAbstract())) {
+    ThrowAbstractMethodError(called_method);
+    // TODO(iam): Also handle the case when the method is non-static, what error do we throw?
+    // TODO(iam): Also make sure that ACC_LAMBDA is set.
+  } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) {
+    // Method could be native, proxy method, etc. Lambda targets have to be concrete impls,
+    // so don't allow this.
+  } else {
+    success = true;
+  }
+
+  return success;
+}
+
+// Handles create-lambda instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+//
+// As a work-in-progress implementation, this shoves the ArtMethod object corresponding
+// to the target dex method index into the target register vA and vA + 1.
+template<bool do_access_check>
+static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame,
+                                  const Instruction* inst) {
+  /*
+   * create-lambda is opcode 0x21c
+   * - vA is the target register where the closure will be stored into
+   *   (also stores into vA + 1)
+   * - vB is the method index which will be the target for a later invoke-lambda
+   */
+  const uint32_t method_idx = inst->VRegB_21c();
+  mirror::Object* receiver = nullptr;  // Always static. (see 'kStatic')
+  ArtMethod* sf_method = shadow_frame.GetMethod();
+  ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
+      method_idx, &receiver, &sf_method, self);
+
+  uint32_t vregA = inst->VRegA_21c();
+
+  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+    CHECK(self->IsExceptionPending());
+    shadow_frame.SetVReg(vregA, 0u);
+    shadow_frame.SetVReg(vregA + 1, 0u);
+    return false;
+  }
+
+  // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
+  uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(called_method));
+  uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(called_method)
+                                                    >> BitSizeOf<uint32_t>());
+  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
+  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
+
+  DCHECK_NE(called_method_lo | called_method_hi, 0u);
+
+  shadow_frame.SetVReg(vregA, called_method_lo);
+  shadow_frame.SetVReg(vregA + 1, called_method_hi);
+  return true;
+}
+
+template<bool do_access_check>
+static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                                  uint16_t inst_data, JValue* result) {
+  /*
+   * invoke-lambda is opcode 0x25
+   *
+   * - vC is the closure register (both vC and vC + 1 will be used to store the closure).
+   * - vB is the number of additional registers up to |{vD,vE,vF,vG}| (4)
+   * - the rest of the registers are always var-args
+   *
+   * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
+   */
+  uint32_t vC = inst->VRegC_25x();
+
+  // TODO(iam): Introduce a closure abstraction that will contain the captured variables
+  // instead of just an ArtMethod. We also should only need to use 1 register instead of 2.
+  uint32_t vc_value_lo = shadow_frame.GetVReg(vC);
+  uint32_t vc_value_hi = shadow_frame.GetVReg(vC + 1);
+
+  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
+                           | vc_value_lo;
+
+  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
+  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
+  ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr);
+
+  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
+  if (UNLIKELY(called_method == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    result->SetJ(0);
+    return false;
+  }
+
+  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+    CHECK(self->IsExceptionPending());
+    result->SetJ(0);
+    return false;
+  } else {
+    return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
+                                                result);
+  }
+}
+
 // Handles invoke-XXX/range instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
@@ -417,6 +538,26 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
+// Explicitly instantiate all DoCreateLambda functions.
+#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                    \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                 \
+bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame,              \
+                        const Instruction* inst)
+
+EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
+EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
+#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
+
+// Explicitly instantiate all DoInvokeLambda functions.
+#define EXPLICIT_DO_INVOKE_LAMBDA_DECL(_do_check)                                    \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                 \
+bool DoInvokeLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
+                               uint16_t inst_data, JValue* result);
+
+EXPLICIT_DO_INVOKE_LAMBDA_DECL(false);  // invoke-lambda
+EXPLICIT_DO_INVOKE_LAMBDA_DECL(true);   // invoke-lambda
+#undef EXPLICIT_DO_INVOKE_LAMBDA_DECL
+
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 86027c5..7bc8c15 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -75,6 +75,17 @@
 #define HANDLE_INSTRUCTION_START(opcode) op_##opcode:  // NOLINT(whitespace/labels)
 #define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK()
 
+// Use with instructions labeled with kExperimental flag:
+#define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode)                                             \
+  HANDLE_INSTRUCTION_START(opcode);                                                               \
+  DCHECK(inst->IsExperimental());                                                                 \
+  if (Runtime::Current()->AreExperimentalLambdasEnabled()) {
+#define HANDLE_EXPERIMENTAL_INSTRUCTION_END()                                                     \
+  } else {                                                                                        \
+      UnexpectedOpcode(inst, shadow_frame);                                                       \
+  } HANDLE_INSTRUCTION_END();
+
+
 /**
  * Interpreter based on computed goto tables.
  *
@@ -1609,6 +1620,14 @@
   }
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(INVOKE_LAMBDA) {
+    bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
+                                                   &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(NEG_INT)
     shadow_frame.SetVReg(
         inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
@@ -2390,6 +2409,12 @@
     ADVANCE(2);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
+    bool success = DoCreateLambda<true>(self, shadow_frame, inst);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2422,10 +2447,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F3)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_F4)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2434,10 +2455,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F6)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_F7)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index fcf083c..8040197 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -53,6 +53,11 @@
     }                                                                                           \
   } while (false)
 
+static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
+  DCHECK(inst->IsExperimental());
+  return Runtime::Current()->AreExperimentalLambdasEnabled();
+}
+
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register) {
@@ -2217,8 +2222,39 @@
                              (inst->VRegC_22b() & 0x1f));
         inst = inst->Next_2xx();
         break;
+      case Instruction::INVOKE_LAMBDA: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
+                                                       &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::CREATE_LAMBDA: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::UNUSED_F4:
+      case Instruction::UNUSED_F5:
+      case Instruction::UNUSED_F7 ... Instruction::UNUSED_F9: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        CHECK(false);  // TODO(iam): Implement opcodes for lambdas
+        break;
+      }
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-      case Instruction::UNUSED_F3 ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
       case Instruction::UNUSED_7A:
         UnexpectedOpcode(inst, shadow_frame);
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 4b563b5..5e84df5 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -260,6 +260,10 @@
       .Define("--cpu-abilist=_")
           .WithType<std::string>()
           .IntoKey(M::CpuAbiList)
+      .Define({"-Xexperimental-lambdas", "-Xnoexperimental-lambdas"})
+          .WithType<bool>()
+          .WithValues({true, false})
+          .IntoKey(M::ExperimentalLambdas)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -544,6 +548,12 @@
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
+  if (args.GetOrDefault(M::ExperimentalLambdas)) {
+    LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
+                 << "an unstable specification and are nearly guaranteed to change over time. "
+                 << "Do not attempt to write shipping code against these opcodes.";
+  }
+
   *runtime_options = std::move(args);
   return true;
 }
@@ -663,6 +673,8 @@
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
+  UsageMessage(stream, "  -X[no]experimental-lambdas\n"
+                       "     (Enable new experimental dalvik opcodes, off by default)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 9d651bf..103332a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -189,7 +189,8 @@
       implicit_so_checks_(false),
       implicit_suspend_checks_(false),
       is_native_bridge_loaded_(false),
-      zygote_max_failed_boots_(0) {
+      zygote_max_failed_boots_(0),
+      experimental_lambdas_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
 }
@@ -836,6 +837,7 @@
   }
 
   zygote_max_failed_boots_ = runtime_options.GetOrDefault(Opt::ZygoteMaxFailedBoots);
+  experimental_lambdas_ = runtime_options.GetOrDefault(Opt::ExperimentalLambdas);
 
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
   ATRACE_BEGIN("CreateHeap");
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e569333..3cd7404 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -527,6 +527,10 @@
     return zygote_max_failed_boots_;
   }
 
+  bool AreExperimentalLambdasEnabled() const {
+    return experimental_lambdas_;
+  }
+
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
@@ -727,6 +731,12 @@
   // zygote.
   uint32_t zygote_max_failed_boots_;
 
+  // Enable experimental opcodes that aren't fully specified yet. The intent is to
+  // eventually publish them as public-usable opcodes, but they aren't ready yet.
+  //
+  // Experimental opcodes should not be used by other production code.
+  bool experimental_lambdas_;
+
   MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 4a307d5..fc527b5 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -110,6 +110,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        ZygoteMaxFailedBoots,           10)
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
+RUNTIME_OPTIONS_KEY (bool,                ExperimentalLambdas,            false) // -X[no]experimental-lambdas
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 4d88227..33b0935 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -572,6 +572,7 @@
     case VERIFY_ERROR_ACCESS_METHOD:
     case VERIFY_ERROR_INSTANTIATION:
     case VERIFY_ERROR_CLASS_CHANGE:
+    case VERIFY_ERROR_FORCE_INTERPRETER:
       if (Runtime::Current()->IsAotCompiler() || !can_load_classes_) {
         // If we're optimistically running verification at compile time, turn NO_xxx, ACCESS_xxx,
         // class change and instantiation errors into soft verification errors so that we re-verify
@@ -2828,10 +2829,31 @@
       }
       break;
     }
+    case Instruction::INVOKE_LAMBDA: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
+      break;
+    }
+    case Instruction::CREATE_LAMBDA: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
+      break;
+    }
+
+    case 0xf4:
+    case 0xf5:
+    case 0xf7 ... 0xf9: {
+      DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
+      FALLTHROUGH_INTENDED;  // Conservatively fail verification on release builds.
+    }
 
     /* These should never appear during verification. */
     case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-    case Instruction::UNUSED_F3 ... Instruction::UNUSED_FF:
+    case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
     case Instruction::UNUSED_79:
     case Instruction::UNUSED_7A:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 873b8ab..824daf6 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -77,6 +77,16 @@
   VERIFY_ERROR_ACCESS_METHOD,   // IllegalAccessError.
   VERIFY_ERROR_CLASS_CHANGE,    // IncompatibleClassChangeError.
   VERIFY_ERROR_INSTANTIATION,   // InstantiationError.
+  // For opcodes that don't have complete verifier support (such as lambda opcodes),
+  // we need a way to continue execution at runtime without attempting to re-verify
+  // (since we know it will fail no matter what). Instead, run as the interpreter
+  // in a special "do access checks" mode which will perform verifier-like checking
+  // on the fly.
+  //
+  // TODO: Once all new opcodes have implemented full verifier support, this can be removed.
+  VERIFY_ERROR_FORCE_INTERPRETER,  // Skip the verification phase at runtime;
+                                   // force the interpreter to do access checks.
+                                   // (sets a soft fail at compile time).
 };
 std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
 
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
new file mode 100644
index 0000000..ed1f875
--- /dev/null
+++ b/test/955-lambda-smali/expected.txt
@@ -0,0 +1,4 @@
+SanityCheck
+Hello world! (0-args, no closure)
+ABCD Hello world! (4-args, no closure)
+Caught NPE
diff --git a/test/955-lambda-smali/info.txt b/test/955-lambda-smali/info.txt
new file mode 100644
index 0000000..aed5e84
--- /dev/null
+++ b/test/955-lambda-smali/info.txt
@@ -0,0 +1,3 @@
+Smali-based tests for experimental lambda intructions.
+
+Obviously needs to run under ART.
diff --git a/test/955-lambda-smali/run b/test/955-lambda-smali/run
new file mode 100755
index 0000000..e0c9586
--- /dev/null
+++ b/test/955-lambda-smali/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
+${RUN} "$@" --runtime-option -Xexperimental-lambdas -Xcompiler-option "--runtime-arg -Xexperimental-lambdas"
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
new file mode 100644
index 0000000..1851399
--- /dev/null
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -0,0 +1,29 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LMain;
+
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .registers 2
+
+    invoke-static {}, LSanityCheck;->run()I
+    invoke-static {}, LTrivialHelloWorld;->run()V
+
+# TODO: add tests when verification fails
+
+    return-void
+.end method
diff --git a/test/955-lambda-smali/smali/SanityCheck.smali b/test/955-lambda-smali/smali/SanityCheck.smali
new file mode 100644
index 0000000..4c807d7
--- /dev/null
+++ b/test/955-lambda-smali/smali/SanityCheck.smali
@@ -0,0 +1,36 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LSanityCheck;
+.super Ljava/lang/Object;
+
+
+.method public constructor <init>()V
+.registers 1
+   invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+   return-void
+.end method
+
+# This test is just here to make sure that we can at least execute basic non-lambda
+# functionality such as printing (when lambdas are enabled in the runtime).
+.method public static run()I
+# Don't use too many registers here to avoid hitting the Stack::SanityCheck frame<2KB assert
+.registers 3
+    const-string v0, "SanityCheck"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    const v2, 123456
+    return v2
+.end method
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
new file mode 100644
index 0000000..38ee95a
--- /dev/null
+++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
@@ -0,0 +1,94 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LTrivialHelloWorld;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+.registers 8
+    # Trivial 0-arg hello world
+    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Slightly more interesting 4-arg hello world
+    create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    # TODO: create-lambda should not write to both v2 and v3
+    const-string v4, "A"
+    const-string v5, "B"
+    const-string v6, "C"
+    const-string v7, "D"
+    invoke-lambda v2, {v4, v5, v6, v7}
+
+    invoke-static {}, LTrivialHelloWorld;->testFailures()V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of ArtMethod.
+.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    .registers 3 # 1 parameters, 2 locals
+
+    const-string v0, "Hello world! (0-args, no closure)"
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of ArtMethod.
+.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    .registers 7 # 5 parameters, 2 locals
+
+    const-string v0, " Hello world! (4-args, no closure)"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+.end method
+
+# Test exceptions are thrown as expected when used opcodes incorrectly
+.method private static testFailures()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v0, 0  # v0 = null
+    const v1, 0  # v1 = null
+:start
+    invoke-lambda v0, {}  # invoking a null lambda shall raise an NPE
+:end
+    return-void
+
+:handler
+    const-string v2, "Caught NPE"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
+.end method
diff --git a/test/etc/default-build b/test/etc/default-build
index fbe97f9..92954a9 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -18,6 +18,7 @@
 set -e
 
 DX_FLAGS=""
+SKIP_DX_MERGER="false"
 
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
@@ -38,22 +39,36 @@
   exit 0
 fi
 
-mkdir classes
-${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+if [ -d src ]; then
+  mkdir classes
+  ${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+fi
 
 if [ -d src2 ]; then
+  mkdir -p classes
   ${JAVAC} -d classes `find src2 -name '*.java'`
 fi
 
-if [ ${NEED_DEX} = "true" ]; then
+if ! [ -d src ] && ! [ -d src2 ]; then
+  # No src directory? Then forget about trying to run dx.
+  SKIP_DX_MERGER="true"
+fi
+
+if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
     --dump-width=1000 ${DX_FLAGS} classes
 fi
 
 if [ -d smali ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx256m --output smali_classes.dex `find smali -name '*.smali'`
-  ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  ${SMALI} -JXmx256m --experimental --api-level 23 --output smali_classes.dex `find smali -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${SKIP_DX_MERGER} = "false" ]; then
+    ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  else
+    mv smali_classes.dex classes.dex
+  fi
 fi
 
 if [ -d src-ex ]; then