Compile-time tuning

Specialized the dataflow iterators and did a few other minor tweaks.
Showing ~5% compile-time improvement in a single-threaded environment;
less in multi-threaded (presumably because we're blocked by something
else).

Change-Id: I2e2ed58d881414b9fc97e04cd0623e188259afd2
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 630e990..41adb94 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -99,12 +99,11 @@
     int native_reg_id;
     if (cu_->instruction_set == kX86) {
       // If x86, location differs depending on whether memory/reg operation.
-      native_reg_id = (GetTargetInstFlags(this_lir->opcode) & IS_STORE) ? this_lir->operands[2]
-          : this_lir->operands[0];
+      native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0];
     } else {
       native_reg_id = this_lir->operands[0];
     }
-    bool is_this_lir_load = GetTargetInstFlags(this_lir->opcode) & IS_LOAD;
+    bool is_this_lir_load = target_flags & IS_LOAD;
     LIR* check_lir;
     /* Use the mem mask to determine the rough memory location */
     uint64_t this_mem_mask = (this_lir->use_mask | this_lir->def_mask) & ENCODE_MEM;