Fix method tracing and allow alloc counting during tracing.

Forcing alignment on art_trace_exit_from_code was preventing the LR of
profiled frames from matching. Also, the merge of the different types of
stack walks introduced a small bug.

The method tracer now also supports the TRACE_COUNT_ALLOCS flag to count
allocations during method tracing.

Change-Id: Ief9e4612471a134a90eabf15432135162b633b92
diff --git a/src/oat/runtime/arm/runtime_support_arm.S b/src/oat/runtime/arm/runtime_support_arm.S
index 6163b3e..9c55e66 100644
--- a/src/oat/runtime/arm/runtime_support_arm.S
+++ b/src/oat/runtime/arm/runtime_support_arm.S
@@ -759,9 +759,11 @@
     DELIVER_PENDING_EXCEPTION
 
     .global art_trace_entry_from_code
+    .global art_trace_exit_from_code
     .extern artTraceMethodEntryFromCode
+    .extern artTraceMethodExitFromCode
     /*
-     * Routine that intercepts method calls.
+     * Routine that intercepts method calls and returns.
      */
     ALIGN_FUNCTION_ENTRY
 art_trace_entry_from_code:
@@ -772,14 +774,6 @@
     mov   r12, r0        @ r12 holds reference to code
     pop   {r0-r3}        @ restore arguments
     blx   r12            @ call method
-    /* intentional fallthrough */
-
-    .global art_trace_exit_from_code
-    .extern artTraceMethodExitFromCode
-    /*
-     * Routine that intercepts method returns.
-     */
-    ALIGN_FUNCTION_ENTRY
 art_trace_exit_from_code:
     push  {r0-r1}        @ save return value
     blx   artTraceMethodExitFromCode  @ ()
diff --git a/src/thread.cc b/src/thread.cc
index 846aa39..125480e 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -1189,7 +1189,9 @@
       if (LIKELY(!method_tracing_active)) {
         pc = ManglePc(return_pc);
       } else {
-        if (IsTraceExitPc(return_pc)) {
+        // While profiling, the return pc is restored from the side stack, except when walking
+        // the stack for an exception where the side stack will be unwound in VisitFrame.
+        if (IsTraceExitPc(return_pc) && !include_upcalls) {
           TraceStackFrame trace_frame = GetTraceStackFrame(trace_stack_depth++);
           CHECK(trace_frame.method_ == frame.GetMethod());
           pc = ManglePc(trace_frame.return_pc_);
@@ -1484,7 +1486,7 @@
     } else if (method->IsNative()) {
       native_method_count_++;
     } else {
-      // Unwind stack during method tracing
+      // Unwind stack when an exception occurs during method tracing
       if (UNLIKELY(method_tracing_active_)) {
         if (IsTraceExitPc(DemanglePc(pc))) {
           pc = ManglePc(TraceMethodUnwindFromCode(Thread::Current()));
diff --git a/src/trace.cc b/src/trace.cc
index c38f017..3b4c3e5 100644
--- a/src/trace.cc
+++ b/src/trace.cc
@@ -200,11 +200,6 @@
     return;
   }
 
-  // TODO: implement alloc counting.
-  if (flags != 0) {
-    UNIMPLEMENTED(FATAL) << "trace flags";
-  }
-
   ScopedThreadStateChange tsc(Thread::Current(), Thread::kRunnable);
   Runtime::Current()->GetThreadList()->SuspendAll(false);
 
@@ -226,7 +221,13 @@
   }
 
   // Create Trace object.
-  Trace* tracer(new Trace(trace_file, buffer_size));
+  Trace* tracer(new Trace(trace_file, buffer_size, flags));
+
+  // Enable count of allocs only if the kTraceCountAllocs bit is set in the flags.
+  if ((flags & kTraceCountAllocs) != 0) {
+    Runtime::Current()->SetStatsEnabled(true);
+  }
+
   Runtime::Current()->EnableMethodTracing(tracer);
   tracer->BeginTracing();
 
@@ -297,6 +298,10 @@
   size_t final_offset = cur_offset_;
   uint32_t clock_overhead = GetClockOverhead();
 
+  if ((flags_ & kTraceCountAllocs) != 0) {
+    Runtime::Current()->SetStatsEnabled(false);
+  }
+
   GetVisitedMethods(final_offset);
 
   std::ostringstream os;
@@ -317,6 +322,11 @@
   os << StringPrintf("num-method-calls=%zd\n", (final_offset - kTraceHeaderLength) / record_size_);
   os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead);
   os << StringPrintf("vm=art\n");
+  if ((flags_ & kTraceCountAllocs) != 0) {
+    os << StringPrintf("alloc-count=%d\n", Runtime::Current()->GetStat(KIND_ALLOCATED_OBJECTS));
+    os << StringPrintf("alloc-size=%d\n", Runtime::Current()->GetStat(KIND_ALLOCATED_BYTES));
+    os << StringPrintf("gc-count=%d\n", Runtime::Current()->GetStat(KIND_GC_INVOCATIONS));
+  }
   os << StringPrintf("%cthreads\n", kTraceTokenChar);
   DumpThreadList(os);
   os << StringPrintf("%cmethods\n", kTraceTokenChar);
diff --git a/src/trace.h b/src/trace.h
index b0366d9..8dbf924 100644
--- a/src/trace.h
+++ b/src/trace.h
@@ -52,6 +52,10 @@
     kMethodTraceUnwind = 2,
   };
 
+  enum TraceFlag {
+    kTraceCountAllocs = 1,
+  };
+
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags, bool direct_to_ddms);
   static void Stop();
   static void Shutdown();
@@ -66,9 +70,9 @@
   void ResetSavedCode(Method* method);
 
  private:
-  explicit Trace(File* trace_file, int buffer_size)
-      : trace_file_(trace_file), buf_(new uint8_t[buffer_size]()), overflow_(false), buffer_size_(buffer_size),
-        start_time_(0), trace_version_(0), record_size_(0), cur_offset_(0) {
+  explicit Trace(File* trace_file, int buffer_size, int flags)
+      : trace_file_(trace_file), buf_(new uint8_t[buffer_size]()), flags_(flags), overflow_(false),
+        buffer_size_(buffer_size), start_time_(0), trace_version_(0), record_size_(0), cur_offset_(0) {
   }
 
   void BeginTracing();
@@ -100,6 +104,9 @@
   // Buffer to store trace data.
   UniquePtr<uint8_t> buf_;
 
+  // Flags enabling extra tracing of things such as alloc counts.
+  int flags_;
+
   bool overflow_;
   int buffer_size_;
   uint64_t start_time_;