Improve PyPerf sample handling and output (#2260)

* Add common interface for PyPerf sample handling

* Better printing for enum values
diff --git a/examples/cpp/pyperf/PyPerfUtil.cc b/examples/cpp/pyperf/PyPerfUtil.cc
index d439083..252a0fe 100644
--- a/examples/cpp/pyperf/PyPerfUtil.cc
+++ b/examples/cpp/pyperf/PyPerfUtil.cc
@@ -9,7 +9,6 @@
 #include <cstdio>
 #include <cstring>
 #include <exception>
-#include <unordered_map>
 
 #include <dirent.h>
 #include <linux/elf.h>
@@ -29,10 +28,6 @@
 extern OffsetConfig kPy36OffsetConfig;
 extern std::string PYPERF_BPF_PROGRAM;
 
-const static std::string kLostSymbol = "[Lost Symbol]";
-const static std::string kIncompleteStack = "[Truncated Stack]";
-const static std::string kErrorStack = "[Stack Error]";
-const static std::string kNonPythonStack = "[Non-Python Code]";
 const static int kPerfBufSizePages = 32;
 
 const static std::string kPidCfgTableName("pid_config");
@@ -107,7 +102,8 @@
     file = file.substr(pos + 1);
   }
   if (file.find(kPy36LibName) == 0) {
-    logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name, st, en, helper->pid);
+    logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name,
+            st, en, helper->pid);
     helper->found = true;
     helper->st = st;
     helper->en = en;
@@ -239,7 +235,8 @@
 void PyPerfUtil::handleLostSamples(int lostCnt) { lostSamples_ += lostCnt; }
 
 PyPerfUtil::PyPerfResult PyPerfUtil::profile(int64_t sampleRate,
-                                             int64_t durationMs) {
+                                             int64_t durationMs,
+                                             PyPerfSampleProcessor* processor) {
   if (!initCompleted_) {
     std::fprintf(stderr, "PyPerfUtil::init not invoked or failed\n");
     return PyPerfResult::NO_INIT;
@@ -285,7 +282,12 @@
   }
   logInfo(2, "Finished draining remaining samples\n");
 
-  // Get symbol names and output samples
+  processor->processSamples(samples_, this);
+
+  return PyPerfResult::SUCCESS;
+}
+
+std::unordered_map<int32_t, std::string> PyPerfUtil::getSymbolMapping() {
   auto symbolTable = bpf_.get_hash_table<Symbol, int32_t>("symbols");
   std::unordered_map<int32_t, std::string> symbols;
   for (auto& x : symbolTable.get_table_offline()) {
@@ -294,47 +296,7 @@
     symbols.emplace(x.second, std::move(symbolName));
   }
   logInfo(1, "Total %d unique Python symbols\n", symbols.size());
-
-  for (auto& sample : samples_) {
-    if (sample.threadStateMatch != THREAD_STATE_THIS_THREAD_NULL &&
-        sample.threadStateMatch != THREAD_STATE_BOTH_NULL) {
-      for (const auto stackId : sample.pyStackIds) {
-        auto symbIt = symbols.find(stackId);
-        if (symbIt != symbols.end()) {
-          std::printf("    %s\n", symbIt->second.c_str());
-        } else {
-          std::printf("    %s\n", kLostSymbol.c_str());
-          lostSymbols_++;
-        }
-      }
-      switch (sample.stackStatus) {
-      case STACK_STATUS_TRUNCATED:
-        std::printf("    %s\n", kIncompleteStack.c_str());
-        truncatedStack_++;
-        break;
-      case STACK_STATUS_ERROR:
-        std::printf("    %s\n", kErrorStack.c_str());
-        break;
-      default:
-        break;
-      }
-    } else {
-      std::printf("    %s\n", kNonPythonStack.c_str());
-    }
-
-    std::printf("PID: %d TID: %d (%s)\n", sample.pid, sample.tid,
-                sample.comm.c_str());
-    std::printf("GIL State: %d Thread State: %d PthreadID Match State: %d\n\n",
-                sample.threadStateMatch, sample.gilState,
-                sample.pthreadIDMatch);
-  }
-
-  logInfo(0, "%d samples collected\n", totalSamples_);
-  logInfo(0, "%d samples lost\n", lostSamples_);
-  logInfo(0, "%d samples with truncated stack\n", truncatedStack_);
-  logInfo(0, "%d times Python symbol lost\n", lostSymbols_);
-
-  return PyPerfResult::SUCCESS;
+  return symbols;
 }
 
 std::string PyPerfUtil::getSymbolName(Symbol& sym) const {
@@ -378,18 +340,23 @@
   }
 
   if (!getAddrOfPythonBinary(path, data)) {
-    std::fprintf(stderr, "Failed getting addresses in potential Python library in PID %d\n", pid);
+    std::fprintf(
+        stderr,
+        "Failed getting addresses in potential Python library in PID %d\n",
+        pid);
     return false;
   }
   data.offsets = kPy36OffsetConfig;
   data.current_state_addr += helper.st;
-  logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid, data.current_state_addr);
+  logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid,
+          data.current_state_addr);
   data.tls_key_addr += helper.st;
   logInfo(2, "PID %d has autoTLSKey at %lx\n", pid, data.current_state_addr);
   data.gil_locked_addr += helper.st;
   logInfo(2, "PID %d has gil_locked at %lx\n", pid, data.current_state_addr);
   data.gil_last_holder_addr += helper.st;
-  logInfo(2, "PID %d has gil_last_holder at %lx\n", pid, data.current_state_addr);
+  logInfo(2, "PID %d has gil_last_holder at %lx\n", pid,
+          data.current_state_addr);
 
   return true;
 }