Improve PyPerf sample handling and output (#2260)

* Add common interface for PyPerf sample handling

* Better printing for enum values
diff --git a/examples/cpp/pyperf/CMakeLists.txt b/examples/cpp/pyperf/CMakeLists.txt
index 8b80275..6f963c6 100644
--- a/examples/cpp/pyperf/CMakeLists.txt
+++ b/examples/cpp/pyperf/CMakeLists.txt
@@ -5,7 +5,7 @@
 include_directories(${CMAKE_SOURCE_DIR}/src/cc/api)
 include_directories(${CMAKE_SOURCE_DIR}/src/cc/libbpf/include/uapi)
 
-add_executable(PyPerf PyPerf.cc PyPerfUtil.cc PyPerfBPFProgram.cc PyPerfLoggingHelper.cc Py36Offsets.cc)
+add_executable(PyPerf PyPerf.cc PyPerfUtil.cc PyPerfBPFProgram.cc PyPerfLoggingHelper.cc PyPerfDefaultPrinter.cc Py36Offsets.cc)
 target_link_libraries(PyPerf bcc-static)
 
 if(INSTALL_CPP_EXAMPLES)
diff --git a/examples/cpp/pyperf/PyPerf.cc b/examples/cpp/pyperf/PyPerf.cc
index bee9b59..bad2ba0 100644
--- a/examples/cpp/pyperf/PyPerf.cc
+++ b/examples/cpp/pyperf/PyPerf.cc
@@ -16,10 +16,12 @@
 #include <string>
 #include <vector>
 
+#include "PyPerfDefaultPrinter.h"
 #include "PyPerfLoggingHelper.h"
 #include "PyPerfUtil.h"
 
 int main(int argc, char** argv) {
+  // Argument parsing helpers
   int pos = 1;
 
   auto parseIntArg = [&](std::vector<std::string> argNames, uint64_t& target) {
@@ -45,9 +47,29 @@
     return false;
   };
 
+  auto parseBoolArg = [&](std::vector<std::string> argNames, bool& target) {
+    // "--<name>" turns the option on, "--no-<name>" turns it off.
+    const std::string current(argv[pos]);
+    for (const auto& flag : argNames) {
+      const bool enable = (current == "--" + flag);
+      const bool disable = (current == "--no-" + flag);
+      if (enable || disable) {
+        target = enable;
+        return true;
+      }
+    }
+    // argv[pos] matched none of the names; target is left untouched.
+    return false;
+  };
+
+  // Default argument values
   uint64_t sampleRate = 1000000;
   uint64_t durationMs = 1000;
   uint64_t verbosityLevel = 0;
+  bool showGILState = true;
+  bool showThreadState = true;
+  bool showPthreadIDState = false;
+
   while (true) {
     if (pos >= argc) {
       break;
@@ -56,6 +78,10 @@
     found = found || parseIntArg({"-c", "--sample-rate"}, sampleRate);
     found = found || parseIntArg({"-d", "--duration"}, durationMs);
     found = found || parseIntArg({"-v", "--verbose"}, verbosityLevel);
+    found = found || parseBoolArg({"show-gil-state"}, showGILState);
+    found = found || parseBoolArg({"show-thread-state"}, showThreadState);
+    found =
+        found || parseBoolArg({"show-pthread-id-state"}, showPthreadIDState);
     if (!found) {
       std::fprintf(stderr, "Unexpected argument: %s\n", argv[pos]);
       std::exit(1);
@@ -66,10 +92,17 @@
   ebpf::pyperf::setVerbosity(verbosityLevel);
   ebpf::pyperf::logInfo(1, "Profiling Sample Rate: %" PRIu64 "\n", sampleRate);
   ebpf::pyperf::logInfo(1, "Profiling Duration: %" PRIu64 "ms\n", durationMs);
+  ebpf::pyperf::logInfo(1, "Showing GIL state: %d\n", showGILState);
+  ebpf::pyperf::logInfo(1, "Showing Thread state: %d\n", showThreadState);
+  ebpf::pyperf::logInfo(1, "Showing Pthread ID state: %d\n",
+                        showPthreadIDState);
 
   ebpf::pyperf::PyPerfUtil util;
   util.init();
-  util.profile(sampleRate, durationMs);
+
+  ebpf::pyperf::PyPerfDefaultPrinter printer(showGILState, showThreadState,
+                                             showPthreadIDState);
+  util.profile(sampleRate, durationMs, &printer);
 
   return 0;
 }
diff --git a/examples/cpp/pyperf/PyPerfDefaultPrinter.cc b/examples/cpp/pyperf/PyPerfDefaultPrinter.cc
new file mode 100644
index 0000000..22ec2c3
--- /dev/null
+++ b/examples/cpp/pyperf/PyPerfDefaultPrinter.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "PyPerfDefaultPrinter.h"
+#include "PyPerfUtil.h"
+
+namespace ebpf {
+namespace pyperf {
+
+// Placeholder lines printed in place of a stack frame or of a whole stack.
+const static std::string kLostSymbol = "[Lost Symbol]";
+const static std::string kIncompleteStack = "[Truncated Stack]";
+const static std::string kErrorStack = "[Stack Error]";
+const static std::string kNonPythonStack = "[Non-Python Code]";
+
+// Printed for a state value that has no entry in the tables below.
+const static char* const kUnrecognizedState = "Unrecognized State Value";
+
+// Text printed for each PyPerfSample::gilState value.
+const static std::map<int, const char*> kGILStateValues = {
+    {GIL_STATE_NO_INFO, "No GIL Info"},
+    {GIL_STATE_ERROR, "Error Reading GIL State"},
+    {GIL_STATE_UNINITIALIZED, "GIL Uninitialized"},
+    {GIL_STATE_NOT_LOCKED, "GIL Not Locked"},
+    {GIL_STATE_THIS_THREAD, "GIL on This Thread"},
+    {GIL_STATE_GLOBAL_CURRENT_THREAD,
+     "GIL on Global _PyThreadState_Current Thread"},
+    {GIL_STATE_OTHER_THREAD, "GIL on Unexpected Thread"},
+    {GIL_STATE_NULL, "GIL State Empty"}};
+
+// Text printed for each PyPerfSample::threadStateMatch value.
+const static std::map<int, const char*> kThreadStateValues = {
+    {THREAD_STATE_UNKNOWN, "ThreadState Unknown"},
+    {THREAD_STATE_MATCH, "TLS ThreadState is Global _PyThreadState_Current"},
+    {THREAD_STATE_MISMATCH,
+     "TLS ThreadState is not Global _PyThreadState_Current"},
+    {THREAD_STATE_THIS_THREAD_NULL, "TLS ThreadState is NULL"},
+    {THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL,
+     "Global _PyThreadState_Current is NULL"},
+    {THREAD_STATE_BOTH_NULL,
+     "Both TLS ThreadState and Global _PyThreadState_Current is NULL"},
+};
+
+// Text printed for each PyPerfSample::pthreadIDMatch value.
+const static std::map<int, const char*> kPthreadIDStateValues = {
+    {PTHREAD_ID_UNKNOWN, "Pthread ID Unknown"},
+    {PTHREAD_ID_MATCH, "System Pthread ID is Python ThreadState Pthread ID"},
+    {PTHREAD_ID_MISMATCH,
+     "System Pthread ID is not Python ThreadState Pthread ID"},
+    {PTHREAD_ID_THREAD_STATE_NULL, "No Pthread ID: TLS ThreadState is NULL"},
+    {PTHREAD_ID_NULL, "Pthread ID on TLS ThreadState is NULL"},
+    {PTHREAD_ID_ERROR, "Error Reading System Pthread ID"}};
+
+// Returns the text registered for `value` in `labels`. Unlike std::map::at,
+// an unlisted value yields kUnrecognizedState instead of throwing, so a single
+// unexpected sample cannot abort the whole report.
+static const char* stateLabel(const std::map<int, const char*>& labels,
+                              int value) {
+  const auto it = labels.find(value);
+  return it != labels.end() ? it->second : kUnrecognizedState;
+}
+
+// Prints each sample to stdout: its symbolized Python stack (or a placeholder
+// line), a PID/TID/comm line, and the state lines enabled at construction.
+// Finishes with summary counters; sample totals are read from `util`.
+void PyPerfDefaultPrinter::processSamples(
+    const std::vector<PyPerfSample>& samples, PyPerfUtil* util) {
+  const auto symbols = util->getSymbolMapping();
+  uint32_t lostSymbols = 0;
+  uint32_t truncatedStack = 0;
+
+  for (const auto& sample : samples) {
+    // No stack is printed when the sample reports a NULL TLS ThreadState.
+    if (sample.threadStateMatch != THREAD_STATE_THIS_THREAD_NULL &&
+        sample.threadStateMatch != THREAD_STATE_BOTH_NULL) {
+      for (const auto stackId : sample.pyStackIds) {
+        const auto symbIt = symbols.find(stackId);
+        if (symbIt != symbols.end()) {
+          std::printf("    %s\n", symbIt->second.c_str());
+        } else {
+          std::printf("    %s\n", kLostSymbol.c_str());
+          lostSymbols++;
+        }
+      }
+      switch (sample.stackStatus) {
+      case STACK_STATUS_TRUNCATED:
+        std::printf("    %s\n", kIncompleteStack.c_str());
+        truncatedStack++;
+        break;
+      case STACK_STATUS_ERROR:
+        std::printf("    %s\n", kErrorStack.c_str());
+        break;
+      default:
+        break;
+      }
+    } else {
+      std::printf("    %s\n", kNonPythonStack.c_str());
+    }
+
+    std::printf("PID: %d TID: %d (%s)\n", sample.pid, sample.tid,
+                sample.comm.c_str());
+    if (showGILState_)
+      std::printf("GIL State: %s\n",
+                  stateLabel(kGILStateValues, sample.gilState));
+    if (showThreadState_)
+      std::printf("Thread State: %s\n",
+                  stateLabel(kThreadStateValues, sample.threadStateMatch));
+    if (showPthreadIDState_)
+      std::printf("Pthread ID State: %s\n",
+                  stateLabel(kPthreadIDStateValues, sample.pthreadIDMatch));
+
+    std::printf("\n");
+  }
+
+  // The counters are uint32_t, so print them with PRIu32 rather than %d.
+  std::printf("%" PRIu32 " samples collected\n", util->getTotalSamples());
+  std::printf("%" PRIu32 " samples lost\n", util->getLostSamples());
+  std::printf("%" PRIu32 " samples with truncated stack\n", truncatedStack);
+  std::printf("%" PRIu32 " times Python symbol lost\n", lostSymbols);
+}
+
+}  // namespace pyperf
+}  // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfDefaultPrinter.h b/examples/cpp/pyperf/PyPerfDefaultPrinter.h
new file mode 100644
index 0000000..89c8153
--- /dev/null
+++ b/examples/cpp/pyperf/PyPerfDefaultPrinter.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#pragma once
+
+#include "PyPerfSampleProcessor.h"
+
+namespace ebpf {
+namespace pyperf {
+
+// Sample processor that writes collected samples to stdout as text. The three
+// constructor flags select which per-sample state lines are included.
+class PyPerfDefaultPrinter : public PyPerfSampleProcessor {
+ public:
+  // Each flag enables the matching "GIL State", "Thread State" or
+  // "Pthread ID State" line in the output of processSamples.
+  PyPerfDefaultPrinter(bool showGILState, bool showThreadState,
+                       bool showPthreadIDState)
+      : showGILState_{showGILState},
+        showThreadState_{showThreadState},
+        showPthreadIDState_{showPthreadIDState} {}
+
+  // Prints `samples` and summary counters; symbol names and sample totals are
+  // obtained from `util`.
+  void processSamples(const std::vector<PyPerfSample>& samples,
+                      PyPerfUtil* util) override;
+
+ private:
+  // Output toggles set by the constructor.
+  bool showGILState_;
+  bool showThreadState_;
+  bool showPthreadIDState_;
+};
+
+}  // namespace pyperf
+}  // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfLoggingHelper.h b/examples/cpp/pyperf/PyPerfLoggingHelper.h
index d08d93e..c101666 100644
--- a/examples/cpp/pyperf/PyPerfLoggingHelper.h
+++ b/examples/cpp/pyperf/PyPerfLoggingHelper.h
@@ -3,6 +3,8 @@
  * Licensed under the Apache License, Version 2.0 (the "License")
  */
 
+#pragma once
+
 #include <cstdint>
 
 namespace ebpf {
diff --git a/examples/cpp/pyperf/PyPerfSampleProcessor.h b/examples/cpp/pyperf/PyPerfSampleProcessor.h
new file mode 100644
index 0000000..5f2fe5e
--- /dev/null
+++ b/examples/cpp/pyperf/PyPerfSampleProcessor.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "PyPerfType.h"
+
+namespace ebpf {
+namespace pyperf {
+
+class PyPerfUtil;
+
+// Interface for consumers of the samples gathered by PyPerfUtil::profile.
+class PyPerfSampleProcessor {
+ public:
+  // Virtual so implementations are destroyed correctly when deleted through a
+  // PyPerfSampleProcessor pointer.
+  virtual ~PyPerfSampleProcessor() = default;
+
+  // Called by PyPerfUtil::profile with the collected samples; `util` is the
+  // PyPerfUtil instance that produced them.
+  virtual void processSamples(const std::vector<PyPerfSample>& samples,
+                              PyPerfUtil* util) = 0;
+};
+
+}  // namespace pyperf
+}  // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfType.h b/examples/cpp/pyperf/PyPerfType.h
index 9a54e9e..7df07c7 100644
--- a/examples/cpp/pyperf/PyPerfType.h
+++ b/examples/cpp/pyperf/PyPerfType.h
@@ -3,7 +3,12 @@
  * Licensed under the Apache License, Version 2.0 (the "License")
  */
 
+#pragma once
+
+#include <sys/types.h>
 #include <cstdint>
+#include <string>
+#include <vector>
 
 #define PYTHON_STACK_FRAMES_PER_PROG 25
 #define PYTHON_STACK_PROG_CNT 3
@@ -99,5 +104,26 @@
   int32_t stack[STACK_MAX_LEN];
 } Event;
 
+struct PyPerfSample {  // user-space copy of the fields of one raw Event
+  pid_t pid;                        // from Event::pid
+  pid_t tid;                        // from Event::tid
+  std::string comm;                 // from Event::comm
+  uint8_t threadStateMatch;         // from Event::thread_state_match
+  uint8_t gilState;                 // from Event::gil_state
+  uint8_t pthreadIDMatch;           // from Event::pthread_id_match
+  uint8_t stackStatus;              // from Event::stack_status
+  std::vector<int32_t> pyStackIds;  // first stack_len entries of Event::stack
+  // Copies every field out of `raw`; rawSize is unused, stack_len unchecked.
+  explicit PyPerfSample(const Event* raw, int rawSize)
+      : pid(raw->pid),
+        tid(raw->tid),
+        comm(raw->comm),
+        threadStateMatch(raw->thread_state_match),
+        gilState(raw->gil_state),
+        pthreadIDMatch(raw->pthread_id_match),
+        stackStatus(raw->stack_status),
+        pyStackIds(raw->stack, raw->stack + raw->stack_len) {}
+};
+
 }  // namespace pyperf
 }  // namespace ebpf
diff --git a/examples/cpp/pyperf/PyPerfUtil.cc b/examples/cpp/pyperf/PyPerfUtil.cc
index d439083..252a0fe 100644
--- a/examples/cpp/pyperf/PyPerfUtil.cc
+++ b/examples/cpp/pyperf/PyPerfUtil.cc
@@ -9,7 +9,6 @@
 #include <cstdio>
 #include <cstring>
 #include <exception>
-#include <unordered_map>
 
 #include <dirent.h>
 #include <linux/elf.h>
@@ -29,10 +28,6 @@
 extern OffsetConfig kPy36OffsetConfig;
 extern std::string PYPERF_BPF_PROGRAM;
 
-const static std::string kLostSymbol = "[Lost Symbol]";
-const static std::string kIncompleteStack = "[Truncated Stack]";
-const static std::string kErrorStack = "[Stack Error]";
-const static std::string kNonPythonStack = "[Non-Python Code]";
 const static int kPerfBufSizePages = 32;
 
 const static std::string kPidCfgTableName("pid_config");
@@ -107,7 +102,8 @@
     file = file.substr(pos + 1);
   }
   if (file.find(kPy36LibName) == 0) {
-    logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name, st, en, helper->pid);
+    logInfo(1, "Found Python library %s loaded at %lx-%lx for PID %d\n", name,
+            st, en, helper->pid);
     helper->found = true;
     helper->st = st;
     helper->en = en;
@@ -239,7 +235,8 @@
 void PyPerfUtil::handleLostSamples(int lostCnt) { lostSamples_ += lostCnt; }
 
 PyPerfUtil::PyPerfResult PyPerfUtil::profile(int64_t sampleRate,
-                                             int64_t durationMs) {
+                                             int64_t durationMs,
+                                             PyPerfSampleProcessor* processor) {
   if (!initCompleted_) {
     std::fprintf(stderr, "PyPerfUtil::init not invoked or failed\n");
     return PyPerfResult::NO_INIT;
@@ -285,7 +282,12 @@
   }
   logInfo(2, "Finished draining remaining samples\n");
 
-  // Get symbol names and output samples
+  processor->processSamples(samples_, this);
+
+  return PyPerfResult::SUCCESS;
+}
+
+std::unordered_map<int32_t, std::string> PyPerfUtil::getSymbolMapping() {
   auto symbolTable = bpf_.get_hash_table<Symbol, int32_t>("symbols");
   std::unordered_map<int32_t, std::string> symbols;
   for (auto& x : symbolTable.get_table_offline()) {
@@ -294,47 +296,7 @@
     symbols.emplace(x.second, std::move(symbolName));
   }
   logInfo(1, "Total %d unique Python symbols\n", symbols.size());
-
-  for (auto& sample : samples_) {
-    if (sample.threadStateMatch != THREAD_STATE_THIS_THREAD_NULL &&
-        sample.threadStateMatch != THREAD_STATE_BOTH_NULL) {
-      for (const auto stackId : sample.pyStackIds) {
-        auto symbIt = symbols.find(stackId);
-        if (symbIt != symbols.end()) {
-          std::printf("    %s\n", symbIt->second.c_str());
-        } else {
-          std::printf("    %s\n", kLostSymbol.c_str());
-          lostSymbols_++;
-        }
-      }
-      switch (sample.stackStatus) {
-      case STACK_STATUS_TRUNCATED:
-        std::printf("    %s\n", kIncompleteStack.c_str());
-        truncatedStack_++;
-        break;
-      case STACK_STATUS_ERROR:
-        std::printf("    %s\n", kErrorStack.c_str());
-        break;
-      default:
-        break;
-      }
-    } else {
-      std::printf("    %s\n", kNonPythonStack.c_str());
-    }
-
-    std::printf("PID: %d TID: %d (%s)\n", sample.pid, sample.tid,
-                sample.comm.c_str());
-    std::printf("GIL State: %d Thread State: %d PthreadID Match State: %d\n\n",
-                sample.threadStateMatch, sample.gilState,
-                sample.pthreadIDMatch);
-  }
-
-  logInfo(0, "%d samples collected\n", totalSamples_);
-  logInfo(0, "%d samples lost\n", lostSamples_);
-  logInfo(0, "%d samples with truncated stack\n", truncatedStack_);
-  logInfo(0, "%d times Python symbol lost\n", lostSymbols_);
-
-  return PyPerfResult::SUCCESS;
+  return symbols;
 }
 
 std::string PyPerfUtil::getSymbolName(Symbol& sym) const {
@@ -378,18 +340,23 @@
   }
 
   if (!getAddrOfPythonBinary(path, data)) {
-    std::fprintf(stderr, "Failed getting addresses in potential Python library in PID %d\n", pid);
+    std::fprintf(
+        stderr,
+        "Failed getting addresses in potential Python library in PID %d\n",
+        pid);
     return false;
   }
   data.offsets = kPy36OffsetConfig;
   data.current_state_addr += helper.st;
-  logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid, data.current_state_addr);
+  logInfo(2, "PID %d has _PyThreadState_Current at %lx\n", pid,
+          data.current_state_addr);
   data.tls_key_addr += helper.st;
   logInfo(2, "PID %d has autoTLSKey at %lx\n", pid, data.current_state_addr);
   data.gil_locked_addr += helper.st;
   logInfo(2, "PID %d has gil_locked at %lx\n", pid, data.current_state_addr);
   data.gil_last_holder_addr += helper.st;
-  logInfo(2, "PID %d has gil_last_holder at %lx\n", pid, data.current_state_addr);
+  logInfo(2, "PID %d has gil_last_holder at %lx\n", pid,
+          data.current_state_addr);
 
   return true;
 }
diff --git a/examples/cpp/pyperf/PyPerfUtil.h b/examples/cpp/pyperf/PyPerfUtil.h
index 3e69a29..c3396f4 100644
--- a/examples/cpp/pyperf/PyPerfUtil.h
+++ b/examples/cpp/pyperf/PyPerfUtil.h
@@ -6,12 +6,14 @@
 #pragma once
 
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include <linux/perf_event.h>
 #include <sys/types.h>
 
 #include "BPF.h"
+#include "PyPerfSampleProcessor.h"
 #include "PyPerfType.h"
 
 namespace ebpf {
@@ -28,37 +30,23 @@
     EVENT_DETACH_FAIL
   };
 
-  struct Sample {
-    pid_t pid;
-    pid_t tid;
-    std::string comm;
-    uint8_t threadStateMatch;
-    uint8_t gilState;
-    uint8_t pthreadIDMatch;
-    uint8_t stackStatus;
-    std::vector<int32_t> pyStackIds;
-
-    explicit Sample(const Event* raw, int rawSize)
-        : pid(raw->pid),
-          tid(raw->tid),
-          comm(raw->comm),
-          threadStateMatch(raw->thread_state_match),
-          gilState(raw->gil_state),
-          pthreadIDMatch(raw->pthread_id_match),
-          stackStatus(raw->stack_status),
-          pyStackIds(raw->stack, raw->stack + raw->stack_len) {}
-  };
-
   // init must be invoked exactly once before invoking profile
   PyPerfResult init();
 
-  PyPerfResult profile(int64_t sampleRate, int64_t durationMs);
+  PyPerfResult profile(int64_t sampleRate, int64_t durationMs,
+                       PyPerfSampleProcessor* processor);
+
+  std::unordered_map<int32_t, std::string> getSymbolMapping();
+
+  uint32_t getTotalSamples() const { return totalSamples_; }
+
+  uint32_t getLostSamples() const { return lostSamples_; }
 
  private:
-  uint32_t lostSymbols_ = 0, totalSamples_ = 0, lostSamples_ = 0, truncatedStack_ = 0;
+  uint32_t totalSamples_ = 0, lostSamples_ = 0;
 
   ebpf::BPF bpf_{0, nullptr, false, "", true};
-  std::vector<Sample> samples_;
+  std::vector<PyPerfSample> samples_;
   bool initCompleted_{false};
 
   void handleSample(const void* data, int dataSize);