annotate program tag

When debugging production systems it is difficult to trace
kernel-reported symbols such as 'bpf_prog_4985bb0bd6c69631' back to the
source code of the program. Hence teach bcc to store the main function
source in the /var/tmp/bcc/bpf_prog_4985bb0bd6c69631/ directory.

This program tag is stable: every time the script is run the tag
will be the same unless the source code of the program changes.
During active development of bcc scripts the /var/tmp/bcc/ directory
can accumulate stale tags, which users have to trim manually.

Python scripts can be modified to use this feature too, but that should
probably be gated by a flag. For the C++ API I think it makes sense
to always store the source code, since the cost is minimal and
the C++ API is used by long-running services.

Example:
$ ./examples/cpp/LLCStat
$ ls -l /var/tmp/bcc/bpf_prog_4985bb0bd6c69631/
total 16
-rw-r--r--. 1 root root 226 Sep  1 17:30 on_cache_miss.c
-rw-r--r--. 1 root root 487 Sep  1 17:30 on_cache_miss.rewritten.c
-rw-r--r--. 1 root root 224 Sep  1 17:30 on_cache_ref.c
-rw-r--r--. 1 root root 484 Sep  1 17:30 on_cache_ref.rewritten.c

Note that the directory holds sources for two functions (on_cache_miss
and on_cache_ref), since two different BPF programs have exactly the
same bytecode and hence the same prog_tag.

$ cat /var/tmp/bcc/bpf_prog_4985bb0bd6c69631/on_cache_miss.c
int on_cache_miss(struct bpf_perf_event_data *ctx) {
    struct event_t key = {};
    get_key(&key);

    u64 zero = 0, *val;
    val = miss_count.lookup_or_init(&key, &zero);
...

Signed-off-by: Alexei Starovoitov <ast@fb.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f637b52..b40e5f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,6 +57,10 @@
   set(BCC_KERNEL_MODULES_DIR "/lib/modules")
 endif()
 
+if(NOT DEFINED BCC_PROG_TAG_DIR)
+  set(BCC_PROG_TAG_DIR "/var/tmp/bcc")
+endif()
+
 # As reported in issue #735, GCC 6 has some behavioral problems when
 # dealing with -isystem. Hence, skip the warning optimization
 # altogether on that compiler.
diff --git a/src/cc/CMakeLists.txt b/src/cc/CMakeLists.txt
index 7a4647c..fd6037a 100644
--- a/src/cc/CMakeLists.txt
+++ b/src/cc/CMakeLists.txt
@@ -14,7 +14,7 @@
 add_definitions(${LLVM_DEFINITIONS})
 configure_file(libbcc.pc.in ${CMAKE_CURRENT_BINARY_DIR}/libbcc.pc @ONLY)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -DBCC_PROG_TAG_DIR='\"${BCC_PROG_TAG_DIR}\"'")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
 
 include(static_libstdc++)
diff --git a/src/cc/api/BPF.cc b/src/cc/api/BPF.cc
index 0a6cb72..f2ba396 100644
--- a/src/cc/api/BPF.cc
+++ b/src/cc/api/BPF.cc
@@ -488,6 +488,10 @@
 
   if (fd < 0)
     return StatusTuple(-1, "Failed to load %s: %d", func_name.c_str(), fd);
+
+  bpf_module_->annotate_prog_tag(func_name, fd,
+                                 reinterpret_cast<struct bpf_insn*>(func_start),
+                                 func_size);
   funcs_[func_name] = fd;
   return StatusTuple(0);
 }
diff --git a/src/cc/bpf_module.cc b/src/cc/bpf_module.cc
index bb385a8..eaad314 100644
--- a/src/cc/bpf_module.cc
+++ b/src/cc/bpf_module.cc
@@ -113,6 +113,7 @@
     local_ts_ = createSharedTableStorage();
     ts_ = &*local_ts_;
   }
+  func_src_ = ebpf::make_unique<FuncSource>();
 }
 
 static StatusTuple unimplemented_sscanf(const char *, void *) {
@@ -133,6 +134,7 @@
   engine_.reset();
   rw_engine_.reset();
   ctx_.reset();
+  func_src_.reset();
 
   ts_->DeletePrefix(Path({id_}));
 }
@@ -456,7 +458,7 @@
 // load an entire c file as a module
 int BPFModule::load_cfile(const string &file, bool in_memory, const char *cflags[], int ncflags) {
   clang_loader_ = ebpf::make_unique<ClangLoader>(&*ctx_, flags_);
-  if (clang_loader_->parse(&mod_, *ts_, file, in_memory, cflags, ncflags, id_))
+  if (clang_loader_->parse(&mod_, *ts_, file, in_memory, cflags, ncflags, id_, *func_src_))
     return -1;
   return 0;
 }
@@ -468,7 +470,7 @@
 // build an ExecutionEngine.
 int BPFModule::load_includes(const string &text) {
   clang_loader_ = ebpf::make_unique<ClangLoader>(&*ctx_, flags_);
-  if (clang_loader_->parse(&mod_, *ts_, text, true, nullptr, 0, ""))
+  if (clang_loader_->parse(&mod_, *ts_, text, true, nullptr, 0, "", *func_src_))
     return -1;
   return 0;
 }
@@ -632,6 +634,68 @@
   return get<0>(section->second);
 }
 
+const char * BPFModule::function_source(const string &name) const {
+  return func_src_->src(name);  // FuncSource::src() yields "" for unknown names
+}
+
+const char * BPFModule::function_source_rewritten(const string &name) const {
+  return func_src_->src_rewritten(name);  // "" when nothing was recorded for name
+}
+
+// Write src to path (created/truncated); returns 0, or -1 after logging.
+static int write_prog_src(const string &path, const char *src) {
+  FileDesc fd(open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644));
+  if (fd < 0) {
+    fprintf(stderr, "cannot create %s\n", path.c_str());
+    return -1;
+  }
+  const size_t len = strlen(src);
+  if (write(fd, src, len) != static_cast<ssize_t>(len)) {
+    fprintf(stderr, "cannot write %s\n", path.c_str());
+    return -1;
+  }
+  return 0;
+}
+
+// Save the original and rewritten source of function `name` under
+// BCC_PROG_TAG_DIR/bpf_prog_<tag>/, after checking that the tag computed from
+// insns equals the tag reported for prog_fd. Returns 0 on success.
+int BPFModule::annotate_prog_tag(const string &name, int prog_fd,
+                                 struct bpf_insn *insns, int prog_len) {
+  unsigned long long tag1, tag2;
+  int err = bpf_prog_compute_tag(insns, prog_len, &tag1);
+  if (err)
+    return err;
+  err = bpf_prog_get_tag(prog_fd, &tag2);
+  if (err)
+    return err;
+  if (tag1 != tag2) {
+    fprintf(stderr, "prog tag mismatch %llx %llx\n", tag1, tag2);
+    return -1;
+  }
+
+  err = mkdir(BCC_PROG_TAG_DIR, 0777);
+  if (err && errno != EEXIST) {
+    fprintf(stderr, "cannot create " BCC_PROG_TAG_DIR "\n");
+    return -1;
+  }
+
+  // std::string paths: a fixed buffer could truncate and hit the wrong file.
+  char tag[32];
+  ::snprintf(tag, sizeof(tag), "bpf_prog_%llx", tag1);
+  const string dir = string(BCC_PROG_TAG_DIR "/") + tag;
+  err = mkdir(dir.c_str(), 0777);
+  if (err && errno != EEXIST) {
+    fprintf(stderr, "cannot create %s\n", dir.c_str());
+    return -1;
+  }
+
+  if (write_prog_src(dir + "/" + name + ".c", function_source(name)))
+    return -1;
+  return write_prog_src(dir + "/" + name + ".rewritten.c",
+                        function_source_rewritten(name));
+}
+
 size_t BPFModule::function_size(size_t id) const {
   if (id >= function_names_.size())
     return 0;
diff --git a/src/cc/bpf_module.h b/src/cc/bpf_module.h
index a4b0369..333fff2 100644
--- a/src/cc/bpf_module.h
+++ b/src/cc/bpf_module.h
@@ -37,6 +37,7 @@
 class TableStorage;
 class BLoader;
 class ClangLoader;
+class FuncSource;
 
 class BPFModule {
  private:
@@ -68,6 +69,10 @@
   size_t num_functions() const;
   uint8_t * function_start(size_t id) const;
   uint8_t * function_start(const std::string &name) const;
+  const char * function_source(const std::string &name) const;
+  const char * function_source_rewritten(const std::string &name) const;
+  int annotate_prog_tag(const std::string &name, int fd,
+			struct bpf_insn *insn, int prog_len);
   const char * function_name(size_t id) const;
   size_t function_size(size_t id) const;
   size_t function_size(const std::string &name) const;
@@ -108,6 +113,7 @@
   std::unique_ptr<llvm::Module> mod_;
   std::unique_ptr<BLoader> b_loader_;
   std::unique_ptr<ClangLoader> clang_loader_;
+  std::unique_ptr<FuncSource> func_src_;
   std::map<std::string, std::tuple<uint8_t *, uintptr_t>> sections_;
   std::vector<TableDesc *> tables_;
   std::map<std::string, size_t> table_names_;
diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc
index 29e6d31..40af8d2 100644
--- a/src/cc/frontends/clang/b_frontend_action.cc
+++ b/src/cc/frontends/clang/b_frontend_action.cc
@@ -26,6 +26,7 @@
 #include <clang/Rewrite/Core/Rewriter.h>
 
 #include "b_frontend_action.h"
+#include "loader.h"
 #include "common.h"
 #include "table_storage.h"
 
@@ -222,6 +223,9 @@
   auto real_start_loc = rewriter_.getSourceMgr().getFileLoc(D->getLocStart());
   if (D->isExternallyVisible() && D->hasBody()) {
     current_fn_ = D->getName();
+    string bd = rewriter_.getRewrittenText(expansionRange(D->getSourceRange()));
+    fe_.func_src_.set_src(current_fn_, bd);
+    fe_.func_range_[current_fn_] = expansionRange(D->getSourceRange());
     string attr = string("__attribute__((section(\"") + BPF_FN_PREFIX + D->getName().str() + "\")))\n";
     rewriter_.InsertText(real_start_loc, attr);
     if (D->param_size() > MAX_CALLING_CONV_REGS + 1) {
@@ -776,12 +780,18 @@
 }
 
 BFrontendAction::BFrontendAction(llvm::raw_ostream &os, unsigned flags, TableStorage &ts,
-                                 const std::string &id)
-    : os_(os), flags_(flags), ts_(ts), id_(id), rewriter_(new Rewriter) {}
+                                 const std::string &id, FuncSource& func_src)
+    : os_(os), flags_(flags), ts_(ts), id_(id), rewriter_(new Rewriter), func_src_(func_src) {}
 
 void BFrontendAction::EndSourceFileAction() {
   if (flags_ & DEBUG_PREPROCESSOR)
     rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(llvm::errs());
+
+  // Record each function's rewritten text now that the source file is done.
+  for (const auto &func : func_range_) {
+    string bd = rewriter_->getRewrittenText(func.second);
+    func_src_.set_src_rewritten(func.first, bd);
+  }
   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(os_);
   os_.flush();
 }
diff --git a/src/cc/frontends/clang/b_frontend_action.h b/src/cc/frontends/clang/b_frontend_action.h
index bff1eb1..6708265 100644
--- a/src/cc/frontends/clang/b_frontend_action.h
+++ b/src/cc/frontends/clang/b_frontend_action.h
@@ -42,6 +42,7 @@
 namespace ebpf {
 
 class BFrontendAction;
+class FuncSource;
 
 // Type visitor and rewriter for B programs.
 // It will look for B-specific features and rewrite them into a valid
@@ -122,7 +123,8 @@
  public:
   // Initialize with the output stream where the new source file contents
   // should be written.
-  BFrontendAction(llvm::raw_ostream &os, unsigned flags, TableStorage &ts, const std::string &id);
+  BFrontendAction(llvm::raw_ostream &os, unsigned flags, TableStorage &ts, const std::string &id,
+                  FuncSource& func_src);
 
   // Called by clang when the AST has been completed, here the output stream
   // will be flushed.
@@ -141,6 +143,9 @@
   TableStorage &ts_;
   std::string id_;
   std::unique_ptr<clang::Rewriter> rewriter_;
+  friend class BTypeVisitor;
+  std::map<std::string, clang::SourceRange> func_range_;
+  FuncSource& func_src_;
 };
 
 }  // namespace visitor
diff --git a/src/cc/frontends/clang/loader.cc b/src/cc/frontends/clang/loader.cc
index 5c1f8be..972e168 100644
--- a/src/cc/frontends/clang/loader.cc
+++ b/src/cc/frontends/clang/loader.cc
@@ -105,7 +105,8 @@
 }
 
 int ClangLoader::parse(unique_ptr<llvm::Module> *mod, TableStorage &ts, const string &file,
-                       bool in_memory, const char *cflags[], int ncflags, const std::string &id) {
+                       bool in_memory, const char *cflags[], int ncflags, const std::string &id,
+                       FuncSource& func_src) {
   using namespace clang;
 
   string main_path = "/virtual/main.c";
@@ -276,7 +277,7 @@
   // capture the rewritten c file
   string out_str1;
   llvm::raw_string_ostream os1(out_str1);
-  BFrontendAction bact(os1, flags_, ts, id);
+  BFrontendAction bact(os1, flags_, ts, id, func_src);
   if (!compiler1.ExecuteAction(bact))
     return -1;
   unique_ptr<llvm::MemoryBuffer> out_buf1 = llvm::MemoryBuffer::getMemBuffer(out_str1);
@@ -312,5 +313,26 @@
   return 0;
 }
 
+// Original source text recorded for `name`, or "" when none was recorded.
+const char * FuncSource::src(const std::string& name) {
+  const auto it = funcs_.find(name);
+  // The returned pointer aliases the stored std::string; it is not a copy.
+  return it == funcs_.end() ? "" : it->second.src_.c_str();
+}
+
+// Rewritten source text recorded for `name`, or "" when none was recorded.
+const char * FuncSource::src_rewritten(const std::string& name) {
+  const auto it = funcs_.find(name);
+  // The returned pointer aliases the stored std::string; it is not a copy.
+  return it == funcs_.end() ? "" : it->second.src_rewritten_.c_str();
+}
+
+void FuncSource::set_src(const std::string& name, const std::string& src) {
+  funcs_[name].src_ = src;  // operator[] inserts an empty entry on first use
+}
+
+void FuncSource::set_src_rewritten(const std::string& name, const std::string& src) {
+  funcs_[name].src_rewritten_ = src;  // operator[] inserts an empty entry on first use
+}
 
 }  // namespace ebpf
diff --git a/src/cc/frontends/clang/loader.h b/src/cc/frontends/clang/loader.h
index b67ac2a..73d363d 100644
--- a/src/cc/frontends/clang/loader.h
+++ b/src/cc/frontends/clang/loader.h
@@ -30,12 +30,29 @@
 
 namespace ebpf {
 
+// Keeps, per function name, the source text before and after rewriting.
+class FuncSource {
+  struct SourceCode {
+    SourceCode(const std::string& s1 = "", const std::string& s2 = ""): src_(s1), src_rewritten_(s2) {}
+    std::string src_;            // stored by set_src()
+    std::string src_rewritten_;  // stored by set_src_rewritten()
+  };
+  std::map<std::string, SourceCode> funcs_;
+ public:
+  FuncSource() = default;
+  const char * src(const std::string& name);
+  const char * src_rewritten(const std::string& name);
+  void set_src(const std::string& name, const std::string& src);
+  void set_src_rewritten(const std::string& name, const std::string& src);
+};
+
 class ClangLoader {
  public:
   explicit ClangLoader(llvm::LLVMContext *ctx, unsigned flags);
   ~ClangLoader();
   int parse(std::unique_ptr<llvm::Module> *mod, TableStorage &ts, const std::string &file,
-            bool in_memory, const char *cflags[], int ncflags, const std::string &id);
+            bool in_memory, const char *cflags[], int ncflags, const std::string &id,
+	    FuncSource& func_src);
 
  private:
   static std::map<std::string, std::unique_ptr<llvm::MemoryBuffer>> remapped_files_;